feat: image hash implemented
All checks were successful
ci/woodpecker/push/woodpecker Pipeline was successful
ci/woodpecker/pr/woodpecker Pipeline was successful

This commit is contained in:
Luna 2024-07-01 14:59:51 +02:00
parent 17c28722fb
commit 93c9a367a2
5 changed files with 52 additions and 57 deletions

View File

@ -175,17 +175,18 @@ class Pages:
# https://stackoverflow.com/a/17016257 # https://stackoverflow.com/a/17016257
naming[key] = list(dict.fromkeys(value)) naming[key] = list(dict.fromkeys(value))
artwork: ArtworkCollection = artist.artwork artwork_collection: ArtworkCollection = artist.artwork
for image_number, variant in enumerate(artwork): artwork_collection.compile()
for image_number, artwork in enumerate(artwork_collection):
for artwork_variant in artwork.variants:
naming["image_number"] = [str(image_number)] naming["image_number"] = [str(image_number)]
url: str = variant.url
target = Target( target = Target(
relative_to_music_dir=True, relative_to_music_dir=True,
file_path=Path(self._parse_path_template(main_settings["artist_artwork_path"], naming=naming)) file_path=Path(self._parse_path_template(main_settings["artist_artwork_path"], naming=naming))
) )
artwork.compile(target) with Image.open(artwork_variant.target.file_path) as img:
img.save(target.file_path, main_settings["image_format"])
artwork_variant.target = Target
def download(self, data_object: DataObject, genre: str, **kwargs) -> DownloadResult: def download(self, data_object: DataObject, genre: str, **kwargs) -> DownloadResult:
# fetch the given object # fetch the given object

View File

@ -18,6 +18,8 @@ from .parents import OuterProxy as Base
from .target import Target from .target import Target
from PIL import Image from PIL import Image
import imagehash
artwork_connection: Connection = Connection(module="artwork") artwork_connection: Connection = Connection(module="artwork")
@ -168,24 +170,37 @@ class ArtworkCollection:
for value in values: for value in values:
self.append(value, **kwargs) self.append(value, **kwargs)
def compile(self, target: Target, **kwargs) -> None: def compile(self, **kwargs) -> None:
""" """
This will make the artworks ready for download This will make the artworks ready for download and delete duplicates.
""" """
artwork_hashes: list = list()
for artwork in self._data: for artwork in self._data:
index = 0
for artwork_variant in artwork.variants: for artwork_variant in artwork.variants:
r = artwork_connection.get( r = artwork_connection.get(
url=artwork_variant.url, url=artwork_variant.url,
name=artwork_variant.url, name=artwork_variant.url,
) )
target: Target = artwork_variant.target
temp_target: Target = Target.temp() with target.open("wb") as f:
with temp_target.open("wb") as f:
f.write(r.content) f.write(r.content)
converted_target: Target = Target.temp(file_extension=main_settings["image_format"]) with Image.open(target.file_path) as img:
with Image.open(temp_target.file_path) as img: # https://stackoverflow.com/a/59476938/16804841
# crop the image if it isn't square in the middle with minimum data loss if img.mode != 'RGB':
img = img.convert('RGB')
try:
image_hash = imagehash.crop_resistant_hash(img)
except Exception as e:
continue
if image_hash in artwork_hashes:
artwork.variants.pop(index)
target.delete()
continue
artwork_hashes.append(image_hash)
width, height = img.size width, height = img.size
if width != height: if width != height:
if width > height: if width > height:
@ -195,12 +210,7 @@ class ArtworkCollection:
# resize the image to the preferred resolution # resize the image to the preferred resolution
img.thumbnail((main_settings["preferred_artwork_resolution"], main_settings["preferred_artwork_resolution"])) img.thumbnail((main_settings["preferred_artwork_resolution"], main_settings["preferred_artwork_resolution"]))
index =+ 1
# https://stackoverflow.com/a/59476938/16804841
if img.mode != 'RGB':
img = img.convert('RGB')
if target is not None:
img.save(target.file_path, main_settings["image_format"])
@ -208,12 +218,10 @@ class ArtworkCollection:
self.parent_artworks.update(other.parent_artworks) self.parent_artworks.update(other.parent_artworks)
for other_artwork in other._data: for other_artwork in other._data:
for other_variant in other_artwork.variants: for other_variant in other_artwork.variants:
if len(self._data) != 0: if self.__contains__(other_variant.url):
for artwork in self._data: continue
for variant in artwork.variants: self.append(ArtworkVariant(other_variant.url))
variant.__merge__(other_variant)
else:
self.add_data(other_variant.url)
def __hash__(self) -> int: def __hash__(self) -> int:
return id(self) return id(self)
@ -224,21 +232,5 @@ class ArtworkCollection:
def get_urls(self) -> Generator[str, None, None]: def get_urls(self) -> Generator[str, None, None]:
yield from (artwork.url for artwork in self._data if artwork.url is not None) yield from (artwork.url for artwork in self._data if artwork.url is not None)
"""
@property
def flat_empty(self) -> bool:
return len(self._variant_mapping.keys()) <= 0
def _get_best_from_list(self, artwork_variants: List[ArtworkVariant]) -> Optional[ArtworkVariant]:
return min(artwork_variants, key=lambda x: x["deviation"])
@property
def best_variant(self) -> ArtworkVariant:
if self.flat_empty:
return self._get_best_from_list([parent.best_variant for parent in self.parent_artworks])
return self._get_best_from_list(self._variant_mapping.values())
def get_variant_name(self, variant: ArtworkVariant) -> str:
return f"artwork_{variant['width']}x{variant['height']}_{hash_url(variant['url']).replace('/', '_')}"
"""

View File

@ -31,7 +31,8 @@ class Target(OuterProxy):
} }
@classmethod @classmethod
def temp(cls, name: str = str(random.randint(0, HIGHEST_ID)), file_extension: Optional[str] = None) -> P: def temp(cls, name: str = None, file_extension: Optional[str] = None) -> P:
name = name or str(random.randint(0, HIGHEST_ID))
if file_extension is not None: if file_extension is not None:
name = f"{name}.{file_extension}" name = f"{name}.{file_extension}"

View File

@ -8,9 +8,10 @@ import pycountry
from bs4 import BeautifulSoup from bs4 import BeautifulSoup
from ..connection import Connection from ..connection import Connection
from ..objects import (Album, Artist, ArtworkCollection, DatabaseObject, from ..objects import (Album, Artist, DatabaseObject,
FormattedText, ID3Timestamp, Label, Lyrics, Song, FormattedText, ID3Timestamp, Label, Lyrics, Song,
Source, Target) Source, Target)
from ..objects.artwork import (Artwork, ArtworkVariant, ArtworkCollection)
from ..utils import shared, string_processing from ..utils import shared, string_processing
from ..utils.config import logging_settings, main_settings from ..utils.config import logging_settings, main_settings
from ..utils.enums import ALL_SOURCE_TYPES, SourceType from ..utils.enums import ALL_SOURCE_TYPES, SourceType
@ -1069,7 +1070,7 @@ class Musify(Page):
gallery_body_content: BeautifulSoup = artwork_gallery.find(id="bodyContent") gallery_body_content: BeautifulSoup = artwork_gallery.find(id="bodyContent")
gallery_image_element_list: List[BeautifulSoup] = gallery_body_content.find_all("img") gallery_image_element_list: List[BeautifulSoup] = gallery_body_content.find_all("img")
for gallery_image_element in gallery_image_element_list: for gallery_image_element in gallery_image_element_list:
artist.artwork.add_data(url=gallery_image_element.get("data-src", gallery_image_element.get("src")), width=247, heigth=247) artist.artwork.append(ArtworkVariant(url=gallery_image_element.get("data-src", gallery_image_element.get("src")), width=247, heigth=247))
def fetch_artist(self, source: Source, **kwargs) -> Artist: def fetch_artist(self, source: Source, **kwargs) -> Artist:

View File

@ -441,7 +441,7 @@ class YoutubeMusic(SuperYouTube):
# fetch artist artwork # fetch artist artwork
artist_thumbnails = musicImmersiveHeaderRenderer.get("thumbnail", {}).get("musicThumbnailRenderer", {}).get("thumbnail", {}).get("thumbnails", {}) artist_thumbnails = musicImmersiveHeaderRenderer.get("thumbnail", {}).get("musicThumbnailRenderer", {}).get("thumbnail", {}).get("thumbnails", {})
for artist_thumbnail in artist_thumbnails: for artist_thumbnail in artist_thumbnails:
artist.artwork.append(**artist_thumbnail) artist.artwork.append(artist_thumbnail)
if DEBUG: if DEBUG:
for i, content in enumerate(renderer_list): for i, content in enumerate(renderer_list):
@ -493,7 +493,7 @@ class YoutubeMusic(SuperYouTube):
# album artwork # album artwork
album_thumbnails = musicDetailHeaderRenderer.get("thumbnail", {}).get("croppedSquareThumbnailRenderer", {}).get("thumbnail", {}).get("thumbnails", {}) album_thumbnails = musicDetailHeaderRenderer.get("thumbnail", {}).get("croppedSquareThumbnailRenderer", {}).get("thumbnail", {}).get("thumbnails", {})
for album_thumbnail in album_thumbnails: for album_thumbnail in album_thumbnails:
album.artwork.append(**album_thumbnail) album.artwork.append(value=album_thumbnail)
title_runs: List[dict] = musicDetailHeaderRenderer.get("title", {}).get("runs", []) title_runs: List[dict] = musicDetailHeaderRenderer.get("title", {}).get("runs", [])
subtitle_runs: List[dict] = musicDetailHeaderRenderer.get("subtitle", {}).get("runs", []) subtitle_runs: List[dict] = musicDetailHeaderRenderer.get("subtitle", {}).get("runs", [])