From 93c9a367a257647fb0bf2758dea3f20c5edafeab Mon Sep 17 00:00:00 2001 From: Kur01234 Date: Mon, 1 Jul 2024 14:59:51 +0200 Subject: [PATCH] feat: image hash implemented --- music_kraken/download/page_attributes.py | 23 +++--- music_kraken/objects/artwork.py | 74 +++++++++---------- music_kraken/objects/target.py | 3 +- music_kraken/pages/musify.py | 5 +- .../pages/youtube_music/youtube_music.py | 4 +- 5 files changed, 52 insertions(+), 57 deletions(-) diff --git a/music_kraken/download/page_attributes.py b/music_kraken/download/page_attributes.py index 5582bc9..248ce6c 100644 --- a/music_kraken/download/page_attributes.py +++ b/music_kraken/download/page_attributes.py @@ -175,17 +175,18 @@ class Pages: # https://stackoverflow.com/a/17016257 naming[key] = list(dict.fromkeys(value)) - artwork: ArtworkCollection = artist.artwork - for image_number, variant in enumerate(artwork): - naming["image_number"] = [str(image_number)] - - url: str = variant.url - - target = Target( - relative_to_music_dir=True, - file_path=Path(self._parse_path_template(main_settings["artist_artwork_path"], naming=naming)) - ) - artwork.compile(target) + artwork_collection: ArtworkCollection = artist.artwork + artwork_collection.compile() + for image_number, artwork in enumerate(artwork_collection): + for artwork_variant in artwork.variants: + naming["image_number"] = [str(image_number)] + target = Target( + relative_to_music_dir=True, + file_path=Path(self._parse_path_template(main_settings["artist_artwork_path"], naming=naming)) + ) + with Image.open(artwork_variant.target.file_path) as img: + img.save(target.file_path, main_settings["image_format"]) + artwork_variant.target = Target def download(self, data_object: DataObject, genre: str, **kwargs) -> DownloadResult: # fetch the given object diff --git a/music_kraken/objects/artwork.py b/music_kraken/objects/artwork.py index 2417405..fcd69cf 100644 --- a/music_kraken/objects/artwork.py +++ b/music_kraken/objects/artwork.py @@ -18,6 +18,8 @@ from .parents import OuterProxy as Base from .target import Target from PIL import Image +import imagehash + artwork_connection: Connection = Connection(module="artwork") @@ -168,24 +170,37 @@ class ArtworkCollection: for value in values: self.append(value, **kwargs) - def compile(self, target: Target, **kwargs) -> None: + def compile(self, **kwargs) -> None: """ - This will make the artworks ready for download + This will make the artworks ready for download and delete duplicates. """ + artwork_hashes: list = list() for artwork in self._data: + index = 0 for artwork_variant in artwork.variants: r = artwork_connection.get( url=artwork_variant.url, name=artwork_variant.url, ) - - temp_target: Target = Target.temp() - with temp_target.open("wb") as f: + target: Target = artwork_variant.target + with target.open("wb") as f: f.write(r.content) - converted_target: Target = Target.temp(file_extension=main_settings["image_format"]) - with Image.open(temp_target.file_path) as img: - # crop the image if it isn't square in the middle with minimum data loss + with Image.open(target.file_path) as img: + # https://stackoverflow.com/a/59476938/16804841 + if img.mode != 'RGB': + img = img.convert('RGB') + + try: + image_hash = imagehash.crop_resistant_hash(img) + except Exception as e: + continue + + if image_hash in artwork_hashes: + artwork.variants.pop(index) + target.delete() + continue + artwork_hashes.append(image_hash) width, height = img.size if width != height: if width > height: @@ -193,27 +208,20 @@ class ArtworkCollection: else: img = img.crop((0, height // 2 - width // 2, width, height // 2 + width // 2)) - # resize the image to the preferred resolution - img.thumbnail((main_settings["preferred_artwork_resolution"], main_settings["preferred_artwork_resolution"])) - - # https://stackoverflow.com/a/59476938/16804841 - if img.mode != 'RGB': - img = img.convert('RGB') - if target is not None: - img.save(target.file_path, main_settings["image_format"]) - - + # resize the image to the preferred resolution + img.thumbnail((main_settings["preferred_artwork_resolution"], main_settings["preferred_artwork_resolution"])) + index =+ 1 + + def __merge__(self, other: ArtworkCollection, **kwargs) -> None: self.parent_artworks.update(other.parent_artworks) for other_artwork in other._data: for other_variant in other_artwork.variants: - if len(self._data) != 0: - for artwork in self._data: - for variant in artwork.variants: - variant.__merge__(other_variant) - else: - self.add_data(other_variant.url) + if self.__contains__(other_variant.url): + continue + self.append(ArtworkVariant(other_variant.url)) + def __hash__(self) -> int: return id(self) @@ -224,21 +232,5 @@ class ArtworkCollection: def get_urls(self) -> Generator[str, None, None]: yield from (artwork.url for artwork in self._data if artwork.url is not None) - """ - @property - def flat_empty(self) -> bool: - return len(self._variant_mapping.keys()) <= 0 - - def _get_best_from_list(self, artwork_variants: List[ArtworkVariant]) -> Optional[ArtworkVariant]: - return min(artwork_variants, key=lambda x: x["deviation"]) - - @property - def best_variant(self) -> ArtworkVariant: - if self.flat_empty: - return self._get_best_from_list([parent.best_variant for parent in self.parent_artworks]) - return self._get_best_from_list(self._variant_mapping.values()) - - def get_variant_name(self, variant: ArtworkVariant) -> str: - return f"artwork_{variant['width']}x{variant['height']}_{hash_url(variant['url']).replace('/', '_')}" - """ + \ No newline at end of file diff --git a/music_kraken/objects/target.py b/music_kraken/objects/target.py index ced2ddc..28bdb26 100644 --- a/music_kraken/objects/target.py +++ b/music_kraken/objects/target.py @@ -31,7 +31,8 @@ class Target(OuterProxy): } @classmethod - def temp(cls, name: str = str(random.randint(0, HIGHEST_ID)), file_extension: Optional[str] = None) -> P: + def temp(cls, name: str = None, file_extension: Optional[str] = None) -> P: + name = name or str(random.randint(0, HIGHEST_ID)) if file_extension is not None: name = f"{name}.{file_extension}" diff --git a/music_kraken/pages/musify.py b/music_kraken/pages/musify.py index ed1caed..bfb19ff 100644 --- a/music_kraken/pages/musify.py +++ b/music_kraken/pages/musify.py @@ -8,9 +8,10 @@ import pycountry from bs4 import BeautifulSoup from ..connection import Connection -from ..objects import (Album, Artist, ArtworkCollection, DatabaseObject, +from ..objects import (Album, Artist, DatabaseObject, FormattedText, ID3Timestamp, Label, Lyrics, Song, Source, Target) +from ..objects.artwork import (Artwork, ArtworkVariant, ArtworkCollection) from ..utils import shared, string_processing from ..utils.config import logging_settings, main_settings from ..utils.enums import ALL_SOURCE_TYPES, SourceType @@ -1069,7 +1070,7 @@ class Musify(Page): gallery_body_content: BeautifulSoup = artwork_gallery.find(id="bodyContent") gallery_image_element_list: List[BeautifulSoup] = gallery_body_content.find_all("img") for gallery_image_element in gallery_image_element_list: - artist.artwork.add_data(url=gallery_image_element.get("data-src", gallery_image_element.get("src")), width=247, heigth=247) + artist.artwork.append(ArtworkVariant(url=gallery_image_element.get("data-src", gallery_image_element.get("src")), width=247, heigth=247)) def fetch_artist(self, source: Source, **kwargs) -> Artist: diff --git a/music_kraken/pages/youtube_music/youtube_music.py b/music_kraken/pages/youtube_music/youtube_music.py index c709f5c..96006ea 100644 --- a/music_kraken/pages/youtube_music/youtube_music.py +++ b/music_kraken/pages/youtube_music/youtube_music.py @@ -441,7 +441,7 @@ class YoutubeMusic(SuperYouTube): # fetch artist artwork artist_thumbnails = musicImmersiveHeaderRenderer.get("thumbnail", {}).get("musicThumbnailRenderer", {}).get("thumbnail", {}).get("thumbnails", {}) for artist_thumbnail in artist_thumbnails: - artist.artwork.append(**artist_thumbnail) + artist.artwork.append(artist_thumbnail) if DEBUG: for i, content in enumerate(renderer_list): @@ -493,7 +493,7 @@ class YoutubeMusic(SuperYouTube): # album artwork album_thumbnails = musicDetailHeaderRenderer.get("thumbnail", {}).get("croppedSquareThumbnailRenderer", {}).get("thumbnail", {}).get("thumbnails", {}) for album_thumbnail in album_thumbnails: - album.artwork.append(**album_thumbnail) + album.artwork.append(value=album_thumbnail) title_runs: List[dict] = musicDetailHeaderRenderer.get("title", {}).get("runs", []) subtitle_runs: List[dict] = musicDetailHeaderRenderer.get("subtitle", {}).get("runs", [])