feat: image hash implemented
All checks were successful
ci/woodpecker/push/woodpecker Pipeline was successful
ci/woodpecker/pr/woodpecker Pipeline was successful

This commit is contained in:
Luna 2024-07-01 14:59:51 +02:00
parent 17c28722fb
commit 93c9a367a2
5 changed files with 52 additions and 57 deletions

View File

@ -175,17 +175,18 @@ class Pages:
# https://stackoverflow.com/a/17016257
naming[key] = list(dict.fromkeys(value))
artwork: ArtworkCollection = artist.artwork
for image_number, variant in enumerate(artwork):
naming["image_number"] = [str(image_number)]
url: str = variant.url
target = Target(
relative_to_music_dir=True,
file_path=Path(self._parse_path_template(main_settings["artist_artwork_path"], naming=naming))
)
artwork.compile(target)
artwork_collection: ArtworkCollection = artist.artwork
artwork_collection.compile()
for image_number, artwork in enumerate(artwork_collection):
for artwork_variant in artwork.variants:
naming["image_number"] = [str(image_number)]
target = Target(
relative_to_music_dir=True,
file_path=Path(self._parse_path_template(main_settings["artist_artwork_path"], naming=naming))
)
with Image.open(artwork_variant.target.file_path) as img:
img.save(target.file_path, main_settings["image_format"])
artwork_variant.target = Target
def download(self, data_object: DataObject, genre: str, **kwargs) -> DownloadResult:
# fetch the given object

View File

@ -18,6 +18,8 @@ from .parents import OuterProxy as Base
from .target import Target
from PIL import Image
import imagehash
artwork_connection: Connection = Connection(module="artwork")
@ -168,24 +170,37 @@ class ArtworkCollection:
for value in values:
self.append(value, **kwargs)
def compile(self, target: Target, **kwargs) -> None:
def compile(self, **kwargs) -> None:
"""
This will make the artworks ready for download
This will make the artworks ready for download and delete duplicates.
"""
artwork_hashes: list = list()
for artwork in self._data:
index = 0
for artwork_variant in artwork.variants:
r = artwork_connection.get(
url=artwork_variant.url,
name=artwork_variant.url,
)
temp_target: Target = Target.temp()
with temp_target.open("wb") as f:
target: Target = artwork_variant.target
with target.open("wb") as f:
f.write(r.content)
converted_target: Target = Target.temp(file_extension=main_settings["image_format"])
with Image.open(temp_target.file_path) as img:
# crop the image if it isn't square in the middle with minimum data loss
with Image.open(target.file_path) as img:
# https://stackoverflow.com/a/59476938/16804841
if img.mode != 'RGB':
img = img.convert('RGB')
try:
image_hash = imagehash.crop_resistant_hash(img)
except Exception as e:
continue
if image_hash in artwork_hashes:
artwork.variants.pop(index)
target.delete()
continue
artwork_hashes.append(image_hash)
width, height = img.size
if width != height:
if width > height:
@ -193,27 +208,20 @@ class ArtworkCollection:
else:
img = img.crop((0, height // 2 - width // 2, width, height // 2 + width // 2))
# resize the image to the preferred resolution
img.thumbnail((main_settings["preferred_artwork_resolution"], main_settings["preferred_artwork_resolution"]))
# https://stackoverflow.com/a/59476938/16804841
if img.mode != 'RGB':
img = img.convert('RGB')
if target is not None:
img.save(target.file_path, main_settings["image_format"])
# resize the image to the preferred resolution
img.thumbnail((main_settings["preferred_artwork_resolution"], main_settings["preferred_artwork_resolution"]))
index =+ 1
def __merge__(self, other: ArtworkCollection, **kwargs) -> None:
self.parent_artworks.update(other.parent_artworks)
for other_artwork in other._data:
for other_variant in other_artwork.variants:
if len(self._data) != 0:
for artwork in self._data:
for variant in artwork.variants:
variant.__merge__(other_variant)
else:
self.add_data(other_variant.url)
if self.__contains__(other_variant.url):
continue
self.append(ArtworkVariant(other_variant.url))
def __hash__(self) -> int:
return id(self)
@ -224,21 +232,5 @@ class ArtworkCollection:
def get_urls(self) -> Generator[str, None, None]:
yield from (artwork.url for artwork in self._data if artwork.url is not None)
"""
@property
def flat_empty(self) -> bool:
return len(self._variant_mapping.keys()) <= 0
def _get_best_from_list(self, artwork_variants: List[ArtworkVariant]) -> Optional[ArtworkVariant]:
return min(artwork_variants, key=lambda x: x["deviation"])
@property
def best_variant(self) -> ArtworkVariant:
if self.flat_empty:
return self._get_best_from_list([parent.best_variant for parent in self.parent_artworks])
return self._get_best_from_list(self._variant_mapping.values())
def get_variant_name(self, variant: ArtworkVariant) -> str:
return f"artwork_{variant['width']}x{variant['height']}_{hash_url(variant['url']).replace('/', '_')}"
"""

View File

@ -31,7 +31,8 @@ class Target(OuterProxy):
}
@classmethod
def temp(cls, name: str = str(random.randint(0, HIGHEST_ID)), file_extension: Optional[str] = None) -> P:
def temp(cls, name: str = None, file_extension: Optional[str] = None) -> P:
name = name or str(random.randint(0, HIGHEST_ID))
if file_extension is not None:
name = f"{name}.{file_extension}"

View File

@ -8,9 +8,10 @@ import pycountry
from bs4 import BeautifulSoup
from ..connection import Connection
from ..objects import (Album, Artist, ArtworkCollection, DatabaseObject,
from ..objects import (Album, Artist, DatabaseObject,
FormattedText, ID3Timestamp, Label, Lyrics, Song,
Source, Target)
from ..objects.artwork import (Artwork, ArtworkVariant, ArtworkCollection)
from ..utils import shared, string_processing
from ..utils.config import logging_settings, main_settings
from ..utils.enums import ALL_SOURCE_TYPES, SourceType
@ -1069,7 +1070,7 @@ class Musify(Page):
gallery_body_content: BeautifulSoup = artwork_gallery.find(id="bodyContent")
gallery_image_element_list: List[BeautifulSoup] = gallery_body_content.find_all("img")
for gallery_image_element in gallery_image_element_list:
artist.artwork.add_data(url=gallery_image_element.get("data-src", gallery_image_element.get("src")), width=247, heigth=247)
artist.artwork.append(ArtworkVariant(url=gallery_image_element.get("data-src", gallery_image_element.get("src")), width=247, heigth=247))
def fetch_artist(self, source: Source, **kwargs) -> Artist:

View File

@ -441,7 +441,7 @@ class YoutubeMusic(SuperYouTube):
# fetch artist artwork
artist_thumbnails = musicImmersiveHeaderRenderer.get("thumbnail", {}).get("musicThumbnailRenderer", {}).get("thumbnail", {}).get("thumbnails", {})
for artist_thumbnail in artist_thumbnails:
artist.artwork.append(**artist_thumbnail)
artist.artwork.append(artist_thumbnail)
if DEBUG:
for i, content in enumerate(renderer_list):
@ -493,7 +493,7 @@ class YoutubeMusic(SuperYouTube):
# album artwork
album_thumbnails = musicDetailHeaderRenderer.get("thumbnail", {}).get("croppedSquareThumbnailRenderer", {}).get("thumbnail", {}).get("thumbnails", {})
for album_thumbnail in album_thumbnails:
album.artwork.append(**album_thumbnail)
album.artwork.append(value=album_thumbnail)
title_runs: List[dict] = musicDetailHeaderRenderer.get("title", {}).get("runs", [])
subtitle_runs: List[dict] = musicDetailHeaderRenderer.get("subtitle", {}).get("runs", [])