From 4e50bb1fba5aeb4b9e631230ecdbd9dbe6b10261 Mon Sep 17 00:00:00 2001 From: Lars Noack Date: Thu, 6 Jun 2024 17:53:17 +0200 Subject: [PATCH] draft implemented add_data --- .vscode/settings.json | 1 + music_kraken/audio/metadata.py | 14 +- music_kraken/objects/artwork.py | 130 ++++++++++++++---- music_kraken/objects/song.py | 4 + music_kraken/pages/bandcamp.py | 6 +- music_kraken/pages/genius.py | 8 +- music_kraken/pages/musify.py | 8 +- .../pages/youtube_music/youtube_music.py | 2 +- music_kraken/utils/__init__.py | 47 ++++++- music_kraken/utils/enums/__init__.py | 76 +++++++++- music_kraken/utils/string_processing.py | 20 ++- 11 files changed, 255 insertions(+), 61 deletions(-) diff --git a/.vscode/settings.json b/.vscode/settings.json index fbc21fa..48df21d 100644 --- a/.vscode/settings.json +++ b/.vscode/settings.json @@ -20,6 +20,7 @@ "APIC", "Bandcamp", "bitrate", + "CALLSTACK", "DEEZER", "dotenv", "encyclopaedia", diff --git a/music_kraken/audio/metadata.py b/music_kraken/audio/metadata.py index bceb775..e68ea24 100644 --- a/music_kraken/audio/metadata.py +++ b/music_kraken/audio/metadata.py @@ -1,21 +1,21 @@ -import mutagen -from mutagen.id3 import ID3, Frame, APIC, USLT +import logging from pathlib import Path from typing import List -import logging + +import mutagen +from mutagen.id3 import APIC, ID3, USLT, Frame from PIL import Image -from ..utils.config import logging_settings, main_settings -from ..objects import Song, Target, Metadata -from ..objects.metadata import Mapping from ..connection import Connection +from ..objects import Metadata, Song, Target +from ..objects.metadata import Mapping +from ..utils.config import logging_settings, main_settings LOGGER = logging_settings["tagging_logger"] artwork_connection: Connection = Connection() - class AudioMetadata: def __init__(self, file_location: str = None) -> None: self._file_location = None diff --git a/music_kraken/objects/artwork.py b/music_kraken/objects/artwork.py index eb5fa51..94396fc 100644 --- a/music_kraken/objects/artwork.py +++ b/music_kraken/objects/artwork.py @@ -1,8 +1,12 @@ from __future__ import annotations +from copy import copy +from dataclasses import dataclass, field from typing import Dict, List, Optional, Set, Tuple, Type, TypedDict, Union +from ..utils import create_dataclass_instance, custom_hash from ..utils.config import main_settings +from ..utils.enums import PictureType from ..utils.string_processing import hash_url, unify from .collection import Collection from .metadata import ID3Timestamp @@ -11,37 +15,113 @@ from .metadata import Metadata from .parents import OuterProxy as Base -class ArtworkVariant(TypedDict): +@dataclass +class ArtworkVariant: url: str - width: int - height: int - deviation: float + width: Optional[int] = None + height: Optional[int] = None + image_format: Optional[str] = "" + + def __hash__(self) -> int: + return custom_hash(self.url) + + def __eq__(self, other: ArtworkVariant) -> bool: + return hash(self) == hash(other) + + def __contains__(self, other: str) -> bool: + return custom_hash(other) == hash(self.url) + + +@dataclass +class Artwork: + variants: List[ArtworkVariant] = field(default_factory=list) + + artwork_type: PictureType = PictureType.OTHER + + def search_variant(self, url: str) -> Optional[ArtworkVariant]: + if url is None: + return None + + for variant in self.variants: + if url in variant: + return variant + + return None + + def __contains__(self, other: str) -> bool: + return self.search_variant(other) is not None + + def add_data(self, **kwargs) -> None: + variant = self.search_variant(kwargs.get("url")) + + if variant is None: + variant, kwargs = create_dataclass_instance(ArtworkVariant, **kwargs) + self.variants.append(variant) + + variant.url = url + variant.__dict__.update(kwargs) class ArtworkCollection: - def __init__(self, *variants: List[ArtworkVariant], parent_artworks: Set[ArtworkCollection] = None, crop_images: bool = True) -> None: - self.crop_images: bool = crop_images + """ + Stores all the images/artworks for one data object. + + There could be duplicates before calling ArtworkCollection.compile() + _this is called before one object is downloaded automatically._ + """ + + artwork_type: PictureType = PictureType.OTHER + + def __init__( + self, + *data: List[Union[Artwork, ArtworkVariant, dict]], + parent_artworks: Set[ArtworkCollection] = None, + crop_images: bool = True + ) -> None: + # this is used for the song artwork, to fall back to the song artwork self.parent_artworks: Set[ArtworkCollection] = parent_artworks or set() + self.crop_images: bool = crop_images - self._variant_mapping: Dict[str, ArtworkVariant] = {} + self._data = [] - for variant in variants: - self.append(**variant) + def search_artwork(self, url: str) -> Optional[ArtworkVariant]: + for artwork in self._data: + if url in artwork: + return artwork - @staticmethod - def _calculate_deviation(*dimensions: List[int]) -> float: - return sum(abs(d - main_settings["preferred_artwork_resolution"]) for d in dimensions) / len(dimensions) + return None + + def __contains__(self, other: str) -> bool: + return self.search_artwork(other) is not None - def append(self, url: str, width: int = main_settings["preferred_artwork_resolution"], height: int = main_settings["preferred_artwork_resolution"], **kwargs) -> None: - if url is None: + def _create_new_artwork(self, **kwargs) -> Tuple[Artwork, dict]: + kwargs["artwork_type"] = kwargs.get("artwork_type", self.artwork_type) + + return create_dataclass_instance(Artwork, **kwargs) + + def add_data(self, url: str, **kwargs) -> None: + kwargs["url"] = url + + artwork = self.search_artwork(url) + + if artwork is None: + artwork, kwargs = self._create_new_artwork(url=url, **kwargs) + self._data.append(artwork) + + artwork.add_data(url, **kwargs) + + def append(self, value: Union[Artwork, ArtworkVariant, dict], **kwargs): + if isinstance(value, dict): + kwargs.update(value) + value, kwargs = create_dataclass_instance(ArtworkVariant, kwargs) + + if isinstance(value, ArtworkVariant): + kwargs["variants"] = [value] + value, kwargs = create_dataclass_instance(Artwork, kwargs) + + if isinstance(value, Artwork): + self._data.append(value) return - - self._variant_mapping[hash_url(url=url)] = { - "url": url, - "width": width, - "height": height, - "deviation": self._calculate_deviation(width, height), - } @property def flat_empty(self) -> bool: @@ -69,14 +149,6 @@ class ArtworkCollection: def __hash__(self) -> int: return id(self) - def __eq__(self, other: ArtworkCollection) -> bool: - if hash(self) == hash(other): - return True - - if not isinstance(other, ArtworkCollection): - return False - return any(a == b for a, b in zip(self._variant_mapping.keys(), other._variant_mapping.keys())) - def __iter__(self) -> Generator[ArtworkVariant, None, None]: yield from self._variant_mapping.values() diff --git a/music_kraken/objects/song.py b/music_kraken/objects/song.py index 567fdd5..f39aa96 100644 --- a/music_kraken/objects/song.py +++ b/music_kraken/objects/song.py @@ -184,6 +184,10 @@ class Song(Base): self.album_collection.extend(object_list) return + def _compile(self): + self.artwork.compile() + + INDEX_DEPENDS_ON = ("title", "isrc", "source_collection") @property diff --git a/music_kraken/pages/bandcamp.py b/music_kraken/pages/bandcamp.py index 0941f5e..fe58fb3 100644 --- a/music_kraken/pages/bandcamp.py +++ b/music_kraken/pages/bandcamp.py @@ -231,7 +231,7 @@ class Bandcamp(Page): # artist artwork artist_artwork: BeautifulSoup = soup.find("img", {"class":"band-photo"}) if artist_artwork is not None: - artist.artwork.append(artist_artwork.get("data-src", artist_artwork.get("src"))) + artist.artwork.add_data(artist_artwork.get("data-src", artist_artwork.get("src"))) for i, data_blob_soup in enumerate(soup.find_all("div", {"id": ["pagedata", "collectors-data"]})): data_blob = data_blob_soup["data-blob"] @@ -308,12 +308,12 @@ class Bandcamp(Page): _artwork_url = _get_artwork_url(data) if _artwork_url is not None: - artwork.append(url=_artwork_url, width=350, height=350) + artwork.add_data(url=_artwork_url, width=350, height=350) else: for album_release in data.get("albumRelease", []): _artwork_url = _get_artwork_url(album_release) if _artwork_url is not None: - artwork.append(url=_artwork_url, width=350, height=350) + artwork.add_data(url=_artwork_url, width=350, height=350) break for i, track_json in enumerate(data.get("track", {}).get("itemListElement", [])): diff --git a/music_kraken/pages/genius.py b/music_kraken/pages/genius.py index df7bafc..3b8f184 100644 --- a/music_kraken/pages/genius.py +++ b/music_kraken/pages/genius.py @@ -51,21 +51,21 @@ class Genius(Page): url_frags = url.split(".") if len(url_frags) < 2: - artwork.append(url=url) + artwork.add_data(url=url) return dimensions = url_frags[-2].split("x") if len(dimensions) < 2: - artwork.append(url=url) + artwork.add_data(url=url) return if len(dimensions) == 3: dimensions = dimensions[:-1] try: - artwork.append(url=url, width=int(dimensions[0]), height=int(dimensions[1])) + artwork.add_data(url=url, width=int(dimensions[0]), height=int(dimensions[1])) except ValueError: - artwork.append(url=url) + artwork.add_data(url=url) def parse_api_object(self, data: dict) -> Optional[DatabaseObject]: if data is None: diff --git a/music_kraken/pages/musify.py b/music_kraken/pages/musify.py index d636bce..ed1caed 100644 --- a/music_kraken/pages/musify.py +++ b/music_kraken/pages/musify.py @@ -479,7 +479,7 @@ class Musify(Page): artwork: ArtworkCollection = ArtworkCollection() album_image_element_list: List[BeautifulSoup] = soup.find_all("img", {"class": "album-img"}) for album_image_element in album_image_element_list: - artwork.append(url=album_image_element.get("data-src", album_image_element.get("src"))) + artwork.add_data(url=album_image_element.get("data-src", album_image_element.get("src"))) # lyrics lyrics_container: List[BeautifulSoup] = soup.find_all("div", {"id": "tabLyrics"}) @@ -748,7 +748,7 @@ class Musify(Page): album_artwork: ArtworkCollection = ArtworkCollection() album_artwork_list: List[BeautifulSoup] = soup.find_all("img", {"class":"artist-img"}) for album_artwork in album_artwork_list: - album_artwork.append(url=album_artwork.get("data-src", album_artwork.get("src"))) + album_artwork.add_data(url=album_artwork.get("data-src", album_artwork.get("src"))) return Album( title=name, @@ -917,7 +917,7 @@ class Musify(Page): main_artist_artwork: ArtworkCollection = ArtworkCollection() artist_image_element_list: List[BeautifulSoup] = soup.find_all("img", {"class":"artist-img"}) for artist_image_element in artist_image_element_list: - main_artist_artwork.append(url=artist_image_element.get("data-src", artist_image_element.get("src"))) + main_artist_artwork.add_data(url=artist_image_element.get("data-src", artist_image_element.get("src"))) return Artist( name=name, @@ -1069,7 +1069,7 @@ class Musify(Page): gallery_body_content: BeautifulSoup = artwork_gallery.find(id="bodyContent") gallery_image_element_list: List[BeautifulSoup] = gallery_body_content.find_all("img") for gallery_image_element in gallery_image_element_list: - artist.artwork.append(url=gallery_image_element.get("data-src", gallery_image_element.get("src")), width=247, heigth=247) + artist.artwork.add_data(url=gallery_image_element.get("data-src", gallery_image_element.get("src")), width=247, heigth=247) def fetch_artist(self, source: Source, **kwargs) -> Artist: diff --git a/music_kraken/pages/youtube_music/youtube_music.py b/music_kraken/pages/youtube_music/youtube_music.py index 9780977..22b7c7c 100644 --- a/music_kraken/pages/youtube_music/youtube_music.py +++ b/music_kraken/pages/youtube_music/youtube_music.py @@ -672,7 +672,7 @@ class YoutubeMusic(SuperYouTube): for album in song.album_list: album.album_type = AlbumType.LIVE_ALBUM for thumbnail in video_details.get("thumbnails", []): - song.artwork.append(**thumbnail) + song.artwork.add_data(**thumbnail) song.lyrics_collection.append(self.fetch_lyrics(browse_id, playlist_id=request_data.get("playlistId"))) diff --git a/music_kraken/utils/__init__.py b/music_kraken/utils/__init__.py index a8d658b..a0be99c 100644 --- a/music_kraken/utils/__init__.py +++ b/music_kraken/utils/__init__.py @@ -1,15 +1,18 @@ -from datetime import datetime -from pathlib import Path +import inspect import json import logging -import inspect -from typing import List, Union +from datetime import datetime +from functools import lru_cache +from pathlib import Path +from typing import Any, List, Union -from .shared import DEBUG, DEBUG_LOGGING, DEBUG_DUMP, DEBUG_TRACE, DEBUG_OBJECT_TRACE, DEBUG_OBJECT_TRACE_CALLSTACK from .config import config, read_config, write_config from .enums.colors import BColors -from .path_manager import LOCATIONS from .hacking import merge_args +from .path_manager import LOCATIONS +from .shared import (DEBUG, DEBUG_DUMP, DEBUG_LOGGING, DEBUG_OBJECT_TRACE, + DEBUG_OBJECT_TRACE_CALLSTACK, DEBUG_TRACE, URL_PATTERN) +from .string_processing import hash_url, is_url, unify """ IO functions @@ -125,4 +128,34 @@ def get_current_millis() -> int: def get_unix_time() -> int: - return int(datetime.now().timestamp()) \ No newline at end of file + return int(datetime.now().timestamp()) + + +@lru_cache +def custom_hash(value: Any) -> int: + if is_url(value): + value = hash_url(value) + elif isinstance(value, str): + try: + value = int(value) + except ValueError: + value = unify(value) + + return hash(value) + + +def create_dataclass_instance(t, data: dict): + """Creates an instance of a dataclass with the given data. + It filters out all data key, which has no attribute in the dataclass. + + Args: + t (Type): The dataclass type class + data (dict): the attribute to pass into the constructor + + Returns: + Tuple[Type, dict]: The created instance and a dict, containing the data, which was not used in the creation + """ + + data = {k: v for k, v in data.items() if hasattr(t, k)} + removed_data = {k: v for k, v in data.items() if not hasattr(t, k)} + return t(**data), removed_data diff --git a/music_kraken/utils/enums/__init__.py b/music_kraken/utils/enums/__init__.py index 28f0b9f..67e4586 100644 --- a/music_kraken/utils/enums/__init__.py +++ b/music_kraken/utils/enums/__init__.py @@ -1,7 +1,11 @@ from __future__ import annotations from dataclasses import dataclass -from typing import Optional, TYPE_CHECKING, Type +from enum import Enum +from typing import TYPE_CHECKING, Optional, Type + +from mutagen.id3 import PictureType + if TYPE_CHECKING: from ...pages.abstract import Page @@ -52,3 +56,73 @@ class ALL_SOURCE_TYPES: MANUAL = SourceType(name="manual") PRESET = SourceType(name="preset") + + +class PictureType(Enum): + """Enumeration of image types defined by the ID3 standard for the APIC + frame, but also reused in WMA/FLAC/VorbisComment. + + This is copied from mutagen.id3.PictureType + """ + + OTHER = 0 + + FILE_ICON = 1 + """32x32 pixels 'file icon' (PNG only)""" + + OTHER_FILE_ICON = 2 + """Other file icon""" + + COVER_FRONT = 3 + """Cover (front)""" + + COVER_BACK = 4 + """Cover (back)""" + + LEAFLET_PAGE = 5 + """Leaflet page""" + + MEDIA = 6 + """Media (e.g. label side of CD)""" + + LEAD_ARTIST = 7 + """Lead artist/lead performer/soloist""" + + ARTIST = 8 + """Artist/performer""" + + CONDUCTOR = 9 + """Conductor""" + + BAND = 10 + """Band/Orchestra""" + + COMPOSER = 11 + """Composer""" + + LYRICIST = 12 + """Lyricist/text writer""" + + RECORDING_LOCATION = 13 + """Recording Location""" + + DURING_RECORDING = 14 + """During recording""" + + DURING_PERFORMANCE = 15 + """During performance""" + + SCREEN_CAPTURE = 16 + """Movie/video screen capture""" + + FISH = 17 + """A bright colored fish""" + + ILLUSTRATION = 18 + """Illustration""" + + BAND_LOGOTYPE = 19 + """Band/artist logotype""" + + PUBLISHER_LOGOTYPE = 20 + """Publisher/Studio logotype""" diff --git a/music_kraken/utils/string_processing.py b/music_kraken/utils/string_processing.py index b76e3fc..b53d245 100644 --- a/music_kraken/utils/string_processing.py +++ b/music_kraken/utils/string_processing.py @@ -1,13 +1,14 @@ -from typing import Tuple, Union, Optional -from pathlib import Path import string from functools import lru_cache +from pathlib import Path +from typing import Any, Optional, Tuple, Union +from urllib.parse import ParseResult, parse_qs, urlparse -from transliterate.exceptions import LanguageDetectionError -from transliterate import translit from pathvalidate import sanitize_filename -from urllib.parse import urlparse, ParseResult, parse_qs +from transliterate import translit +from transliterate.exceptions import LanguageDetectionError +from .shared import URL_PATTERN COMMON_TITLE_APPENDIX_LIST: Tuple[str, ...] = ( "(official video)", @@ -229,3 +230,12 @@ def shorten_display_url(url: str, max_length: int = 150, chars_at_end: int = 4, return url return url[:max_length] + shorten_string + url[-chars_at_end:] + +def is_url(value: Any) -> bool: + if isinstance(value, ParseResult): + return True + + if not isinstance(value, str): + return True + + return re.match(URL_PATTERN, query) is not None