draft implemented add_data

This commit is contained in:
Hazel 2024-06-06 17:53:17 +02:00
parent df98a70717
commit 4e50bb1fba
11 changed files with 255 additions and 61 deletions

View File

@ -20,6 +20,7 @@
"APIC", "APIC",
"Bandcamp", "Bandcamp",
"bitrate", "bitrate",
"CALLSTACK",
"DEEZER", "DEEZER",
"dotenv", "dotenv",
"encyclopaedia", "encyclopaedia",

View File

@ -1,21 +1,21 @@
import mutagen import logging
from mutagen.id3 import ID3, Frame, APIC, USLT
from pathlib import Path from pathlib import Path
from typing import List from typing import List
import logging
import mutagen
from mutagen.id3 import APIC, ID3, USLT, Frame
from PIL import Image from PIL import Image
from ..utils.config import logging_settings, main_settings
from ..objects import Song, Target, Metadata
from ..objects.metadata import Mapping
from ..connection import Connection from ..connection import Connection
from ..objects import Metadata, Song, Target
from ..objects.metadata import Mapping
from ..utils.config import logging_settings, main_settings
LOGGER = logging_settings["tagging_logger"] LOGGER = logging_settings["tagging_logger"]
artwork_connection: Connection = Connection() artwork_connection: Connection = Connection()
class AudioMetadata: class AudioMetadata:
def __init__(self, file_location: str = None) -> None: def __init__(self, file_location: str = None) -> None:
self._file_location = None self._file_location = None

View File

@ -1,8 +1,12 @@
from __future__ import annotations from __future__ import annotations
from copy import copy
from dataclasses import dataclass, field
from typing import Dict, List, Optional, Set, Tuple, Type, TypedDict, Union from typing import Dict, List, Optional, Set, Tuple, Type, TypedDict, Union
from ..utils import create_dataclass_instance, custom_hash
from ..utils.config import main_settings from ..utils.config import main_settings
from ..utils.enums import PictureType
from ..utils.string_processing import hash_url, unify from ..utils.string_processing import hash_url, unify
from .collection import Collection from .collection import Collection
from .metadata import ID3Timestamp from .metadata import ID3Timestamp
@ -11,38 +15,114 @@ from .metadata import Metadata
from .parents import OuterProxy as Base from .parents import OuterProxy as Base
@dataclass
class ArtworkVariant:
    """One concrete image of an artwork, identified by its URL.

    Identity (hashing, equality and the ``in`` operator) is derived only from
    ``custom_hash(self.url)``; ``width``, ``height`` and ``image_format`` do
    not take part in comparisons.
    """
    url: str
    width: Optional[int] = None
    height: Optional[int] = None
    image_format: Optional[str] = ""

    def __hash__(self) -> int:
        """Return the normalized hash of the variant's URL."""
        return custom_hash(self.url)

    def __eq__(self, other: object) -> bool:
        """Two variants are equal when their URLs hash to the same value."""
        # Only variants are comparable; let Python fall back for anything else
        # instead of hashing arbitrary (possibly unhashable) operands.
        if not isinstance(other, ArtworkVariant):
            return NotImplemented
        return hash(self) == hash(other)

    def __contains__(self, other: str) -> bool:
        """Return True if ``other`` is a URL that identifies this variant."""
        # Both sides must go through custom_hash; comparing against the
        # builtin hash of the raw url string never matches.
        return custom_hash(other) == custom_hash(self.url)
@dataclass
class Artwork:
    """A single artwork made up of one or more variants of the same image.

    Attributes are documented inline; variants are looked up by URL.
    """
    # all known variants of this artwork
    variants: List[ArtworkVariant] = field(default_factory=list)
    # picture type of the artwork, defaults to PictureType.OTHER
    artwork_type: PictureType = PictureType.OTHER

    def search_variant(self, url: str) -> Optional[ArtworkVariant]:
        """Return the first variant that matches ``url``, or None.

        Matching is delegated to ``ArtworkVariant.__contains__``.
        A ``url`` of None never matches.
        """
        if url is None:
            return None

        for variant in self.variants:
            if url in variant:
                return variant

        return None

    def __contains__(self, other: str) -> bool:
        """Return True if any variant matches the url ``other``."""
        return self.search_variant(other) is not None

    def add_data(self, url: Optional[str] = None, **kwargs) -> None:
        """Create or update the variant identified by ``url``.

        Args:
            url: URL of the variant. May be given positionally or as keyword.
                If it is None nothing is added, because a variant cannot be
                identified without a URL.
            **kwargs: further attributes (e.g. ``width``, ``height``) that are
                written onto the variant, overriding existing values.
        """
        if url is None:
            return

        variant = self.search_variant(url)
        if variant is None:
            # url is the only required field; everything else is applied below
            variant = ArtworkVariant(url=url)
            self.variants.append(variant)

        variant.url = url
        variant.__dict__.update(kwargs)
class ArtworkCollection: class ArtworkCollection:
def __init__(self, *variants: List[ArtworkVariant], parent_artworks: Set[ArtworkCollection] = None, crop_images: bool = True) -> None: """
self.crop_images: bool = crop_images Stores all the images/artworks for one data object.
There could be duplicates before calling ArtworkCollection.compile()
_this is called before one object is downloaded automatically._
"""
artwork_type: PictureType = PictureType.OTHER
def __init__(
self,
*data: List[Union[Artwork, ArtworkVariant, dict]],
parent_artworks: Set[ArtworkCollection] = None,
crop_images: bool = True
) -> None:
# this is used for the song artwork, to fall back to the song artwork
self.parent_artworks: Set[ArtworkCollection] = parent_artworks or set() self.parent_artworks: Set[ArtworkCollection] = parent_artworks or set()
self.crop_images: bool = crop_images
self._variant_mapping: Dict[str, ArtworkVariant] = {} self._data = []
for variant in variants: def search_artwork(self, url: str) -> Optional[ArtworkVariant]:
self.append(**variant) for artwork in self._data:
if url in artwork:
return artwork
@staticmethod return None
def _calculate_deviation(*dimensions: List[int]) -> float:
return sum(abs(d - main_settings["preferred_artwork_resolution"]) for d in dimensions) / len(dimensions)
def append(self, url: str, width: int = main_settings["preferred_artwork_resolution"], height: int = main_settings["preferred_artwork_resolution"], **kwargs) -> None: def __contains__(self, other: str) -> bool:
if url is None: return self.search_artwork(other) is not None
def _create_new_artwork(self, **kwargs) -> Tuple[Artwork, dict]:
kwargs["artwork_type"] = kwargs.get("artwork_type", self.artwork_type)
return create_dataclass_instance(Artwork, **kwargs)
def add_data(self, url: str, **kwargs) -> None:
kwargs["url"] = url
artwork = self.search_artwork(url)
if artwork is None:
artwork, kwargs = self._create_new_artwork(url=url, **kwargs)
self._data.append(artwork)
artwork.add_data(url, **kwargs)
def append(self, value: Union[Artwork, ArtworkVariant, dict], **kwargs):
if isinstance(value, dict):
kwargs.update(value)
value, kwargs = create_dataclass_instance(ArtworkVariant, kwargs)
if isinstance(value, ArtworkVariant):
kwargs["variants"] = [value]
value, kwargs = create_dataclass_instance(Artwork, kwargs)
if isinstance(value, Artwork):
self._data.append(value)
return return
self._variant_mapping[hash_url(url=url)] = {
"url": url,
"width": width,
"height": height,
"deviation": self._calculate_deviation(width, height),
}
@property @property
def flat_empty(self) -> bool: def flat_empty(self) -> bool:
return len(self._variant_mapping.keys()) <= 0 return len(self._variant_mapping.keys()) <= 0
@ -69,14 +149,6 @@ class ArtworkCollection:
def __hash__(self) -> int: def __hash__(self) -> int:
return id(self) return id(self)
def __eq__(self, other: ArtworkCollection) -> bool:
if hash(self) == hash(other):
return True
if not isinstance(other, ArtworkCollection):
return False
return any(a == b for a, b in zip(self._variant_mapping.keys(), other._variant_mapping.keys()))
def __iter__(self) -> Generator[ArtworkVariant, None, None]: def __iter__(self) -> Generator[ArtworkVariant, None, None]:
yield from self._variant_mapping.values() yield from self._variant_mapping.values()

View File

@ -184,6 +184,10 @@ class Song(Base):
self.album_collection.extend(object_list) self.album_collection.extend(object_list)
return return
def _compile(self):
    """Delegate compilation to this object's artwork via ``self.artwork.compile()``."""
    artwork = self.artwork
    artwork.compile()
INDEX_DEPENDS_ON = ("title", "isrc", "source_collection") INDEX_DEPENDS_ON = ("title", "isrc", "source_collection")
@property @property

View File

@ -231,7 +231,7 @@ class Bandcamp(Page):
# artist artwork # artist artwork
artist_artwork: BeautifulSoup = soup.find("img", {"class":"band-photo"}) artist_artwork: BeautifulSoup = soup.find("img", {"class":"band-photo"})
if artist_artwork is not None: if artist_artwork is not None:
artist.artwork.append(artist_artwork.get("data-src", artist_artwork.get("src"))) artist.artwork.add_data(artist_artwork.get("data-src", artist_artwork.get("src")))
for i, data_blob_soup in enumerate(soup.find_all("div", {"id": ["pagedata", "collectors-data"]})): for i, data_blob_soup in enumerate(soup.find_all("div", {"id": ["pagedata", "collectors-data"]})):
data_blob = data_blob_soup["data-blob"] data_blob = data_blob_soup["data-blob"]
@ -308,12 +308,12 @@ class Bandcamp(Page):
_artwork_url = _get_artwork_url(data) _artwork_url = _get_artwork_url(data)
if _artwork_url is not None: if _artwork_url is not None:
artwork.append(url=_artwork_url, width=350, height=350) artwork.add_data(url=_artwork_url, width=350, height=350)
else: else:
for album_release in data.get("albumRelease", []): for album_release in data.get("albumRelease", []):
_artwork_url = _get_artwork_url(album_release) _artwork_url = _get_artwork_url(album_release)
if _artwork_url is not None: if _artwork_url is not None:
artwork.append(url=_artwork_url, width=350, height=350) artwork.add_data(url=_artwork_url, width=350, height=350)
break break
for i, track_json in enumerate(data.get("track", {}).get("itemListElement", [])): for i, track_json in enumerate(data.get("track", {}).get("itemListElement", [])):

View File

@ -51,21 +51,21 @@ class Genius(Page):
url_frags = url.split(".") url_frags = url.split(".")
if len(url_frags) < 2: if len(url_frags) < 2:
artwork.append(url=url) artwork.add_data(url=url)
return return
dimensions = url_frags[-2].split("x") dimensions = url_frags[-2].split("x")
if len(dimensions) < 2: if len(dimensions) < 2:
artwork.append(url=url) artwork.add_data(url=url)
return return
if len(dimensions) == 3: if len(dimensions) == 3:
dimensions = dimensions[:-1] dimensions = dimensions[:-1]
try: try:
artwork.append(url=url, width=int(dimensions[0]), height=int(dimensions[1])) artwork.add_data(url=url, width=int(dimensions[0]), height=int(dimensions[1]))
except ValueError: except ValueError:
artwork.append(url=url) artwork.add_data(url=url)
def parse_api_object(self, data: dict) -> Optional[DatabaseObject]: def parse_api_object(self, data: dict) -> Optional[DatabaseObject]:
if data is None: if data is None:

View File

@ -479,7 +479,7 @@ class Musify(Page):
artwork: ArtworkCollection = ArtworkCollection() artwork: ArtworkCollection = ArtworkCollection()
album_image_element_list: List[BeautifulSoup] = soup.find_all("img", {"class": "album-img"}) album_image_element_list: List[BeautifulSoup] = soup.find_all("img", {"class": "album-img"})
for album_image_element in album_image_element_list: for album_image_element in album_image_element_list:
artwork.append(url=album_image_element.get("data-src", album_image_element.get("src"))) artwork.add_data(url=album_image_element.get("data-src", album_image_element.get("src")))
# lyrics # lyrics
lyrics_container: List[BeautifulSoup] = soup.find_all("div", {"id": "tabLyrics"}) lyrics_container: List[BeautifulSoup] = soup.find_all("div", {"id": "tabLyrics"})
@ -748,7 +748,7 @@ class Musify(Page):
album_artwork: ArtworkCollection = ArtworkCollection() album_artwork: ArtworkCollection = ArtworkCollection()
album_artwork_list: List[BeautifulSoup] = soup.find_all("img", {"class":"artist-img"}) album_artwork_list: List[BeautifulSoup] = soup.find_all("img", {"class":"artist-img"})
for album_artwork in album_artwork_list: for album_artwork in album_artwork_list:
album_artwork.append(url=album_artwork.get("data-src", album_artwork.get("src"))) album_artwork.add_data(url=album_artwork.get("data-src", album_artwork.get("src")))
return Album( return Album(
title=name, title=name,
@ -917,7 +917,7 @@ class Musify(Page):
main_artist_artwork: ArtworkCollection = ArtworkCollection() main_artist_artwork: ArtworkCollection = ArtworkCollection()
artist_image_element_list: List[BeautifulSoup] = soup.find_all("img", {"class":"artist-img"}) artist_image_element_list: List[BeautifulSoup] = soup.find_all("img", {"class":"artist-img"})
for artist_image_element in artist_image_element_list: for artist_image_element in artist_image_element_list:
main_artist_artwork.append(url=artist_image_element.get("data-src", artist_image_element.get("src"))) main_artist_artwork.add_data(url=artist_image_element.get("data-src", artist_image_element.get("src")))
return Artist( return Artist(
name=name, name=name,
@ -1069,7 +1069,7 @@ class Musify(Page):
gallery_body_content: BeautifulSoup = artwork_gallery.find(id="bodyContent") gallery_body_content: BeautifulSoup = artwork_gallery.find(id="bodyContent")
gallery_image_element_list: List[BeautifulSoup] = gallery_body_content.find_all("img") gallery_image_element_list: List[BeautifulSoup] = gallery_body_content.find_all("img")
for gallery_image_element in gallery_image_element_list: for gallery_image_element in gallery_image_element_list:
artist.artwork.append(url=gallery_image_element.get("data-src", gallery_image_element.get("src")), width=247, heigth=247) artist.artwork.add_data(url=gallery_image_element.get("data-src", gallery_image_element.get("src")), width=247, heigth=247)
def fetch_artist(self, source: Source, **kwargs) -> Artist: def fetch_artist(self, source: Source, **kwargs) -> Artist:

View File

@ -672,7 +672,7 @@ class YoutubeMusic(SuperYouTube):
for album in song.album_list: for album in song.album_list:
album.album_type = AlbumType.LIVE_ALBUM album.album_type = AlbumType.LIVE_ALBUM
for thumbnail in video_details.get("thumbnails", []): for thumbnail in video_details.get("thumbnails", []):
song.artwork.append(**thumbnail) song.artwork.add_data(**thumbnail)
song.lyrics_collection.append(self.fetch_lyrics(browse_id, playlist_id=request_data.get("playlistId"))) song.lyrics_collection.append(self.fetch_lyrics(browse_id, playlist_id=request_data.get("playlistId")))

View File

@ -1,15 +1,18 @@
from datetime import datetime import inspect
from pathlib import Path
import json import json
import logging import logging
import inspect from datetime import datetime
from typing import List, Union from functools import lru_cache
from pathlib import Path
from typing import Any, List, Union
from .shared import DEBUG, DEBUG_LOGGING, DEBUG_DUMP, DEBUG_TRACE, DEBUG_OBJECT_TRACE, DEBUG_OBJECT_TRACE_CALLSTACK
from .config import config, read_config, write_config from .config import config, read_config, write_config
from .enums.colors import BColors from .enums.colors import BColors
from .path_manager import LOCATIONS
from .hacking import merge_args from .hacking import merge_args
from .path_manager import LOCATIONS
from .shared import (DEBUG, DEBUG_DUMP, DEBUG_LOGGING, DEBUG_OBJECT_TRACE,
DEBUG_OBJECT_TRACE_CALLSTACK, DEBUG_TRACE, URL_PATTERN)
from .string_processing import hash_url, is_url, unify
""" """
IO functions IO functions
@ -126,3 +129,33 @@ def get_current_millis() -> int:
def get_unix_time() -> int: def get_unix_time() -> int:
return int(datetime.now().timestamp()) return int(datetime.now().timestamp())
@lru_cache
def custom_hash(value: Any) -> int:
    """Hash ``value`` after normalizing it.

    Values recognized by ``is_url`` are reduced with ``hash_url``. Other
    strings are converted to ``int`` when possible and passed through
    ``unify`` otherwise. Everything else is hashed unchanged.
    """
    if is_url(value):
        return hash(hash_url(value))

    if not isinstance(value, str):
        return hash(value)

    try:
        normalized = int(value)
    except ValueError:
        normalized = unify(value)
    return hash(normalized)
def create_dataclass_instance(t, data: dict = None, **kwargs):
    """Creates an instance of a dataclass with the given data.
    It filters out every key that is not an init field of the dataclass.

    Args:
        t (Type): The dataclass type class
        data (dict): the attributes to pass into the constructor
        **kwargs: additional attributes, merged over ``data``

    Returns:
        Tuple[Type, dict]: The created instance and a dict, containing the data, which was not used in the creation
    """
    from dataclasses import fields

    merged = {**(data or {}), **kwargs}

    # hasattr() on the class misses fields without a plain default
    # (required fields and default_factory fields), so ask the dataclass itself.
    accepted = {f.name for f in fields(t) if f.init}

    used = {k: v for k, v in merged.items() if k in accepted}
    # must be computed from the unfiltered input, otherwise it is always empty
    removed_data = {k: v for k, v in merged.items() if k not in accepted}

    return t(**used), removed_data

View File

@ -1,7 +1,11 @@
from __future__ import annotations from __future__ import annotations
from dataclasses import dataclass from dataclasses import dataclass
from typing import Optional, TYPE_CHECKING, Type from enum import Enum
from typing import TYPE_CHECKING, Optional, Type
from mutagen.id3 import PictureType
if TYPE_CHECKING: if TYPE_CHECKING:
from ...pages.abstract import Page from ...pages.abstract import Page
@ -52,3 +56,73 @@ class ALL_SOURCE_TYPES:
MANUAL = SourceType(name="manual") MANUAL = SourceType(name="manual")
PRESET = SourceType(name="preset") PRESET = SourceType(name="preset")
class PictureType(Enum):
    """Image types of the ID3 APIC frame (also reused in WMA/FLAC/VorbisComment).

    The values mirror mutagen.id3.PictureType, from which this enumeration
    was copied.
    """

    OTHER = 0
    FILE_ICON = 1            # 32x32 pixels 'file icon' (PNG only)
    OTHER_FILE_ICON = 2      # Other file icon
    COVER_FRONT = 3          # Cover (front)
    COVER_BACK = 4           # Cover (back)
    LEAFLET_PAGE = 5         # Leaflet page
    MEDIA = 6                # Media (e.g. label side of CD)
    LEAD_ARTIST = 7          # Lead artist/lead performer/soloist
    ARTIST = 8               # Artist/performer
    CONDUCTOR = 9            # Conductor
    BAND = 10                # Band/Orchestra
    COMPOSER = 11            # Composer
    LYRICIST = 12            # Lyricist/text writer
    RECORDING_LOCATION = 13  # Recording Location
    DURING_RECORDING = 14    # During recording
    DURING_PERFORMANCE = 15  # During performance
    SCREEN_CAPTURE = 16      # Movie/video screen capture
    FISH = 17                # A bright colored fish
    ILLUSTRATION = 18        # Illustration
    BAND_LOGOTYPE = 19       # Band/artist logotype
    PUBLISHER_LOGOTYPE = 20  # Publisher/Studio logotype

View File

@ -1,13 +1,14 @@
from typing import Tuple, Union, Optional
from pathlib import Path
import string import string
from functools import lru_cache from functools import lru_cache
from pathlib import Path
from typing import Any, Optional, Tuple, Union
from urllib.parse import ParseResult, parse_qs, urlparse
from transliterate.exceptions import LanguageDetectionError
from transliterate import translit
from pathvalidate import sanitize_filename from pathvalidate import sanitize_filename
from urllib.parse import urlparse, ParseResult, parse_qs from transliterate import translit
from transliterate.exceptions import LanguageDetectionError
from .shared import URL_PATTERN
COMMON_TITLE_APPENDIX_LIST: Tuple[str, ...] = ( COMMON_TITLE_APPENDIX_LIST: Tuple[str, ...] = (
"(official video)", "(official video)",
@ -229,3 +230,12 @@ def shorten_display_url(url: str, max_length: int = 150, chars_at_end: int = 4,
return url return url
return url[:max_length] + shorten_string + url[-chars_at_end:] return url[:max_length] + shorten_string + url[-chars_at_end:]
def is_url(value: Any) -> bool:
    """Return True if ``value`` is a URL.

    Args:
        value: An already parsed ``urllib.parse.ParseResult``, a string, or
            any other object.

    Returns:
        True for a ``ParseResult`` and for strings that match ``URL_PATTERN``
        at their start; False for everything else, including non-strings.
    """
    # imported locally so the function does not depend on a module-level import
    import re

    if isinstance(value, ParseResult):
        return True

    # anything that is neither a parsed url nor a string cannot be a url
    if not isinstance(value, str):
        return False

    return re.match(URL_PATTERN, value) is not None