draft implemented add_data

This commit is contained in:
Hazel 2024-06-06 17:53:17 +02:00
parent df98a70717
commit 4e50bb1fba
11 changed files with 255 additions and 61 deletions

View File

@ -20,6 +20,7 @@
"APIC",
"Bandcamp",
"bitrate",
"CALLSTACK",
"DEEZER",
"dotenv",
"encyclopaedia",

View File

@ -1,21 +1,21 @@
import mutagen
from mutagen.id3 import ID3, Frame, APIC, USLT
import logging
from pathlib import Path
from typing import List
import logging
import mutagen
from mutagen.id3 import APIC, ID3, USLT, Frame
from PIL import Image
from ..utils.config import logging_settings, main_settings
from ..objects import Song, Target, Metadata
from ..objects.metadata import Mapping
from ..connection import Connection
from ..objects import Metadata, Song, Target
from ..objects.metadata import Mapping
from ..utils.config import logging_settings, main_settings
LOGGER = logging_settings["tagging_logger"]
artwork_connection: Connection = Connection()
class AudioMetadata:
def __init__(self, file_location: str = None) -> None:
self._file_location = None

View File

@ -1,8 +1,12 @@
from __future__ import annotations
from copy import copy
from dataclasses import dataclass, field
from typing import Dict, List, Optional, Set, Tuple, Type, TypedDict, Union
from ..utils import create_dataclass_instance, custom_hash
from ..utils.config import main_settings
from ..utils.enums import PictureType
from ..utils.string_processing import hash_url, unify
from .collection import Collection
from .metadata import ID3Timestamp
@ -11,37 +15,113 @@ from .metadata import Metadata
from .parents import OuterProxy as Base
class ArtworkVariant(TypedDict):
@dataclass
class ArtworkVariant:
url: str
width: int
height: int
deviation: float
width: Optional[int] = None
height: Optional[int] = None
image_format: Optional[str] = ""
def __hash__(self) -> int:
return custom_hash(self.url)
def __eq__(self, other: ArtworkVariant) -> bool:
return hash(self) == hash(other)
def __contains__(self, other: str) -> bool:
return custom_hash(other) == hash(self.url)
@dataclass
class Artwork:
variants: List[ArtworkVariant] = field(default_factory=list)
artwork_type: PictureType = PictureType.OTHER
def search_variant(self, url: str) -> Optional[ArtworkVariant]:
if url is None:
return None
for variant in self.variants:
if url in variant:
return variant
return None
def __contains__(self, other: str) -> bool:
return self.search_variant(other) is not None
def add_data(self, **kwargs) -> None:
variant = self.search_variant(kwargs.get("url"))
if variant is None:
variant, kwargs = create_dataclass_instance(ArtworkVariant, **kwargs)
self.variants.append(variant)
variant.url = url
variant.__dict__.update(kwargs)
class ArtworkCollection:
def __init__(self, *variants: List[ArtworkVariant], parent_artworks: Set[ArtworkCollection] = None, crop_images: bool = True) -> None:
self.crop_images: bool = crop_images
"""
Stores all the images/artworks for one data object.
There could be duplicates before calling ArtworkCollection.compile()
_this is called before one object is downloaded automatically._
"""
artwork_type: PictureType = PictureType.OTHER
def __init__(
self,
*data: List[Union[Artwork, ArtworkVariant, dict]],
parent_artworks: Set[ArtworkCollection] = None,
crop_images: bool = True
) -> None:
# this is used for the song artwork, to fall back to the song artwork
self.parent_artworks: Set[ArtworkCollection] = parent_artworks or set()
self.crop_images: bool = crop_images
self._variant_mapping: Dict[str, ArtworkVariant] = {}
self._data = []
for variant in variants:
self.append(**variant)
def search_artwork(self, url: str) -> Optional[ArtworkVariant]:
for artwork in self._data:
if url in artwork:
return artwork
@staticmethod
def _calculate_deviation(*dimensions: List[int]) -> float:
return sum(abs(d - main_settings["preferred_artwork_resolution"]) for d in dimensions) / len(dimensions)
return None
def __contains__(self, other: str) -> bool:
return self.search_artwork(other) is not None
def append(self, url: str, width: int = main_settings["preferred_artwork_resolution"], height: int = main_settings["preferred_artwork_resolution"], **kwargs) -> None:
if url is None:
def _create_new_artwork(self, **kwargs) -> Tuple[Artwork, dict]:
kwargs["artwork_type"] = kwargs.get("artwork_type", self.artwork_type)
return create_dataclass_instance(Artwork, **kwargs)
def add_data(self, url: str, **kwargs) -> None:
kwargs["url"] = url
artwork = self.search_artwork(url)
if artwork is None:
artwork, kwargs = self._create_new_artwork(url=url, **kwargs)
self._data.append(artwork)
artwork.add_data(url, **kwargs)
def append(self, value: Union[Artwork, ArtworkVariant, dict], **kwargs):
if isinstance(value, dict):
kwargs.update(value)
value, kwargs = create_dataclass_instance(ArtworkVariant, kwargs)
if isinstance(value, ArtworkVariant):
kwargs["variants"] = [value]
value, kwargs = create_dataclass_instance(Artwork, kwargs)
if isinstance(value, Artwork):
self._data.append(value)
return
self._variant_mapping[hash_url(url=url)] = {
"url": url,
"width": width,
"height": height,
"deviation": self._calculate_deviation(width, height),
}
@property
def flat_empty(self) -> bool:
@ -69,14 +149,6 @@ class ArtworkCollection:
def __hash__(self) -> int:
return id(self)
def __eq__(self, other: ArtworkCollection) -> bool:
if hash(self) == hash(other):
return True
if not isinstance(other, ArtworkCollection):
return False
return any(a == b for a, b in zip(self._variant_mapping.keys(), other._variant_mapping.keys()))
def __iter__(self) -> Generator[ArtworkVariant, None, None]:
yield from self._variant_mapping.values()

View File

@ -184,6 +184,10 @@ class Song(Base):
self.album_collection.extend(object_list)
return
def _compile(self):
self.artwork.compile()
INDEX_DEPENDS_ON = ("title", "isrc", "source_collection")
@property

View File

@ -231,7 +231,7 @@ class Bandcamp(Page):
# artist artwork
artist_artwork: BeautifulSoup = soup.find("img", {"class":"band-photo"})
if artist_artwork is not None:
artist.artwork.append(artist_artwork.get("data-src", artist_artwork.get("src")))
artist.artwork.add_data(artist_artwork.get("data-src", artist_artwork.get("src")))
for i, data_blob_soup in enumerate(soup.find_all("div", {"id": ["pagedata", "collectors-data"]})):
data_blob = data_blob_soup["data-blob"]
@ -308,12 +308,12 @@ class Bandcamp(Page):
_artwork_url = _get_artwork_url(data)
if _artwork_url is not None:
artwork.append(url=_artwork_url, width=350, height=350)
artwork.add_data(url=_artwork_url, width=350, height=350)
else:
for album_release in data.get("albumRelease", []):
_artwork_url = _get_artwork_url(album_release)
if _artwork_url is not None:
artwork.append(url=_artwork_url, width=350, height=350)
artwork.add_data(url=_artwork_url, width=350, height=350)
break
for i, track_json in enumerate(data.get("track", {}).get("itemListElement", [])):

View File

@ -51,21 +51,21 @@ class Genius(Page):
url_frags = url.split(".")
if len(url_frags) < 2:
artwork.append(url=url)
artwork.add_data(url=url)
return
dimensions = url_frags[-2].split("x")
if len(dimensions) < 2:
artwork.append(url=url)
artwork.add_data(url=url)
return
if len(dimensions) == 3:
dimensions = dimensions[:-1]
try:
artwork.append(url=url, width=int(dimensions[0]), height=int(dimensions[1]))
artwork.add_data(url=url, width=int(dimensions[0]), height=int(dimensions[1]))
except ValueError:
artwork.append(url=url)
artwork.add_data(url=url)
def parse_api_object(self, data: dict) -> Optional[DatabaseObject]:
if data is None:

View File

@ -479,7 +479,7 @@ class Musify(Page):
artwork: ArtworkCollection = ArtworkCollection()
album_image_element_list: List[BeautifulSoup] = soup.find_all("img", {"class": "album-img"})
for album_image_element in album_image_element_list:
artwork.append(url=album_image_element.get("data-src", album_image_element.get("src")))
artwork.add_data(url=album_image_element.get("data-src", album_image_element.get("src")))
# lyrics
lyrics_container: List[BeautifulSoup] = soup.find_all("div", {"id": "tabLyrics"})
@ -748,7 +748,7 @@ class Musify(Page):
album_artwork: ArtworkCollection = ArtworkCollection()
album_artwork_list: List[BeautifulSoup] = soup.find_all("img", {"class":"artist-img"})
for album_artwork in album_artwork_list:
album_artwork.append(url=album_artwork.get("data-src", album_artwork.get("src")))
album_artwork.add_data(url=album_artwork.get("data-src", album_artwork.get("src")))
return Album(
title=name,
@ -917,7 +917,7 @@ class Musify(Page):
main_artist_artwork: ArtworkCollection = ArtworkCollection()
artist_image_element_list: List[BeautifulSoup] = soup.find_all("img", {"class":"artist-img"})
for artist_image_element in artist_image_element_list:
main_artist_artwork.append(url=artist_image_element.get("data-src", artist_image_element.get("src")))
main_artist_artwork.add_data(url=artist_image_element.get("data-src", artist_image_element.get("src")))
return Artist(
name=name,
@ -1069,7 +1069,7 @@ class Musify(Page):
gallery_body_content: BeautifulSoup = artwork_gallery.find(id="bodyContent")
gallery_image_element_list: List[BeautifulSoup] = gallery_body_content.find_all("img")
for gallery_image_element in gallery_image_element_list:
artist.artwork.append(url=gallery_image_element.get("data-src", gallery_image_element.get("src")), width=247, heigth=247)
artist.artwork.add_data(url=gallery_image_element.get("data-src", gallery_image_element.get("src")), width=247, heigth=247)
def fetch_artist(self, source: Source, **kwargs) -> Artist:

View File

@ -672,7 +672,7 @@ class YoutubeMusic(SuperYouTube):
for album in song.album_list:
album.album_type = AlbumType.LIVE_ALBUM
for thumbnail in video_details.get("thumbnails", []):
song.artwork.append(**thumbnail)
song.artwork.add_data(**thumbnail)
song.lyrics_collection.append(self.fetch_lyrics(browse_id, playlist_id=request_data.get("playlistId")))

View File

@ -1,15 +1,18 @@
from datetime import datetime
from pathlib import Path
import inspect
import json
import logging
import inspect
from typing import List, Union
from datetime import datetime
from functools import lru_cache
from pathlib import Path
from typing import Any, List, Union
from .shared import DEBUG, DEBUG_LOGGING, DEBUG_DUMP, DEBUG_TRACE, DEBUG_OBJECT_TRACE, DEBUG_OBJECT_TRACE_CALLSTACK
from .config import config, read_config, write_config
from .enums.colors import BColors
from .path_manager import LOCATIONS
from .hacking import merge_args
from .path_manager import LOCATIONS
from .shared import (DEBUG, DEBUG_DUMP, DEBUG_LOGGING, DEBUG_OBJECT_TRACE,
DEBUG_OBJECT_TRACE_CALLSTACK, DEBUG_TRACE, URL_PATTERN)
from .string_processing import hash_url, is_url, unify
"""
IO functions
@ -125,4 +128,34 @@ def get_current_millis() -> int:
def get_unix_time() -> int:
return int(datetime.now().timestamp())
return int(datetime.now().timestamp())
@lru_cache
def custom_hash(value: Any) -> int:
if is_url(value):
value = hash_url(value)
elif isinstance(value, str):
try:
value = int(value)
except ValueError:
value = unify(value)
return hash(value)
def create_dataclass_instance(t, data: dict):
"""Creates an instance of a dataclass with the given data.
It filters out all data key, which has no attribute in the dataclass.
Args:
t (Type): The dataclass type class
data (dict): the attribute to pass into the constructor
Returns:
Tuple[Type, dict]: The created instance and a dict, containing the data, which was not used in the creation
"""
data = {k: v for k, v in data.items() if hasattr(t, k)}
removed_data = {k: v for k, v in data.items() if not hasattr(t, k)}
return t(**data), removed_data

View File

@ -1,7 +1,11 @@
from __future__ import annotations
from dataclasses import dataclass
from typing import Optional, TYPE_CHECKING, Type
from enum import Enum
from typing import TYPE_CHECKING, Optional, Type
from mutagen.id3 import PictureType
if TYPE_CHECKING:
from ...pages.abstract import Page
@ -52,3 +56,73 @@ class ALL_SOURCE_TYPES:
MANUAL = SourceType(name="manual")
PRESET = SourceType(name="preset")
class PictureType(Enum):
"""Enumeration of image types defined by the ID3 standard for the APIC
frame, but also reused in WMA/FLAC/VorbisComment.
This is copied from mutagen.id3.PictureType
"""
OTHER = 0
FILE_ICON = 1
"""32x32 pixels 'file icon' (PNG only)"""
OTHER_FILE_ICON = 2
"""Other file icon"""
COVER_FRONT = 3
"""Cover (front)"""
COVER_BACK = 4
"""Cover (back)"""
LEAFLET_PAGE = 5
"""Leaflet page"""
MEDIA = 6
"""Media (e.g. label side of CD)"""
LEAD_ARTIST = 7
"""Lead artist/lead performer/soloist"""
ARTIST = 8
"""Artist/performer"""
CONDUCTOR = 9
"""Conductor"""
BAND = 10
"""Band/Orchestra"""
COMPOSER = 11
"""Composer"""
LYRICIST = 12
"""Lyricist/text writer"""
RECORDING_LOCATION = 13
"""Recording Location"""
DURING_RECORDING = 14
"""During recording"""
DURING_PERFORMANCE = 15
"""During performance"""
SCREEN_CAPTURE = 16
"""Movie/video screen capture"""
FISH = 17
"""A bright colored fish"""
ILLUSTRATION = 18
"""Illustration"""
BAND_LOGOTYPE = 19
"""Band/artist logotype"""
PUBLISHER_LOGOTYPE = 20
"""Publisher/Studio logotype"""

View File

@ -1,13 +1,14 @@
from typing import Tuple, Union, Optional
from pathlib import Path
import string
from functools import lru_cache
from pathlib import Path
from typing import Any, Optional, Tuple, Union
from urllib.parse import ParseResult, parse_qs, urlparse
from transliterate.exceptions import LanguageDetectionError
from transliterate import translit
from pathvalidate import sanitize_filename
from urllib.parse import urlparse, ParseResult, parse_qs
from transliterate import translit
from transliterate.exceptions import LanguageDetectionError
from .shared import URL_PATTERN
COMMON_TITLE_APPENDIX_LIST: Tuple[str, ...] = (
"(official video)",
@ -229,3 +230,12 @@ def shorten_display_url(url: str, max_length: int = 150, chars_at_end: int = 4,
return url
return url[:max_length] + shorten_string + url[-chars_at_end:]
def is_url(value: Any) -> bool:
if isinstance(value, ParseResult):
return True
if not isinstance(value, str):
return True
return re.match(URL_PATTERN, query) is not None