diff --git a/.vscode/settings.json b/.vscode/settings.json index fbc21fa..48df21d 100644 --- a/.vscode/settings.json +++ b/.vscode/settings.json @@ -20,6 +20,7 @@ "APIC", "Bandcamp", "bitrate", + "CALLSTACK", "DEEZER", "dotenv", "encyclopaedia", diff --git a/development/actual_donwload.py b/development/actual_donwload.py index ad8f1d0..be10369 100644 --- a/development/actual_donwload.py +++ b/development/actual_donwload.py @@ -1,15 +1,13 @@ +import logging + import music_kraken -import logging print("Setting logging-level to DEBUG") logging.getLogger().setLevel(logging.DEBUG) if __name__ == "__main__": commands = [ - "s: #a Crystal F", - "10", - "1", - "3", + "s: #a Ghost Bath", ] diff --git a/music_kraken/audio/metadata.py b/music_kraken/audio/metadata.py index bceb775..5b9f7ac 100644 --- a/music_kraken/audio/metadata.py +++ b/music_kraken/audio/metadata.py @@ -1,21 +1,21 @@ -import mutagen -from mutagen.id3 import ID3, Frame, APIC, USLT +import logging from pathlib import Path from typing import List -import logging + +import mutagen +from mutagen.id3 import APIC, ID3, USLT, Frame from PIL import Image -from ..utils.config import logging_settings, main_settings -from ..objects import Song, Target, Metadata -from ..objects.metadata import Mapping from ..connection import Connection +from ..objects import Metadata, Song, Target +from ..objects.metadata import Mapping +from ..utils.config import logging_settings, main_settings LOGGER = logging_settings["tagging_logger"] artwork_connection: Connection = Connection() - class AudioMetadata: def __init__(self, file_location: str = None) -> None: self._file_location = None @@ -67,13 +67,14 @@ def write_metadata_to_target(metadata: Metadata, target: Target, song: Song): id3_object = AudioMetadata(file_location=target.file_path) LOGGER.info(str(metadata)) - - if song.artwork.best_variant is not None: - best_variant = song.artwork.best_variant + ## REWRITE COMPLETLY !!!!!!!!!!!! + if len(song.artwork._data) != 0: + variants = song.artwork._data.__getitem__(0) + best_variant = variants.variants.__getitem__(0) r = artwork_connection.get( - url=best_variant["url"], - name=song.artwork.get_variant_name(best_variant), + url=best_variant.url, + name=best_variant.url, ) temp_target: Target = Target.temp() @@ -107,7 +108,7 @@ def write_metadata_to_target(metadata: Metadata, target: Target, song: Song): mime="image/jpeg", type=3, desc=u"Cover", - data=converted_target.read_bytes(), + data=converted_target.raw_content, ) ) id3_object.frames.delall("USLT") diff --git a/music_kraken/connection/connection.py b/music_kraken/connection/connection.py index 31e4b8a..7b68f2d 100644 --- a/music_kraken/connection/connection.py +++ b/music_kraken/connection/connection.py @@ -1,12 +1,12 @@ from __future__ import annotations +import copy +import inspect import logging import threading import time -from typing import List, Dict, Optional, Set -from urllib.parse import urlparse, urlunsplit, ParseResult -import copy -import inspect +from typing import TYPE_CHECKING, Dict, List, Optional, Set +from urllib.parse import ParseResult, urlparse, urlunsplit import requests import responses @@ -14,12 +14,15 @@ from tqdm import tqdm from .cache import Cache from .rotating import RotatingProxy -from ..objects import Target + +if TYPE_CHECKING: + from ..objects import Target + from ..utils import request_trace -from ..utils.string_processing import shorten_display_url from ..utils.config import main_settings -from ..utils.support_classes.download_result import DownloadResult from ..utils.hacking import merge_args +from ..utils.string_processing import shorten_display_url +from ..utils.support_classes.download_result import DownloadResult class Connection: diff --git a/music_kraken/download/page_attributes.py b/music_kraken/download/page_attributes.py index 1db24be..788c13f 100644 --- a/music_kraken/download/page_attributes.py +++ b/music_kraken/download/page_attributes.py @@ -3,6 +3,9 @@ from collections import defaultdict from pathlib import Path import re import logging +import subprocess + +from PIL import Image from . import FetchOptions, DownloadOptions from .results import SearchResults @@ -17,6 +20,7 @@ from ..objects import ( Artist, Label, ) +from ..objects.artwork import ArtworkVariant from ..audio import write_metadata_to_target, correct_codec from ..utils import output, BColors from ..utils.string_processing import fit_to_file_system @@ -29,10 +33,10 @@ from ..utils.support_classes.download_result import DownloadResult from ..utils.exception import MKMissingNameException from ..utils.exception.download import UrlNotFoundException from ..utils.shared import DEBUG_PAGES +from ..connection import Connection from ..pages import Page, EncyclopaediaMetallum, Musify, YouTube, YoutubeMusic, Bandcamp, Genius, INDEPENDENT_DB_OBJECTS - ALL_PAGES: Set[Type[Page]] = { # EncyclopaediaMetallum, Genius, @@ -73,33 +77,37 @@ if DEBUG_PAGES: class Pages: def __init__(self, exclude_pages: Set[Type[Page]] = None, exclude_shady: bool = False, download_options: DownloadOptions = None, fetch_options: FetchOptions = None): self.LOGGER = logging.getLogger("download") - + self.download_options: DownloadOptions = download_options or DownloadOptions() self.fetch_options: FetchOptions = fetch_options or FetchOptions() # initialize all page instances self._page_instances: Dict[Type[Page], Page] = dict() self._source_to_page: Dict[SourceType, Type[Page]] = dict() - + exclude_pages = exclude_pages if exclude_pages is not None else set() - + if exclude_shady: exclude_pages = exclude_pages.union(SHADY_PAGES) - + if not exclude_pages.issubset(ALL_PAGES): - raise ValueError(f"The excluded pages have to be a subset of all pages: {exclude_pages} | {ALL_PAGES}") - + raise ValueError( + f"The excluded pages have to be a subset of all pages: {exclude_pages} | {ALL_PAGES}") + def _set_to_tuple(page_set: Set[Type[Page]]) -> Tuple[Type[Page], ...]: return tuple(sorted(page_set, key=lambda page: page.__name__)) - + self._pages_set: Set[Type[Page]] = ALL_PAGES.difference(exclude_pages) self.pages: Tuple[Type[Page], ...] = _set_to_tuple(self._pages_set) - self._audio_pages_set: Set[Type[Page]] = self._pages_set.intersection(AUDIO_PAGES) - self.audio_pages: Tuple[Type[Page], ...] = _set_to_tuple(self._audio_pages_set) - + self._audio_pages_set: Set[Type[Page] + ] = self._pages_set.intersection(AUDIO_PAGES) + self.audio_pages: Tuple[Type[Page], ...] = _set_to_tuple( + self._audio_pages_set) + for page_type in self.pages: - self._page_instances[page_type] = page_type(fetch_options=self.fetch_options, download_options=self.download_options) + self._page_instances[page_type] = page_type( + fetch_options=self.fetch_options, download_options=self.download_options) self._source_to_page[page_type.SOURCE_TYPE] = page_type def _get_page_from_enum(self, source_page: SourceType) -> Page: @@ -109,24 +117,26 @@ class Pages: def search(self, query: Query) -> SearchResults: result = SearchResults() - + for page_type in self.pages: result.add( page=page_type, - search_result=self._page_instances[page_type].search(query=query) + search_result=self._page_instances[page_type].search( + query=query) ) - + return result - + def fetch_details(self, data_object: DataObject, stop_at_level: int = 1, **kwargs) -> DataObject: if not isinstance(data_object, INDEPENDENT_DB_OBJECTS): return data_object - + source: Source for source in data_object.source_collection.get_sources(source_type_sorting={ "only_with_page": True, }): - new_data_object = self.fetch_from_source(source=source, stop_at_level=stop_at_level) + new_data_object = self.fetch_from_source( + source=source, stop_at_level=stop_at_level) if new_data_object is not None: data_object.merge(new_data_object) @@ -135,14 +145,14 @@ class Pages: def fetch_from_source(self, source: Source, **kwargs) -> Optional[DataObject]: if not source.has_page: return None - + source_type = source.page.get_source_type(source=source) if source_type is None: self.LOGGER.debug(f"Could not determine source type for {source}.") return None func = getattr(source.page, fetch_map[source_type]) - + # fetching the data object and marking it as fetched data_object: DataObject = func(source=source, **kwargs) data_object.mark_as_fetched(source.hash_url) @@ -152,21 +162,49 @@ class Pages: source = Source.match_url(url, ALL_SOURCE_TYPES.MANUAL) if source is None: return None - + return self.fetch_from_source(source=source) - + def _skip_object(self, data_object: DataObject) -> bool: if isinstance(data_object, Album): if not self.download_options.download_all and data_object.album_type in self.download_options.album_type_blacklist: return True - + return False + def _fetch_artist_artwork(self, artist: Artist, naming: dict): + naming: Dict[str, List[str]] = defaultdict(list, naming) + naming["artist"].append(artist.name) + naming["label"].extend( + [l.title_value for l in artist.label_collection]) + # removing duplicates from the naming, and process the strings + for key, value in naming.items(): + # https://stackoverflow.com/a/17016257 + naming[key] = list(dict.fromkeys(value)) + + artwork_collection: ArtworkCollection = artist.artwork + artwork_collection.compile() + for image_number, artwork in enumerate(artwork_collection): + for artwork_variant in artwork.variants: + naming["image_number"] = [str(image_number)] + target = Target( + relative_to_music_dir=True, + file_path=Path(self._parse_path_template( + main_settings["artist_artwork_path"], naming=naming)) + ) + if not target.file_path.parent.exists(): + target.create_path() + subprocess.Popen(["gio", "set", target.file_path.parent, "metadata::custom-icon", "file://"+str(target.file_path)]) + with Image.open(artwork_variant.target.file_path) as img: + img.save(target.file_path, main_settings["image_format"]) + artwork_variant.target = Target + def download(self, data_object: DataObject, genre: str, **kwargs) -> DownloadResult: # fetch the given object self.fetch_details(data_object) - output(f"\nDownloading {data_object.option_string}...", color=BColors.BOLD) - + output( + f"\nDownloading {data_object.option_string}...", color=BColors.BOLD) + # fetching all parent objects (e.g. if you only download a song) if not kwargs.get("fetched_upwards", False): to_fetch: List[DataObject] = [data_object] @@ -183,9 +221,19 @@ class Pages: new_to_fetch.extend(c) to_fetch = new_to_fetch - + kwargs["fetched_upwards"] = True - + + naming = kwargs.get("naming", { + "genre": [genre], + "audio_format": [main_settings["audio_format"]], + "image_format": [main_settings["image_format"]] + }) + + # download artist artwork + if isinstance(data_object, Artist): + self._fetch_artist_artwork(artist=data_object, naming=naming) + # download all children download_result: DownloadResult = DownloadResult() for c in data_object.get_child_collections(): @@ -203,10 +251,7 @@ class Pages: I am able to do that, because duplicate values are removed later on. """ - self._download_song(data_object, naming={ - "genre": [genre], - "audio_format": [main_settings["audio_format"]], - }) + self._download_song(data_object, naming=naming) return download_result @@ -214,13 +259,15 @@ class Pages: return set(re.findall(r"{([^}]+)}", path_template)) def _parse_path_template(self, path_template: str, naming: Dict[str, List[str]]) -> str: - field_names: Set[str] = self._extract_fields_from_template(path_template) - + field_names: Set[str] = self._extract_fields_from_template( + path_template) + for field in field_names: if len(naming[field]) == 0: raise MKMissingNameException(f"Missing field for {field}.") - path_template = path_template.replace(f"{{{field}}}", naming[field][0]) + path_template = path_template.replace( + f"{{{field}}}", naming[field][0]) return path_template @@ -230,16 +277,17 @@ class Pages: Search the song in the file system. """ r = DownloadResult(total=1) - + # pre process the data recursively song.compile() - + # manage the naming naming: Dict[str, List[str]] = defaultdict(list, naming) naming["song"].append(song.title_value) naming["isrc"].append(song.isrc) naming["album"].extend(a.title_value for a in song.album_collection) - naming["album_type"].extend(a.album_type.value for a in song.album_collection) + naming["album_type"].extend( + a.album_type.value for a in song.album_collection) naming["artist"].extend(a.name for a in song.artist_collection) naming["artist"].extend(a.name for a in song.feature_artist_collection) for a in song.album_collection: @@ -256,13 +304,16 @@ class Pages: song.target_collection.append(Target( relative_to_music_dir=True, file_path=Path( - self._parse_path_template(main_settings["download_path"], naming=naming), - self._parse_path_template(main_settings["download_file"], naming=naming), + self._parse_path_template( + main_settings["download_path"], naming=naming), + self._parse_path_template( + main_settings["download_file"], naming=naming), ) )) for target in song.target_collection: if target.exists: - output(f'{target.file_path} {BColors.OKGREEN.value}[already exists]', color=BColors.GREY) + output( + f'{target.file_path} {BColors.OKGREEN.value}[already exists]', color=BColors.GREY) r.found_on_disk += 1 if not self.download_options.download_again_if_found: @@ -283,8 +334,10 @@ class Pages: break used_source = source - streaming_results = source.page.download_song_to_target(source=source, target=tmp, desc="download") - skip_intervals = source.page.get_skip_intervals(song=song, source=source) + streaming_results = source.page.download_song_to_target( + source=source, target=tmp, desc="download") + skip_intervals = source.page.get_skip_intervals( + song=song, source=source) # if something has been downloaded but it somehow failed, delete the file if streaming_results.is_fatal_error and tmp.exists: @@ -308,7 +361,8 @@ class Pages: used_source.page.post_process_hook(song=song, temp_target=tmp) if not found_on_disk or self.download_options.process_metadata_if_found: - write_metadata_to_target(metadata=song.metadata, target=tmp, song=song) + write_metadata_to_target( + metadata=song.metadata, target=tmp, song=song) # copy the tmp target to the final locations for target in song.target_collection: @@ -319,10 +373,10 @@ class Pages: def fetch_url(self, url: str, stop_at_level: int = 2) -> Tuple[Type[Page], DataObject]: source = Source.match_url(url, ALL_SOURCE_TYPES.MANUAL) - + if source is None: raise UrlNotFoundException(url=url) - + _actual_page = self._source_to_page[source.source_type] - - return _actual_page, self._page_instances[_actual_page].fetch_object_from_source(source=source, stop_at_level=stop_at_level) \ No newline at end of file + + return _actual_page, self._page_instances[_actual_page].fetch_object_from_source(source=source, stop_at_level=stop_at_level) diff --git a/music_kraken/objects/__init__.py b/music_kraken/objects/__init__.py index 7c7515d..2a85abf 100644 --- a/music_kraken/objects/__init__.py +++ b/music_kraken/objects/__init__.py @@ -1,27 +1,16 @@ from typing_extensions import TypeVar + +from .artwork import ArtworkCollection +from .collection import Collection +from .contact import Contact +from .country import Country +from .formatted_text import FormattedText +from .metadata import ID3Timestamp +from .metadata import Mapping as ID3Mapping +from .metadata import Metadata from .option import Options - -from .metadata import Metadata, Mapping as ID3Mapping, ID3Timestamp - +from .parents import OuterProxy +from .song import Album, Artist, Label, Lyrics, Song, Target from .source import Source, SourceType -from .song import ( - Song, - Album, - Artist, - Target, - Lyrics, - Label -) - -from .formatted_text import FormattedText -from .collection import Collection - -from .country import Country -from .contact import Contact - -from .parents import OuterProxy - -from .artwork import Artwork - DatabaseObject = OuterProxy diff --git a/music_kraken/objects/artwork.py b/music_kraken/objects/artwork.py index 178edf6..9effaba 100644 --- a/music_kraken/objects/artwork.py +++ b/music_kraken/objects/artwork.py @@ -1,64 +1,243 @@ from __future__ import annotations -from typing import List, Optional, Dict, Tuple, Type, Union, TypedDict - -from .collection import Collection -from .metadata import ( - Mapping as id3Mapping, - ID3Timestamp, - Metadata -) -from ..utils.string_processing import unify, hash_url - -from .parents import OuterProxy as Base +from copy import copy +from dataclasses import dataclass, field +from functools import cached_property +from typing import Dict, List, Optional, Set, Tuple, Type, TypedDict, Union +from ..connection import Connection +from ..utils import create_dataclass_instance, custom_hash from ..utils.config import main_settings +from ..utils.enums import PictureType +from ..utils.string_processing import hash_url, unify +from .collection import Collection +from .metadata import ID3Timestamp +from .metadata import Mapping as id3Mapping +from .metadata import Metadata +from .parents import OuterProxy as Base +from .target import Target +from PIL import Image + +import imagehash + +artwork_connection: Connection = Connection(module="artwork") -class ArtworkVariant(TypedDict): +@dataclass +class ArtworkVariant: url: str - width: int - height: int - deviation: float + width: Optional[int] = None + heigth: Optional[int] = None + image_format: Optional[str] = None + def __hash__(self) -> int: + return custom_hash(self.url) -class Artwork: - def __init__(self, *variants: List[ArtworkVariant]) -> None: - self._variant_mapping: Dict[str, ArtworkVariant] = {} + def __eq__(self, other: ArtworkVariant) -> bool: + return hash(self) == hash(other) - for variant in variants: - self.append(**variant) + def __contains__(self, other: str) -> bool: + return custom_hash(other) == hash(self.url) - @staticmethod - def _calculate_deviation(*dimensions: List[int]) -> float: - return sum(abs(d - main_settings["preferred_artwork_resolution"]) for d in dimensions) / len(dimensions) + def __merge__(self, other: ArtworkVariant) -> None: + for key, value in other.__dict__.items(): + if value is None: + continue - def append(self, url: str, width: int = main_settings["preferred_artwork_resolution"], height: int = main_settings["preferred_artwork_resolution"], **kwargs) -> None: - if url is None: + if getattr(self, key) is None: + setattr(self, key, value) + + @cached_property + def target(self) -> Target: + return Target.temp() + + def fetch(self) -> None: + global artwork_connection + + r = artwork_connection.get(self.url, name=hash_url(self.url)) + if r is None: return - - self._variant_mapping[hash_url(url=url)] = { - "url": url, - "width": width, - "height": height, - "deviation": self._calculate_deviation(width, height), - } + + self.target.raw_content = r.content + +@dataclass +class Artwork: + variants: List[ArtworkVariant] = field(default_factory=list) + + artwork_type: PictureType = PictureType.OTHER + + def search_variant(self, url: str) -> Optional[ArtworkVariant]: + if url is None: + return None + + for variant in self.variants: + if url in variant: + return variant + + return None + + def __contains__(self, other: str) -> bool: + return self.search_variant(other) is not None + + def add_data(self, **kwargs) -> None: + variant = self.search_variant(kwargs.get("url")) + + if variant is None: + variant, kwargs = create_dataclass_instance(ArtworkVariant, kwargs) + self.variants.append(variant) + + variant.__dict__.update(kwargs) @property - def best_variant(self) -> ArtworkVariant: - if len(self._variant_mapping.keys()) <= 0: + def url(self) -> Optional[str]: + if len(self.variants) <= 0: return None - return min(self._variant_mapping.values(), key=lambda x: x["deviation"]) + return self.variants[0].url - def get_variant_name(self, variant: ArtworkVariant) -> str: - return f"artwork_{variant['width']}x{variant['height']}_{hash_url(variant['url']).replace('/', '_')}" + def fetch(self) -> None: + for variant in self.variants: + variant.fetch() - def __merge__(self, other: Artwork, **kwargs) -> None: - for key, value in other._variant_mapping.items(): - if key not in self._variant_mapping: - self._variant_mapping[key] = value - def __eq__(self, other: Artwork) -> bool: - if not isinstance(other, Artwork): - return False - return any(a == b for a, b in zip(self._variant_mapping.keys(), other._variant_mapping.keys())) +class ArtworkCollection: + """ + Stores all the images/artworks for one data object. + + There could be duplicates before calling ArtworkCollection.compile() + _this is called before one object is downloaded automatically._ + """ + + artwork_type: PictureType = PictureType.OTHER + + def __init__( + self, + *data: List[Artwork], + parent_artworks: Set[ArtworkCollection] = None, + crop_images: bool = True, + ) -> None: + # this is used for the song artwork, to fall back to the song artwork + self.parent_artworks: Set[ArtworkCollection] = parent_artworks or set() + self.crop_images: bool = crop_images + + self._data = [] + self.extend(data) + + def search_artwork(self, url: str) -> Optional[ArtworkVariant]: + for artwork in self._data: + if url in artwork: + return artwork + + return None + + def __contains__(self, other: str) -> bool: + return self.search_artwork(other) is not None + + def _create_new_artwork(self, **kwargs) -> Tuple[Artwork, dict]: + kwargs["artwork_type"] = kwargs.get("artwork_type", self.artwork_type) + + return create_dataclass_instance(Artwork, dict(**kwargs)) + + def add_data(self, url: str, **kwargs) -> Artwork: + kwargs["url"] = url + + artwork = self.search_artwork(url) + + if artwork is None: + artwork, kwargs = self._create_new_artwork(**kwargs) + self._data.append(artwork) + + artwork.add_data(**kwargs) + return artwork + + def append(self, value: Union[Artwork, ArtworkVariant, dict], **kwargs): + """ + You can append the types Artwork, ArtworkVariant or dict + the best option would be to use Artwork and avoid the other options. + """ + if isinstance(value, dict): + kwargs.update(value) + value, kwargs = create_dataclass_instance(ArtworkVariant, kwargs) + + if isinstance(value, ArtworkVariant): + kwargs["variants"] = [value] + value, kwargs = create_dataclass_instance(Artwork, kwargs) + + if isinstance(value, Artwork): + self._data.append(value) + return + + def extend(self, values: List[Union[Artwork, ArtworkVariant, dict]], **kwargs): + for value in values: + self.append(value, **kwargs) + + def compile(self, **kwargs) -> None: + """ + This will make the artworks ready for download and delete duplicates. + """ + artwork_hashes: list = list() + artwork_urls: list = list() + for artwork in self._data: + index = 0 + for artwork_variant in artwork.variants: + r = artwork_connection.get( + url=artwork_variant.url, + name=artwork_variant.url, + ) + + if artwork_variant.url in artwork_urls: + artwork.variants.pop(index) + continue + artwork_urls.append(artwork_variant.url) + + target: Target = artwork_variant.target + with target.open("wb") as f: + f.write(r.content) + + with Image.open(target.file_path) as img: + # https://stackoverflow.com/a/59476938/16804841 + if img.mode != 'RGB': + img = img.convert('RGB') + + try: + image_hash = imagehash.crop_resistant_hash(img) + except Exception as e: + continue + + if image_hash in artwork_hashes: + artwork.variants.pop(index) + target.delete() + continue + artwork_hashes.append(image_hash) + width, height = img.size + if width != height: + if width > height: + img = img.crop((width // 2 - height // 2, 0, width // 2 + height // 2, height)) + else: + img = img.crop((0, height // 2 - width // 2, width, height // 2 + width // 2)) + + # resize the image to the preferred resolution + img.thumbnail((main_settings["preferred_artwork_resolution"], main_settings["preferred_artwork_resolution"])) + index =+ 1 + + + + def __merge__(self, other: ArtworkCollection, **kwargs) -> None: + self.parent_artworks.update(other.parent_artworks) + for other_artwork in other._data: + for other_variant in other_artwork.variants: + if self.__contains__(other_variant.url): + continue + self.append(ArtworkVariant(other_variant.url)) + + + def __hash__(self) -> int: + return id(self) + + def __iter__(self) -> Generator[Artwork, None, None]: + yield from self._data + + def get_urls(self) -> Generator[str, None, None]: + yield from (artwork.url for artwork in self._data if artwork.url is not None) + + + \ No newline at end of file diff --git a/music_kraken/objects/collection.py b/music_kraken/objects/collection.py index f1d7e75..687b069 100644 --- a/music_kraken/objects/collection.py +++ b/music_kraken/objects/collection.py @@ -1,16 +1,43 @@ from __future__ import annotations -from collections import defaultdict -from typing import TypeVar, Generic, Dict, Optional, Iterable, List, Iterator, Tuple, Generator, Union, Any, Set import copy +from collections import defaultdict +from dataclasses import dataclass +from typing import (Any, Callable, Dict, Generator, Generic, Iterable, + Iterator, List, Optional, Set, Tuple, TypeVar, Union) -from .parents import OuterProxy -from ..utils import object_trace -from ..utils import output, BColors +from ..utils import BColors, object_trace, output +from .parents import InnerData, OuterProxy T = TypeVar('T', bound=OuterProxy) +@dataclass +class AppendHookArguments: + """ + This class is used to store the arguments for the append hook. + The best explanation is with an examples: + + ``` + album = Album() + song = Song() + album.song_collection.append(song) + ``` + + In this case, the append hook is triggered with the following arguments: + ``` + AppendHookArguments( + collection=album.song_collection, + new_object=song, + collection_root_objects=[album] + ) + ``` + """ + + collection: Collection + new_object: T + collection_root_objects: Set[InnerData] + class Collection(Generic[T]): __is_collection__ = True @@ -27,6 +54,7 @@ class Collection(Generic[T]): sync_on_append: Dict[str, Collection] = None, append_object_to_attribute: Dict[str, T] = None, extend_object_to_attribute: Dict[str, Collection] = None, + append_callbacks: Set[Callable[[AppendHookArguments], None]] = None, ) -> None: self._collection_for: dict = dict() @@ -41,6 +69,7 @@ class Collection(Generic[T]): self.sync_on_append: Dict[str, Collection] = sync_on_append or {} self.pull_from: List[Collection] = [] self.push_to: List[Collection] = [] + self.append_callbacks: Set[Callable[[AppendHookArguments], None]] = append_callbacks or set() # This is to cleanly unmap previously mapped items by their id self._indexed_from_id: Dict[int, Dict[str, Any]] = defaultdict(dict) @@ -141,6 +170,14 @@ class Collection(Generic[T]): for attribute, new_object in self.append_object_to_attribute.items(): other.__getattribute__(attribute).append(new_object, **kwargs) + append_hook_args = AppendHookArguments( + collection=self, + new_object=other, + collection_root_objects=self._collection_for.keys(), + ) + for callback in self.append_callbacks: + callback(append_hook_args) + def append(self, other: Optional[T], **kwargs): """ If an object, that represents the same entity exists in a relevant collection, diff --git a/music_kraken/objects/song.py b/music_kraken/objects/song.py index 980bc08..f39aa96 100644 --- a/music_kraken/objects/song.py +++ b/music_kraken/objects/song.py @@ -1,35 +1,32 @@ from __future__ import annotations +import copy import random from collections import defaultdict -from typing import List, Optional, Dict, Tuple, Type, Union -import copy +from typing import Dict, List, Optional, Tuple, Type, Union import pycountry -from ..utils.enums.album import AlbumType, AlbumStatus -from .collection import Collection -from .formatted_text import FormattedText -from .lyrics import Lyrics -from .contact import Contact -from .artwork import Artwork -from .metadata import ( - Mapping as id3Mapping, - ID3Timestamp, - Metadata -) -from .option import Options -from .parents import OuterProxy, P -from .source import Source, SourceCollection -from .target import Target -from .country import Language, Country +from ..utils.config import main_settings +from ..utils.enums.album import AlbumStatus, AlbumType +from ..utils.enums.colors import BColors from ..utils.shared import DEBUG_PRINT_ID from ..utils.string_processing import unify - +from .artwork import ArtworkCollection +from .collection import AppendHookArguments, Collection +from .contact import Contact +from .country import Country, Language +from .formatted_text import FormattedText +from .lyrics import Lyrics +from .metadata import ID3Timestamp +from .metadata import Mapping as id3Mapping +from .metadata import Metadata +from .option import Options +from .parents import OuterProxy from .parents import OuterProxy as Base - -from ..utils.config import main_settings -from ..utils.enums.colors import BColors +from .parents import P +from .source import Source, SourceCollection +from .target import Target """ All Objects dependent @@ -89,7 +86,7 @@ class Song(Base): genre: str note: FormattedText tracksort: int - artwork: Artwork + artwork: ArtworkCollection source_collection: SourceCollection target_collection: Collection[Target] @@ -105,7 +102,7 @@ class Song(Base): "source_collection": SourceCollection, "target_collection": Collection, "lyrics_collection": Collection, - "artwork": Artwork, + "artwork": ArtworkCollection, "album_collection": Collection, "artist_collection": Collection, @@ -133,7 +130,7 @@ class Song(Base): feature_artist_list: List[Artist] = None, album_list: List[Album] = None, tracksort: int = 0, - artwork: Optional[Artwork] = None, + artwork: Optional[ArtworkCollection] = None, **kwargs ) -> None: real_kwargs = copy.copy(locals()) @@ -144,6 +141,14 @@ class Song(Base): UPWARDS_COLLECTION_STRING_ATTRIBUTES = ("artist_collection", "feature_artist_collection", "album_collection") TITEL = "title" + @staticmethod + def register_artwork_parent(append_hook_arguments: AppendHookArguments): + album: Album = append_hook_arguments.new_object + + song: Song + for song in append_hook_arguments.collection_root_objects: + song.artwork.parent_artworks.add(album.artwork) + def __init_collections__(self) -> None: self.feature_artist_collection.push_to = [self.artist_collection] self.artist_collection.pull_from = [self.feature_artist_collection] @@ -161,6 +166,7 @@ class Song(Base): self.feature_artist_collection.extend_object_to_attribute = { "album_collection": self.album_collection } + self.album_collection.append_callbacks = set((Song.register_artwork_parent, )) def _add_other_db_objects(self, object_type: Type[OuterProxy], object_list: List[OuterProxy]): if object_type is Song: @@ -178,6 +184,10 @@ class Song(Base): self.album_collection.extend(object_list) return + def _compile(self): + self.artwork.compile() + + INDEX_DEPENDS_ON = ("title", "isrc", "source_collection") @property @@ -249,6 +259,7 @@ class Album(Base): albumsort: int notes: FormattedText + artwork: ArtworkCollection source_collection: SourceCollection song_collection: Collection[Song] @@ -268,6 +279,7 @@ class Album(Base): "date": ID3Timestamp, "notes": FormattedText, + "artwork": lambda: ArtworkCollection(crop_images=False), "source_collection": SourceCollection, "song_collection": Collection, @@ -290,6 +302,7 @@ class Album(Base): barcode: str = None, albumsort: int = None, notes: FormattedText = None, + artwork: ArtworkCollection = None, source_list: List[Source] = None, artist_list: List[Artist] = None, song_list: List[Song] = None, @@ -304,6 +317,13 @@ class Album(Base): DOWNWARDS_COLLECTION_STRING_ATTRIBUTES = ("song_collection",) UPWARDS_COLLECTION_STRING_ATTRIBUTES = ("label_collection", "artist_collection") + @staticmethod + def register_artwork_parent(append_hook_arguments: AppendHookArguments): + song: Song = append_hook_arguments.new_object + + for root_object in append_hook_arguments.collection_root_objects: + song.artwork.parent_artworks.add(root_object.artwork) + def __init_collections__(self): self.feature_artist_collection.push_to = [self.artist_collection] self.artist_collection.pull_from = [self.feature_artist_collection] @@ -322,6 +342,8 @@ class Album(Base): "label_collection": self.label_collection } + self.song_collection.append_callbacks = set((Album.register_artwork_parent, )) + def _add_other_db_objects(self, object_type: Type[OuterProxy], object_list: List[OuterProxy]): if object_type is Song: self.song_collection.extend(object_list) @@ -477,6 +499,8 @@ class Artist(Base): general_genre: str unformatted_location: str + artwork: ArtworkCollection + source_collection: SourceCollection contact_collection: Collection[Contact] @@ -493,6 +517,8 @@ class Artist(Base): "lyrical_themes": list, "general_genre": lambda: "", + "artwork": ArtworkCollection, + "source_collection": SourceCollection, "album_collection": Collection, "contact_collection": Collection, @@ -511,6 +537,7 @@ class Artist(Base): notes: FormattedText = None, lyrical_themes: List[str] = None, general_genre: str = None, + artwork: ArtworkCollection = None, unformatted_location: str = None, source_list: List[Source] = None, contact_list: List[Contact] = None, diff --git a/music_kraken/objects/target.py b/music_kraken/objects/target.py index 2491a5a..28bdb26 100644 --- a/music_kraken/objects/target.py +++ b/music_kraken/objects/target.py @@ -1,17 +1,17 @@ from __future__ import annotations -from pathlib import Path -from typing import List, Tuple, TextIO, Union, Optional import logging import random +from pathlib import Path +from typing import List, Optional, TextIO, Tuple, Union + import requests from tqdm import tqdm -from .parents import OuterProxy +from ..utils.config import logging_settings, main_settings from ..utils.shared import HIGHEST_ID -from ..utils.config import main_settings, logging_settings from ..utils.string_processing import fit_to_file_system - +from .parents import OuterProxy LOGGER = logging.getLogger("target") @@ -31,7 +31,8 @@ class Target(OuterProxy): } @classmethod - def temp(cls, name: str = str(random.randint(0, HIGHEST_ID)), file_extension: Optional[str] = None) -> P: + def temp(cls, name: str = None, file_extension: Optional[str] = None) -> P: + name = name or str(random.randint(0, HIGHEST_ID)) if file_extension is not None: name = f"{name}.{file_extension}" @@ -117,3 +118,11 @@ class Target(OuterProxy): def read_bytes(self) -> bytes: return self.file_path.read_bytes() + + @property + def raw_content(self) -> bytes: + return self.file_path.read_bytes() + + @raw_content.setter + def raw_content(self, content: bytes): + self.file_path.write_bytes(content) diff --git a/music_kraken/pages/bandcamp.py b/music_kraken/pages/bandcamp.py index 1caf803..fe58fb3 100644 --- a/music_kraken/pages/bandcamp.py +++ b/music_kraken/pages/bandcamp.py @@ -1,33 +1,22 @@ -from typing import List, Optional, Type -from urllib.parse import urlparse, urlunparse import json from enum import Enum -from bs4 import BeautifulSoup -import pycountry +from typing import List, Optional, Type +from urllib.parse import urlparse, urlunparse + +import pycountry +from bs4 import BeautifulSoup -from ..objects import Source, DatabaseObject -from .abstract import Page -from ..objects import ( - Artist, - Source, - SourceType, - Song, - Album, - Label, - Target, - Contact, - ID3Timestamp, - Lyrics, - FormattedText, - Artwork, -) from ..connection import Connection +from ..objects import (Album, Artist, ArtworkCollection, Contact, + DatabaseObject, FormattedText, ID3Timestamp, Label, + Lyrics, Song, Source, SourceType, Target) from ..utils import dump_to_file -from ..utils.enums import SourceType, ALL_SOURCE_TYPES -from ..utils.support_classes.download_result import DownloadResult -from ..utils.string_processing import clean_song_title -from ..utils.config import main_settings, logging_settings +from ..utils.config import logging_settings, main_settings +from ..utils.enums import ALL_SOURCE_TYPES, SourceType from ..utils.shared import DEBUG +from ..utils.string_processing import clean_song_title +from ..utils.support_classes.download_result import DownloadResult +from .abstract import Page if DEBUG: from ..utils import dump_to_file @@ -239,6 +228,11 @@ class Bandcamp(Page): for subsoup in html_music_grid.find_all("li"): artist.album_collection.append(self._parse_album(soup=subsoup, initial_source=source)) + # artist artwork + artist_artwork: BeautifulSoup = soup.find("img", {"class":"band-photo"}) + if artist_artwork is not None: + artist.artwork.add_data(artist_artwork.get("data-src", artist_artwork.get("src"))) + for i, data_blob_soup in enumerate(soup.find_all("div", {"id": ["pagedata", "collectors-data"]})): data_blob = data_blob_soup["data-blob"] @@ -253,7 +247,7 @@ class Bandcamp(Page): artist.source_collection.append(source) return artist - def _parse_track_element(self, track: dict, artwork: Artwork) -> Optional[Song]: + def _parse_track_element(self, track: dict, artwork: ArtworkCollection) -> Optional[Song]: lyrics_list: List[Lyrics] = [] _lyrics: Optional[str] = track.get("item", {}).get("recordingOf", {}).get("lyrics", {}).get("text") @@ -287,9 +281,15 @@ class Bandcamp(Page): artist_source_list = [] if "@id" in artist_data: artist_source_list = [Source(self.SOURCE_TYPE, _parse_artist_url(artist_data["@id"]))] + + + source_list: List[Source] = [source] + if "mainEntityOfPage" in data or "@id" in data: + source_list.append(Source(self.SOURCE_TYPE, data.get("mainEntityOfPage", data["@id"]))) + album = Album( title=data["name"].strip(), - source_list=[Source(self.SOURCE_TYPE, data.get("mainEntityOfPage", data["@id"]))], + source_list=source_list, date=ID3Timestamp.strptime(data["datePublished"], "%d %b %Y %H:%M:%S %Z"), artist_list=[Artist( name=artist_data["name"].strip(), @@ -297,7 +297,7 @@ class Bandcamp(Page): )] ) - artwork: Artwork = Artwork() + artwork: ArtworkCollection = ArtworkCollection() def _get_artwork_url(_data: dict) -> Optional[str]: if "image" in _data: @@ -308,15 +308,14 @@ class Bandcamp(Page): _artwork_url = _get_artwork_url(data) if _artwork_url is not None: - artwork.append(url=_artwork_url, width=350, height=350) + artwork.add_data(url=_artwork_url, width=350, height=350) else: for album_release in data.get("albumRelease", []): _artwork_url = _get_artwork_url(album_release) if _artwork_url is not None: - artwork.append(url=_artwork_url, width=350, height=350) + artwork.add_data(url=_artwork_url, width=350, height=350) break - for i, track_json in enumerate(data.get("track", {}).get("itemListElement", [])): if DEBUG: dump_to_file(f"album_track_{i}.json", json.dumps(track_json), is_json=True, exit_after_dump=False) @@ -362,17 +361,29 @@ class Bandcamp(Page): for key, value in other_data.get("trackinfo", [{}])[0].get("file", {"": None}).items(): mp3_url = value + source_list: List[Source] = [source] + if "mainEntityOfPage" in data or "@id" in data: + source_list.append(Source(self.SOURCE_TYPE, data.get("mainEntityOfPage", data["@id"]), audio_url=mp3_url)) + + source_list_album: List[Source] = [source] + if "@id" in album_data: + source_list_album.append(Source(self.SOURCE_TYPE, album_data["@id"])) + + source_list_artist: List[Source] = [source] + if "@id" in artist_data: + source_list_artist.append(Source(self.SOURCE_TYPE, _parse_artist_url(artist_data["@id"]))) + song = Song( title=clean_song_title(data["name"], artist_name=artist_data["name"]), - source_list=[source, Source(self.SOURCE_TYPE, data.get("mainEntityOfPage", data["@id"]), audio_url=mp3_url)], + source_list=source_list, album_list=[Album( title=album_data["name"].strip(), date=ID3Timestamp.strptime(data["datePublished"], "%d %b %Y %H:%M:%S %Z"), - source_list=[Source(self.SOURCE_TYPE, album_data["@id"])] + source_list=source_list_album )], artist_list=[Artist( name=artist_data["name"].strip(), - source_list=[Source(self.SOURCE_TYPE, _parse_artist_url(artist_data["@id"]))] + source_list=source_list_artist )], lyrics_list=self._fetch_lyrics(soup=soup) ) diff --git a/music_kraken/pages/genius.py b/music_kraken/pages/genius.py index 5afa556..c6414ba 100644 --- a/music_kraken/pages/genius.py +++ b/music_kraken/pages/genius.py @@ -1,33 +1,23 @@ -from typing import List, Optional, Type -from urllib.parse import urlparse, urlunparse, urlencode -import json +import simplejson as json +from json_unescape import escape_json, unescape_json from enum import Enum -from bs4 import BeautifulSoup -import pycountry +from typing import List, Optional, Type +from urllib.parse import urlencode, urlparse, urlunparse + +import pycountry +from bs4 import BeautifulSoup -from ..objects import Source, DatabaseObject -from .abstract import Page -from ..objects import ( - Artist, - Source, - SourceType, - Song, - Album, - Label, - Target, - Contact, - ID3Timestamp, - Lyrics, - FormattedText, - Artwork, -) from ..connection import Connection +from ..objects import (Album, Artist, ArtworkCollection, Contact, + DatabaseObject, FormattedText, ID3Timestamp, Label, + Lyrics, Song, Source, SourceType, Target) from ..utils import dump_to_file, traverse_json_path -from ..utils.enums import SourceType, ALL_SOURCE_TYPES -from ..utils.support_classes.download_result import DownloadResult -from ..utils.string_processing import clean_song_title -from ..utils.config import main_settings, logging_settings +from ..utils.config import logging_settings, main_settings +from ..utils.enums import ALL_SOURCE_TYPES, SourceType from ..utils.shared import DEBUG +from ..utils.string_processing import clean_song_title +from ..utils.support_classes.download_result import DownloadResult +from .abstract import Page if DEBUG: from ..utils import dump_to_file @@ -56,34 +46,34 @@ class Genius(Page): return Song - def add_to_artwork(self, artwork: Artwork, url: str): + def add_to_artwork(self, artwork: ArtworkCollection, url: str): if url is None: return url_frags = url.split(".") if len(url_frags) < 2: - artwork.append(url=url) + artwork.add_data(url=url) return dimensions = url_frags[-2].split("x") if len(dimensions) < 2: - artwork.append(url=url) + artwork.add_data(url=url) return if len(dimensions) == 3: dimensions = dimensions[:-1] try: - artwork.append(url=url, width=int(dimensions[0]), height=int(dimensions[1])) + artwork.add_data(url=url, width=int(dimensions[0]), height=int(dimensions[1])) except ValueError: - artwork.append(url=url) + artwork.add_data(url=url) def parse_api_object(self, data: dict) -> Optional[DatabaseObject]: if data is None: return None object_type = data.get("_type") - artwork = Artwork() + artwork = ArtworkCollection() self.add_to_artwork(artwork, data.get("header_image_url")) self.add_to_artwork(artwork, data.get("image_url")) @@ -279,8 +269,9 @@ class Genius(Page): # get the contents that are between `JSON.parse('` and `');` content = self.get_json_content_from_response(r, start="window.__PRELOADED_STATE__ = JSON.parse('", end="');\n window.__APP_CONFIG__ = ") if content is not None: - content = content.replace("\\\\", "\\").replace('\\"', '"').replace("\\'", "'") - data = json.loads(content) + #IMPLEMENT FIX FROM HAZEL + content = escape_json(content) + data = json.loads(content) lyrics_html = traverse_json_path(data, "songPage.lyricsData.body.html", default=None) if lyrics_html is not None: diff --git a/music_kraken/pages/musify.py b/music_kraken/pages/musify.py index e8078fb..bfb19ff 100644 --- a/music_kraken/pages/musify.py +++ b/music_kraken/pages/musify.py @@ -1,34 +1,25 @@ from collections import defaultdict from dataclasses import dataclass from enum import Enum -from typing import List, Optional, Type, Union, Generator, Dict, Any +from typing import Any, Dict, Generator, List, Optional, Type, Union from urllib.parse import urlparse import pycountry from bs4 import BeautifulSoup from ..connection import Connection -from .abstract import Page -from ..utils.enums import SourceType, ALL_SOURCE_TYPES -from ..utils.enums.album import AlbumType, AlbumStatus -from ..objects import ( - Artist, - Source, - Song, - Album, - ID3Timestamp, - FormattedText, - Label, - Target, - DatabaseObject, - Lyrics, - Artwork -) +from ..objects import (Album, Artist, DatabaseObject, + FormattedText, ID3Timestamp, Label, Lyrics, Song, + Source, Target) +from ..objects.artwork import (Artwork, ArtworkVariant, ArtworkCollection) +from ..utils import shared, string_processing from ..utils.config import logging_settings, main_settings -from ..utils import string_processing, shared +from ..utils.enums import ALL_SOURCE_TYPES, SourceType +from ..utils.enums.album import AlbumStatus, AlbumType from ..utils.string_processing import clean_song_title -from ..utils.support_classes.query import Query from ..utils.support_classes.download_result import DownloadResult +from ..utils.support_classes.query import Query +from .abstract import Page """ https://musify.club/artist/ghost-bath-280348?_pjax=#bodyContent @@ -457,17 +448,17 @@ class Musify(Page): for album_info in soup.find_all("ul", {"class": "album-info"}): list_element: BeautifulSoup = album_info.find("li") - if list_element is not None: - artist_soup: BeautifulSoup - for artist_soup in list_element.find_all("a"): - artist_source_list = [] - href = artist_soup["href"] - if href is not None: - artist_source_list = [Source(self.SOURCE_TYPE, self.HOST + href)] - artist_list.append(Artist( - name=artist_soup.text.strip(), - source_list=artist_source_list - )) + if list_element is not None: + artist_soup: BeautifulSoup + for artist_soup in list_element.find_all("a"): + artist_source_list = [] + href = artist_soup["href"] + if href is not None: + artist_source_list = [Source(self.SOURCE_TYPE, self.HOST + href)] + artist_list.append(Artist( + name=artist_soup.text.strip(), + source_list=artist_source_list + )) # breadcrums breadcrumb_list_element_list: List[BeautifulSoup] = soup.find_all("ol", {"class": "breadcrumb"}) @@ -485,11 +476,11 @@ class Musify(Page): track_name = list_points[4].text.strip() - # artwork - artwork: Artwork = Artwork() + # album artwork + artwork: ArtworkCollection = ArtworkCollection() album_image_element_list: List[BeautifulSoup] = soup.find_all("img", {"class": "album-img"}) for album_image_element in album_image_element_list: - artwork.append(url=album_image_element.get("data-src", album_image_element.get("src"))) + artwork.add_data(url=album_image_element.get("data-src", album_image_element.get("src"))) # lyrics lyrics_container: List[BeautifulSoup] = soup.find_all("div", {"id": "tabLyrics"}) @@ -754,11 +745,18 @@ class Musify(Page): except ValueError: self.LOGGER.debug(f"Raw datetime doesn't match time format %Y-%m-%d: {raw_datetime}") + # album artwork + album_artwork: ArtworkCollection = ArtworkCollection() + album_artwork_list: List[BeautifulSoup] = soup.find_all("img", {"class":"artist-img"}) + for album_artwork in album_artwork_list: + album_artwork.add_data(url=album_artwork.get("data-src", album_artwork.get("src"))) + return Album( title=name, source_list=source_list, artist_list=artist_list, - date=date + date=date, + artwork=album_artwork ) def fetch_album(self, source: Source, stop_at_level: int = 1) -> Album: @@ -795,6 +793,8 @@ class Musify(Page): new_song = self._parse_song_card(card_soup) album.song_collection.append(new_song) + + album.update_tracksort() return album @@ -914,11 +914,18 @@ class Musify(Page): if note_soup is not None: notes.html = note_soup.decode_contents() + # get artist profile artwork + main_artist_artwork: ArtworkCollection = ArtworkCollection() + artist_image_element_list: List[BeautifulSoup] = soup.find_all("img", {"class":"artist-img"}) + for artist_image_element in artist_image_element_list: + main_artist_artwork.add_data(url=artist_image_element.get("data-src", artist_image_element.get("src"))) + return Artist( name=name, country=country, source_list=source_list, - notes=notes + notes=notes, + artwork=main_artist_artwork ) def _parse_album_card(self, album_card: BeautifulSoup, artist_name: str = None, **kwargs) -> Album: @@ -1056,19 +1063,29 @@ class Musify(Page): artist.album_collection.append(album) + def _fetch_artist_artwork(self, source: str, artist: Artist, **kwargs): + # artist artwork + artwork_gallery = self.get_soup_from_response(self.connection.get(source.strip().strip("/") + "/photos")) + if artwork_gallery is not None: + gallery_body_content: BeautifulSoup = artwork_gallery.find(id="bodyContent") + gallery_image_element_list: List[BeautifulSoup] = gallery_body_content.find_all("img") + for gallery_image_element in gallery_image_element_list: + artist.artwork.append(ArtworkVariant(url=gallery_image_element.get("data-src", gallery_image_element.get("src")), width=247, heigth=247)) + + def fetch_artist(self, source: Source, **kwargs) -> Artist: """ TODO [x] discography [x] attributes - [] picture gallery + [x] picture gallery """ url = parse_url(source.url) artist = self._fetch_initial_artist(url, source=source, **kwargs) self._fetch_artist_discography(artist, url, artist.name, **kwargs) - + self._fetch_artist_artwork(url.url, artist, **kwargs) return artist def fetch_label(self, source: Source, stop_at_level: int = 1) -> Label: diff --git a/music_kraken/pages/youtube_music/youtube_music.py b/music_kraken/pages/youtube_music/youtube_music.py index 08e2207..96006ea 100644 --- a/music_kraken/pages/youtube_music/youtube_music.py +++ b/music_kraken/pages/youtube_music/youtube_music.py @@ -1,46 +1,33 @@ -from __future__ import unicode_literals, annotations +from __future__ import annotations, unicode_literals -from typing import Dict, List, Optional, Set, Type -from urllib.parse import urlparse, urlunparse, quote, parse_qs, urlencode +import json import logging import random -import json -from dataclasses import dataclass import re -from functools import lru_cache from collections import defaultdict +from dataclasses import dataclass +from functools import lru_cache +from typing import Dict, List, Optional, Set, Type +from urllib.parse import parse_qs, quote, urlencode, urlparse, urlunparse import youtube_dl from youtube_dl.extractor.youtube import YoutubeIE from youtube_dl.utils import DownloadError +from ...connection import Connection +from ...objects import Album, Artist, ArtworkCollection +from ...objects import DatabaseObject as DataObject +from ...objects import (FormattedText, ID3Timestamp, Label, Lyrics, Song, + Source, Target) +from ...utils import dump_to_file, get_current_millis, traverse_json_path +from ...utils.config import logging_settings, main_settings, youtube_settings +from ...utils.enums import ALL_SOURCE_TYPES, SourceType +from ...utils.enums.album import AlbumType from ...utils.exception.config import SettingValueError -from ...utils.config import main_settings, youtube_settings, logging_settings from ...utils.shared import DEBUG, DEBUG_YOUTUBE_INITIALIZING from ...utils.string_processing import clean_song_title -from ...utils import get_current_millis, traverse_json_path - -from ...utils import dump_to_file - -from ..abstract import Page -from ...objects import ( - DatabaseObject as DataObject, - Source, - FormattedText, - ID3Timestamp, - Artwork, - Artist, - Song, - Album, - Label, - Target, - Lyrics, -) -from ...connection import Connection -from ...utils.enums import SourceType, ALL_SOURCE_TYPES -from ...utils.enums.album import AlbumType from ...utils.support_classes.download_result import DownloadResult - +from ..abstract import Page from ._list_render import parse_renderer from ._music_object_render import parse_run_element from .super_youtube import SuperYouTube @@ -438,6 +425,7 @@ class YoutubeMusic(SuperYouTube): data: dict = r.json() header = data.get("header", {}) musicDetailHeaderRenderer = header.get("musicDetailHeaderRenderer", {}) + musicImmersiveHeaderRenderer = header.get("musicImmersiveHeaderRenderer", {}) title_runs: List[dict] = musicDetailHeaderRenderer.get("title", {}).get("runs", []) subtitle_runs: List[dict] = musicDetailHeaderRenderer.get("subtitle", {}).get("runs", []) @@ -450,6 +438,11 @@ class YoutubeMusic(SuperYouTube): renderer_list = r.json().get("contents", {}).get("singleColumnBrowseResultsRenderer", {}).get("tabs", [{}])[ 0].get("tabRenderer", {}).get("content", {}).get("sectionListRenderer", {}).get("contents", []) + # fetch artist artwork + artist_thumbnails = musicImmersiveHeaderRenderer.get("thumbnail", {}).get("musicThumbnailRenderer", {}).get("thumbnail", {}).get("thumbnails", {}) + for artist_thumbnail in artist_thumbnails: + artist.artwork.append(artist_thumbnail) + if DEBUG: for i, content in enumerate(renderer_list): dump_to_file(f"{i}-artists-renderer.json", json.dumps(content), is_json=True, exit_after_dump=False) @@ -496,7 +489,12 @@ class YoutubeMusic(SuperYouTube): # album details header = data.get("header", {}) musicDetailHeaderRenderer = header.get("musicDetailHeaderRenderer", {}) - + + # album artwork + album_thumbnails = musicDetailHeaderRenderer.get("thumbnail", {}).get("croppedSquareThumbnailRenderer", {}).get("thumbnail", {}).get("thumbnails", {}) + for album_thumbnail in album_thumbnails: + album.artwork.append(value=album_thumbnail) + title_runs: List[dict] = musicDetailHeaderRenderer.get("title", {}).get("runs", []) subtitle_runs: List[dict] = musicDetailHeaderRenderer.get("subtitle", {}).get("runs", []) @@ -646,7 +644,7 @@ class YoutubeMusic(SuperYouTube): note=ydl_res.get("descriptions"), album_list=album_list, length=int(ydl_res.get("duration", 0)) * 1000, - artwork=Artwork(*ydl_res.get("thumbnails", [])), + artwork=ArtworkCollection(*ydl_res.get("thumbnails", [])), artist_list=artist_list, source_list=[Source( self.SOURCE_TYPE, @@ -685,7 +683,7 @@ class YoutubeMusic(SuperYouTube): for album in song.album_list: album.album_type = AlbumType.LIVE_ALBUM for thumbnail in video_details.get("thumbnails", []): - song.artwork.append(**thumbnail) + song.artwork.add_data(**thumbnail) song.lyrics_collection.append(self.fetch_lyrics(browse_id, playlist_id=request_data.get("playlistId"))) diff --git a/music_kraken/utils/__init__.py b/music_kraken/utils/__init__.py index a8d658b..95013e5 100644 --- a/music_kraken/utils/__init__.py +++ b/music_kraken/utils/__init__.py @@ -1,15 +1,18 @@ -from datetime import datetime -from pathlib import Path +import inspect import json import logging -import inspect -from typing import List, Union +from datetime import datetime +from functools import lru_cache +from pathlib import Path +from typing import Any, List, Union -from .shared import DEBUG, DEBUG_LOGGING, DEBUG_DUMP, DEBUG_TRACE, DEBUG_OBJECT_TRACE, DEBUG_OBJECT_TRACE_CALLSTACK from .config import config, read_config, write_config from .enums.colors import BColors -from .path_manager import LOCATIONS from .hacking import merge_args +from .path_manager import LOCATIONS +from .shared import (DEBUG, DEBUG_DUMP, DEBUG_LOGGING, DEBUG_OBJECT_TRACE, + DEBUG_OBJECT_TRACE_CALLSTACK, DEBUG_TRACE, URL_PATTERN) +from .string_processing import hash_url, is_url, unify """ IO functions @@ -125,4 +128,35 @@ def get_current_millis() -> int: def get_unix_time() -> int: - return int(datetime.now().timestamp()) \ No newline at end of file + return int(datetime.now().timestamp()) + + +@lru_cache +def custom_hash(value: Any) -> int: + if is_url(value): + value = hash_url(value) + elif isinstance(value, str): + try: + value = int(value) + except ValueError: + value = unify(value) + + return hash(value) + + +def create_dataclass_instance(t, data: dict): + """Creates an instance of a dataclass with the given data. + It filters out all data key, which has no attribute in the dataclass. + + Args: + t (Type): The dataclass type class + data (dict): the attribute to pass into the constructor + + Returns: + Tuple[Type, dict]: The created instance and a dict, containing the data, which was not used in the creation + """ + + needed_data = {k: v for k, v in data.items() if k in t.__dataclass_fields__} + removed_data = {k: v for k, v in data.items() if k not in t.__dataclass_fields__} + + return t(**needed_data), removed_data diff --git a/music_kraken/utils/config/__init__.py b/music_kraken/utils/config/__init__.py index e1def0a..2543d8a 100644 --- a/music_kraken/utils/config/__init__.py +++ b/music_kraken/utils/config/__init__.py @@ -1,11 +1,8 @@ from typing import Tuple from .config import Config -from .config_files import ( - main_config, - logging_config, - youtube_config, -) +from .config_files import main_config, logging_config, youtube_config + _sections: Tuple[Config, ...] = ( main_config.config, diff --git a/music_kraken/utils/config/config_files/main_config.py b/music_kraken/utils/config/config_files/main_config.py index decdf3b..a7b2ae9 100644 --- a/music_kraken/utils/config/config_files/main_config.py +++ b/music_kraken/utils/config/config_files/main_config.py @@ -18,6 +18,7 @@ config = Config(( AudioFormatAttribute(name="audio_format", default_value="mp3", description="""Music Kraken will stream the audio into this format. You can use Audio formats which support ID3.2 and ID3.1, but you will have cleaner Metadata using ID3.2."""), + Attribute(name="image_format", default_value="jpeg", description="This Changes the format in which images are getting downloaded"), Attribute(name="result_history", default_value=True, description="""If enabled, you can go back to the previous results. The consequence is a higher meory consumption, because every result is saved."""), @@ -28,6 +29,7 @@ The further you choose to be able to go back, the higher the memory usage. EmptyLine(), Attribute(name="preferred_artwork_resolution", default_value=1000), + Attribute(name="download_artist_artworks", default_value=True, description="Enables the fetching of artist galleries."), EmptyLine(), @@ -44,6 +46,7 @@ This means for example, the Studio Albums and EP's are always in front of Single - album_type The folder music kraken should put the songs into."""), Attribute(name="download_file", default_value="{song}.{audio_format}", description="The filename of the audio file."), + Attribute(name="artist_artwork_path", default_value="{genre}/{artist}/{artist}_{image_number}.{image_format}", description="The Path to download artist images to."), SelectAttribute(name="album_type_blacklist", default_value=[ "Compilation Album", "Live Album", @@ -152,10 +155,13 @@ class SettingsStructure(TypedDict): # artwork preferred_artwork_resolution: int + image_format: str + download_artist_artworks: bool # paths music_directory: Path temp_directory: Path + artist_artwork_path: Path log_file: Path not_a_genre_regex: List[str] ffmpeg_binary: Path diff --git a/music_kraken/utils/enums/__init__.py b/music_kraken/utils/enums/__init__.py index 28f0b9f..67e4586 100644 --- a/music_kraken/utils/enums/__init__.py +++ b/music_kraken/utils/enums/__init__.py @@ -1,7 +1,11 @@ from __future__ import annotations from dataclasses import dataclass -from typing import Optional, TYPE_CHECKING, Type +from enum import Enum +from typing import TYPE_CHECKING, Optional, Type + +from mutagen.id3 import PictureType + if TYPE_CHECKING: from ...pages.abstract import Page @@ -52,3 +56,73 @@ class ALL_SOURCE_TYPES: MANUAL = SourceType(name="manual") PRESET = SourceType(name="preset") + + +class PictureType(Enum): + """Enumeration of image types defined by the ID3 standard for the APIC + frame, but also reused in WMA/FLAC/VorbisComment. + + This is copied from mutagen.id3.PictureType + """ + + OTHER = 0 + + FILE_ICON = 1 + """32x32 pixels 'file icon' (PNG only)""" + + OTHER_FILE_ICON = 2 + """Other file icon""" + + COVER_FRONT = 3 + """Cover (front)""" + + COVER_BACK = 4 + """Cover (back)""" + + LEAFLET_PAGE = 5 + """Leaflet page""" + + MEDIA = 6 + """Media (e.g. label side of CD)""" + + LEAD_ARTIST = 7 + """Lead artist/lead performer/soloist""" + + ARTIST = 8 + """Artist/performer""" + + CONDUCTOR = 9 + """Conductor""" + + BAND = 10 + """Band/Orchestra""" + + COMPOSER = 11 + """Composer""" + + LYRICIST = 12 + """Lyricist/text writer""" + + RECORDING_LOCATION = 13 + """Recording Location""" + + DURING_RECORDING = 14 + """During recording""" + + DURING_PERFORMANCE = 15 + """During performance""" + + SCREEN_CAPTURE = 16 + """Movie/video screen capture""" + + FISH = 17 + """A bright colored fish""" + + ILLUSTRATION = 18 + """Illustration""" + + BAND_LOGOTYPE = 19 + """Band/artist logotype""" + + PUBLISHER_LOGOTYPE = 20 + """Publisher/Studio logotype""" diff --git a/music_kraken/utils/string_processing.py b/music_kraken/utils/string_processing.py index b76e3fc..d001c1e 100644 --- a/music_kraken/utils/string_processing.py +++ b/music_kraken/utils/string_processing.py @@ -1,13 +1,15 @@ -from typing import Tuple, Union, Optional -from pathlib import Path +import re import string from functools import lru_cache +from pathlib import Path +from typing import Any, Optional, Tuple, Union +from urllib.parse import ParseResult, parse_qs, urlparse -from transliterate.exceptions import LanguageDetectionError -from transliterate import translit from pathvalidate import sanitize_filename -from urllib.parse import urlparse, ParseResult, parse_qs +from transliterate import translit +from transliterate.exceptions import LanguageDetectionError +from .shared import URL_PATTERN COMMON_TITLE_APPENDIX_LIST: Tuple[str, ...] = ( "(official video)", @@ -229,3 +231,13 @@ def shorten_display_url(url: str, max_length: int = 150, chars_at_end: int = 4, return url return url[:max_length] + shorten_string + url[-chars_at_end:] + +def is_url(value: Any) -> bool: + if isinstance(value, ParseResult): + return True + + if not isinstance(value, str): + return True + + # value has to be a string + return re.match(URL_PATTERN, value) is not None diff --git a/music_kraken/utils/support_classes/download_result.py b/music_kraken/utils/support_classes/download_result.py index 5458a34..67cb1c1 100644 --- a/music_kraken/utils/support_classes/download_result.py +++ b/music_kraken/utils/support_classes/download_result.py @@ -1,9 +1,13 @@ -from dataclasses import dataclass, field -from typing import List, Tuple +from __future__ import annotations -from ...utils.config import main_settings, logging_settings +from dataclasses import dataclass, field +from typing import TYPE_CHECKING, List, Tuple + +if TYPE_CHECKING: + from ...objects import Target + +from ...utils.config import logging_settings, main_settings from ...utils.enums.colors import BColors -from ...objects import Target UNIT_PREFIXES: List[str] = ["", "k", "m", "g", "t"] UNIT_DIVISOR = 1024