From 5d26fdbf9436df8d5ab2bf622d65662ee2d11c79 Mon Sep 17 00:00:00 2001 From: Kur01234 Date: Tue, 4 Jun 2024 07:58:18 +0200 Subject: [PATCH 01/23] Artwork gallery Musify --- music_kraken/objects/song.py | 5 +++++ music_kraken/pages/musify.py | 43 ++++++++++++++++++++++++------------ 2 files changed, 34 insertions(+), 14 deletions(-) diff --git a/music_kraken/objects/song.py b/music_kraken/objects/song.py index 980bc08..37f4269 100644 --- a/music_kraken/objects/song.py +++ b/music_kraken/objects/song.py @@ -477,6 +477,8 @@ class Artist(Base): general_genre: str unformatted_location: str + artwork: List[Artwork] + source_collection: SourceCollection contact_collection: Collection[Contact] @@ -493,6 +495,8 @@ class Artist(Base): "lyrical_themes": list, "general_genre": lambda: "", + "artwork": list, + "source_collection": SourceCollection, "album_collection": Collection, "contact_collection": Collection, @@ -511,6 +515,7 @@ class Artist(Base): notes: FormattedText = None, lyrical_themes: List[str] = None, general_genre: str = None, + artwork: List[Artwork] = None, unformatted_location: str = None, source_list: List[Source] = None, contact_list: List[Contact] = None, diff --git a/music_kraken/pages/musify.py b/music_kraken/pages/musify.py index e8078fb..0374a86 100644 --- a/music_kraken/pages/musify.py +++ b/music_kraken/pages/musify.py @@ -457,17 +457,17 @@ class Musify(Page): for album_info in soup.find_all("ul", {"class": "album-info"}): list_element: BeautifulSoup = album_info.find("li") - if list_element is not None: - artist_soup: BeautifulSoup - for artist_soup in list_element.find_all("a"): - artist_source_list = [] - href = artist_soup["href"] - if href is not None: - artist_source_list = [Source(self.SOURCE_TYPE, self.HOST + href)] - artist_list.append(Artist( - name=artist_soup.text.strip(), - source_list=artist_source_list - )) + if list_element is not None: + artist_soup: BeautifulSoup + for artist_soup in list_element.find_all("a"): + artist_source_list = [] + href = artist_soup["href"] + if href is not None: + artist_source_list = [Source(self.SOURCE_TYPE, self.HOST + href)] + artist_list.append(Artist( + name=artist_soup.text.strip(), + source_list=artist_source_list + )) # breadcrums breadcrumb_list_element_list: List[BeautifulSoup] = soup.find_all("ol", {"class": "breadcrumb"}) @@ -485,7 +485,7 @@ class Musify(Page): track_name = list_points[4].text.strip() - # artwork + # album artwork artwork: Artwork = Artwork() album_image_element_list: List[BeautifulSoup] = soup.find_all("img", {"class": "album-img"}) for album_image_element in album_image_element_list: @@ -918,7 +918,8 @@ class Musify(Page): name=name, country=country, source_list=source_list, - notes=notes + notes=notes, + artwork=self._fetch_artist_artwork(soup, **kwargs) ) def _parse_album_card(self, album_card: BeautifulSoup, artist_name: str = None, **kwargs) -> Album: @@ -1056,6 +1057,20 @@ class Musify(Page): artist.album_collection.append(album) + def _fetch_artist_artwork(self, soup: BeautifulSoup, **kwargs): + # artist artwork + artist_artwork: List[Artwork] = Artwork() + artist_a_element_list: List[BeautifulSoup] = soup.find_all("a") + for artist_a_element in artist_a_element_list: + if artist_a_element.find_all("img", {"class": "artist-img"}).count() > 0: + artwork_gallery = self.connection.get(artist_a_element("data-src", artist_a_element.get("href"))) + if artwork_gallery is not None: + gallery_image_element_list: List[BeautifulSoup] = artwork_gallery.find_all("img", {"class": "artist-img"}) + for gallery_image_element in gallery_image_element_list: + artist_artwork.push(Artwork(url=gallery_image_element.get("data-src", gallery_image_element.get("src")))) + + return artist_artwork + def fetch_artist(self, source: Source, **kwargs) -> Artist: """ TODO @@ -1068,7 +1083,7 @@ class Musify(Page): artist = self._fetch_initial_artist(url, source=source, **kwargs) self._fetch_artist_discography(artist, url, artist.name, **kwargs) - + self._fetch_artist_artwork(artist, **kwargs) return artist def fetch_label(self, source: Source, stop_at_level: int = 1) -> Label: From bc19a94e7f167ccb30ac9d6c36800b1b269ee038 Mon Sep 17 00:00:00 2001 From: Kur01234 Date: Tue, 4 Jun 2024 10:09:17 +0200 Subject: [PATCH 02/23] feat: added parent artwork options --- music_kraken/objects/artwork.py | 45 +++++++++++++++++++++------------ 1 file changed, 29 insertions(+), 16 deletions(-) diff --git a/music_kraken/objects/artwork.py b/music_kraken/objects/artwork.py index 178edf6..4421f05 100644 --- a/music_kraken/objects/artwork.py +++ b/music_kraken/objects/artwork.py @@ -1,18 +1,14 @@ from __future__ import annotations -from typing import List, Optional, Dict, Tuple, Type, Union, TypedDict - -from .collection import Collection -from .metadata import ( - Mapping as id3Mapping, - ID3Timestamp, - Metadata -) -from ..utils.string_processing import unify, hash_url - -from .parents import OuterProxy as Base +from typing import Dict, List, Optional, Set, Tuple, Type, TypedDict, Union from ..utils.config import main_settings +from ..utils.string_processing import hash_url, unify +from .collection import Collection +from .metadata import ID3Timestamp +from .metadata import Mapping as id3Mapping +from .metadata import Metadata +from .parents import OuterProxy as Base class ArtworkVariant(TypedDict): @@ -23,7 +19,9 @@ class ArtworkVariant(TypedDict): class Artwork: - def __init__(self, *variants: List[ArtworkVariant]) -> None: + def __init__(self, *variants: List[ArtworkVariant], parent_artworks: Set[Artwork] = None) -> None: + self.parent_artworks: Set[Artwork] = parent_artworks or set() + self._variant_mapping: Dict[str, ArtworkVariant] = {} for variant in variants: @@ -36,7 +34,7 @@ class Artwork: def append(self, url: str, width: int = main_settings["preferred_artwork_resolution"], height: int = main_settings["preferred_artwork_resolution"], **kwargs) -> None: if url is None: return - + self._variant_mapping[hash_url(url=url)] = { "url": url, "width": width, @@ -44,21 +42,36 @@ class Artwork: "deviation": self._calculate_deviation(width, height), } + @property + def flat_empty(self) -> bool: + return len(self._variant_mapping.keys()) <= 0 + + def _get_best_from_list(self, artwork_variants: List[ArtworkVariant]) -> Optional[ArtworkVariant]: + return min(artwork_variants, key=lambda x: x["deviation"]) + @property def best_variant(self) -> ArtworkVariant: - if len(self._variant_mapping.keys()) <= 0: - return None - return min(self._variant_mapping.values(), key=lambda x: x["deviation"]) + if self.flat_empty: + return self._get_best_from_list([parent.best_variant for parent in self.parent_artworks]) + return self._get_best_from_list(self._variant_mapping.values()) def get_variant_name(self, variant: ArtworkVariant) -> str: return f"artwork_{variant['width']}x{variant['height']}_{hash_url(variant['url']).replace('/', '_')}" def __merge__(self, other: Artwork, **kwargs) -> None: + self.parent_artworks.update(other.parent_artworks) + for key, value in other._variant_mapping.items(): if key not in self._variant_mapping: self._variant_mapping[key] = value + def __hash__(self) -> int: + return id(self) + def __eq__(self, other: Artwork) -> bool: + if hash(self) == hash(other): + return True + if not isinstance(other, Artwork): return False return any(a == b for a, b in zip(self._variant_mapping.keys(), other._variant_mapping.keys())) From 49c37345260e9873c1e387bbc7c15f1ed3bc36b8 Mon Sep 17 00:00:00 2001 From: Kur01234 Date: Tue, 4 Jun 2024 10:11:46 +0200 Subject: [PATCH 03/23] feat: added hooks for collection on append --- music_kraken/objects/collection.py | 48 ++++++++++++++++++++++++++---- 1 file changed, 43 insertions(+), 5 deletions(-) diff --git a/music_kraken/objects/collection.py b/music_kraken/objects/collection.py index f1d7e75..0296c70 100644 --- a/music_kraken/objects/collection.py +++ b/music_kraken/objects/collection.py @@ -1,16 +1,44 @@ from __future__ import annotations -from collections import defaultdict -from typing import TypeVar, Generic, Dict, Optional, Iterable, List, Iterator, Tuple, Generator, Union, Any, Set import copy +from collections import defaultdict +from dataclasses import dataclass +from typing import (Any, Callable, Dict, Generator, Generic, Iterable, + Iterator, List, Optional, Set, Tuple, TypeVar, Union) -from .parents import OuterProxy -from ..utils import object_trace -from ..utils import output, BColors +from ..utils import BColors, object_trace, output +from .parents import InnerData, OuterProxy T = TypeVar('T', bound=OuterProxy) +@dataclass +class AppendHookArguments: + """ + This class is used to store the arguments for the append hook. + The best explanation is with an examples: + + ``` + # this is the action that triggers the append hook + album = Album() + song = Song() + album.song_collection.append(song) + ``` + + In this case, the append hook is triggered with the following arguments: + ``` + AppendHookArguments( + collection=album.song_collection, + new_object=song, + collection_root_objects=[album] + ) + ``` + """ + + collection: Collection + new_object: T + collection_root_objects: Set[InnerData] + class Collection(Generic[T]): __is_collection__ = True @@ -27,6 +55,7 @@ class Collection(Generic[T]): sync_on_append: Dict[str, Collection] = None, append_object_to_attribute: Dict[str, T] = None, extend_object_to_attribute: Dict[str, Collection] = None, + append_callbacks: List[Callable[[AppendHookArguments], None]] = None, ) -> None: self._collection_for: dict = dict() @@ -41,6 +70,7 @@ class Collection(Generic[T]): self.sync_on_append: Dict[str, Collection] = sync_on_append or {} self.pull_from: List[Collection] = [] self.push_to: List[Collection] = [] + self.append_callbacks: List[Callable[[AppendHookArguments], None]] = append_callbacks or [] # This is to cleanly unmap previously mapped items by their id self._indexed_from_id: Dict[int, Dict[str, Any]] = defaultdict(dict) @@ -141,6 +171,14 @@ class Collection(Generic[T]): for attribute, new_object in self.append_object_to_attribute.items(): other.__getattribute__(attribute).append(new_object, **kwargs) + append_hook_args = AppendHookArguments( + collection=self, + new_object=other, + collection_root_objects=self._collection_for.keys(), + ) + for callback in self.append_callbacks: + callback(append_hook_args) + def append(self, other: Optional[T], **kwargs): """ If an object, that represents the same entity exists in a relevant collection, From eb8fd5e580f0d14411a46ff6d15f9f3d8faeb093 Mon Sep 17 00:00:00 2001 From: Kur01234 Date: Tue, 4 Jun 2024 10:13:34 +0200 Subject: [PATCH 04/23] feat: added artist.artwork to data structure --- music_kraken/objects/song.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/music_kraken/objects/song.py b/music_kraken/objects/song.py index 37f4269..00a951b 100644 --- a/music_kraken/objects/song.py +++ b/music_kraken/objects/song.py @@ -477,7 +477,7 @@ class Artist(Base): general_genre: str unformatted_location: str - artwork: List[Artwork] + artwork: Artwork source_collection: SourceCollection contact_collection: Collection[Contact] @@ -495,7 +495,7 @@ class Artist(Base): "lyrical_themes": list, "general_genre": lambda: "", - "artwork": list, + "artwork": Artwork, "source_collection": SourceCollection, "album_collection": Collection, @@ -515,7 +515,7 @@ class Artist(Base): notes: FormattedText = None, lyrical_themes: List[str] = None, general_genre: str = None, - artwork: List[Artwork] = None, + artwork: Artwork = None, unformatted_location: str = None, source_list: List[Source] = None, contact_list: List[Contact] = None, From 1ef4b27f288e52868c8404f255358817e685f6a4 Mon Sep 17 00:00:00 2001 From: Kur01234 Date: Tue, 4 Jun 2024 10:31:23 +0200 Subject: [PATCH 05/23] feat: added album.artwork to datastructure --- music_kraken/objects/song.py | 23 ++++++++++++++++++++++- 1 file changed, 22 insertions(+), 1 deletion(-) diff --git a/music_kraken/objects/song.py b/music_kraken/objects/song.py index 00a951b..48b52c4 100644 --- a/music_kraken/objects/song.py +++ b/music_kraken/objects/song.py @@ -8,7 +8,7 @@ import copy import pycountry from ..utils.enums.album import AlbumType, AlbumStatus -from .collection import Collection +from .collection import Collection, AppendHookArguments from .formatted_text import FormattedText from .lyrics import Lyrics from .contact import Contact @@ -144,6 +144,14 @@ class Song(Base): UPWARDS_COLLECTION_STRING_ATTRIBUTES = ("artist_collection", "feature_artist_collection", "album_collection") TITEL = "title" + @staticmethod + def register_artwork_parent(append_hook_arguments: AppendHookArguments): + album: Album = append_hook_arguments.new_object + + song: Song + for song in append_hook_arguments.collection_root_objects: + song.artwork.parent_artworks.add(album.artwork) + def __init_collections__(self) -> None: self.feature_artist_collection.push_to = [self.artist_collection] self.artist_collection.pull_from = [self.feature_artist_collection] @@ -161,6 +169,7 @@ class Song(Base): self.feature_artist_collection.extend_object_to_attribute = { "album_collection": self.album_collection } + self.album_collection.append_callbacks = set((Song.register_artwork_parent, )) def _add_other_db_objects(self, object_type: Type[OuterProxy], object_list: List[OuterProxy]): if object_type is Song: @@ -249,6 +258,7 @@ class Album(Base): albumsort: int notes: FormattedText + artwork: Artwork source_collection: SourceCollection song_collection: Collection[Song] @@ -268,6 +278,7 @@ class Album(Base): "date": ID3Timestamp, "notes": FormattedText, + "artwork": lambda: Artwork(crop_images=False), "source_collection": SourceCollection, "song_collection": Collection, @@ -290,6 +301,7 @@ class Album(Base): barcode: str = None, albumsort: int = None, notes: FormattedText = None, + artwork: Artwork = None, source_list: List[Source] = None, artist_list: List[Artist] = None, song_list: List[Song] = None, @@ -304,6 +316,13 @@ class Album(Base): DOWNWARDS_COLLECTION_STRING_ATTRIBUTES = ("song_collection",) UPWARDS_COLLECTION_STRING_ATTRIBUTES = ("label_collection", "artist_collection") + @staticmethod + def register_artwork_parent(append_hook_arguments: AppendHookArguments): + song: Song = append_hook_arguments.new_object + + for root_object in append_hook_arguments.collection_root_objects: + song.artwork.parent_artworks.add(root_object.artwork) + def __init_collections__(self): self.feature_artist_collection.push_to = [self.artist_collection] self.artist_collection.pull_from = [self.feature_artist_collection] @@ -322,6 +341,8 @@ class Album(Base): "label_collection": self.label_collection } + self.song_collection.append_callbacks = set((Album.register_artwork_parent, )) + def _add_other_db_objects(self, object_type: Type[OuterProxy], object_list: List[OuterProxy]): if object_type is Song: self.song_collection.extend(object_list) From 05ee09e25fc196f805b5ab6d7297c62054784db0 Mon Sep 17 00:00:00 2001 From: Kur01234 Date: Tue, 4 Jun 2024 10:58:21 +0200 Subject: [PATCH 06/23] feat: musify completed --- music_kraken/pages/musify.py | 41 +++++++++++++++++++++++------------- 1 file changed, 26 insertions(+), 15 deletions(-) diff --git a/music_kraken/pages/musify.py b/music_kraken/pages/musify.py index 0374a86..c8e51d1 100644 --- a/music_kraken/pages/musify.py +++ b/music_kraken/pages/musify.py @@ -754,11 +754,18 @@ class Musify(Page): except ValueError: self.LOGGER.debug(f"Raw datetime doesn't match time format %Y-%m-%d: {raw_datetime}") + # album artwork + album_artwork: Artwork = Artwork() + album_artwork_list: List[BeautifulSoup] = soup.find_all("img", {"class":"artist-img"}) + for album_artwork in album_artwork_list: + album_artwork.append(url=album_artwork.get("data-src", album_artwork.get("src"))) + return Album( title=name, source_list=source_list, artist_list=artist_list, - date=date + date=date, + artwork=album_artwork ) def fetch_album(self, source: Source, stop_at_level: int = 1) -> Album: @@ -795,6 +802,8 @@ class Musify(Page): new_song = self._parse_song_card(card_soup) album.song_collection.append(new_song) + + album.update_tracksort() return album @@ -914,12 +923,18 @@ class Musify(Page): if note_soup is not None: notes.html = note_soup.decode_contents() + # get artist profile artwork + main_artist_artwork: Artwork = Artwork() + artist_image_element_list: List[BeautifulSoup] = soup.find_all("img", {"class":"artist-img"}) + for artist_image_element in artist_image_element_list: + main_artist_artwork.append(url=artist_image_element.get("data-src", artist_image_element.get("src"))) + return Artist( name=name, country=country, source_list=source_list, notes=notes, - artwork=self._fetch_artist_artwork(soup, **kwargs) + artwork=main_artist_artwork ) def _parse_album_card(self, album_card: BeautifulSoup, artist_name: str = None, **kwargs) -> Album: @@ -1057,33 +1072,29 @@ class Musify(Page): artist.album_collection.append(album) - def _fetch_artist_artwork(self, soup: BeautifulSoup, **kwargs): + def _fetch_artist_artwork(self, source: str, artist: Artist, **kwargs): # artist artwork - artist_artwork: List[Artwork] = Artwork() - artist_a_element_list: List[BeautifulSoup] = soup.find_all("a") - for artist_a_element in artist_a_element_list: - if artist_a_element.find_all("img", {"class": "artist-img"}).count() > 0: - artwork_gallery = self.connection.get(artist_a_element("data-src", artist_a_element.get("href"))) - if artwork_gallery is not None: - gallery_image_element_list: List[BeautifulSoup] = artwork_gallery.find_all("img", {"class": "artist-img"}) - for gallery_image_element in gallery_image_element_list: - artist_artwork.push(Artwork(url=gallery_image_element.get("data-src", gallery_image_element.get("src")))) + artwork_gallery = self.get_soup_from_response(self.connection.get(source.strip().strip("/") + "/photos")) + if artwork_gallery is not None: + gallery_body_content: BeautifulSoup = artwork_gallery.find(id="bodyContent") + gallery_image_element_list: List[BeautifulSoup] = gallery_body_content.find_all("img") + for gallery_image_element in gallery_image_element_list: + artist.artwork.append(url=gallery_image_element.get("data-src", gallery_image_element.get("src")), width=247, heigth=247) - return artist_artwork def fetch_artist(self, source: Source, **kwargs) -> Artist: """ TODO [x] discography [x] attributes - [] picture gallery + [x] picture gallery """ url = parse_url(source.url) artist = self._fetch_initial_artist(url, source=source, **kwargs) self._fetch_artist_discography(artist, url, artist.name, **kwargs) - self._fetch_artist_artwork(artist, **kwargs) + self._fetch_artist_artwork(url.url, artist, **kwargs) return artist def fetch_label(self, source: Source, stop_at_level: int = 1) -> Label: From d51e3a56fb64dfa7366e1c89237d133307a59d6a Mon Sep 17 00:00:00 2001 From: Kur01234 Date: Tue, 4 Jun 2024 11:04:00 +0200 Subject: [PATCH 07/23] feat: structure changes to artwork and collection objects --- music_kraken/objects/artwork.py | 3 ++- music_kraken/objects/collection.py | 21 ++++++++++----------- 2 files changed, 12 insertions(+), 12 deletions(-) diff --git a/music_kraken/objects/artwork.py b/music_kraken/objects/artwork.py index 4421f05..1ff9edc 100644 --- a/music_kraken/objects/artwork.py +++ b/music_kraken/objects/artwork.py @@ -19,7 +19,8 @@ class ArtworkVariant(TypedDict): class Artwork: - def __init__(self, *variants: List[ArtworkVariant], parent_artworks: Set[Artwork] = None) -> None: + def __init__(self, *variants: List[ArtworkVariant], parent_artworks: Set[Artwork] = None, crop_images: bool = True) -> None: + self.crop_images: bool = crop_images self.parent_artworks: Set[Artwork] = parent_artworks or set() self._variant_mapping: Dict[str, ArtworkVariant] = {} diff --git a/music_kraken/objects/collection.py b/music_kraken/objects/collection.py index 0296c70..687b069 100644 --- a/music_kraken/objects/collection.py +++ b/music_kraken/objects/collection.py @@ -19,19 +19,18 @@ class AppendHookArguments: The best explanation is with an examples: ``` - # this is the action that triggers the append hook - album = Album() - song = Song() - album.song_collection.append(song) + album = Album() + song = Song() + album.song_collection.append(song) ``` In this case, the append hook is triggered with the following arguments: ``` - AppendHookArguments( - collection=album.song_collection, - new_object=song, - collection_root_objects=[album] - ) + AppendHookArguments( + collection=album.song_collection, + new_object=song, + collection_root_objects=[album] + ) ``` """ @@ -55,7 +54,7 @@ class Collection(Generic[T]): sync_on_append: Dict[str, Collection] = None, append_object_to_attribute: Dict[str, T] = None, extend_object_to_attribute: Dict[str, Collection] = None, - append_callbacks: List[Callable[[AppendHookArguments], None]] = None, + append_callbacks: Set[Callable[[AppendHookArguments], None]] = None, ) -> None: self._collection_for: dict = dict() @@ -70,7 +69,7 @@ class Collection(Generic[T]): self.sync_on_append: Dict[str, Collection] = sync_on_append or {} self.pull_from: List[Collection] = [] self.push_to: List[Collection] = [] - self.append_callbacks: List[Callable[[AppendHookArguments], None]] = append_callbacks or [] + self.append_callbacks: Set[Callable[[AppendHookArguments], None]] = append_callbacks or set() # This is to cleanly unmap previously mapped items by their id self._indexed_from_id: Dict[int, Dict[str, Any]] = defaultdict(dict) From d83e40ed838dfaa7ddb8b9de7161e2b845cda724 Mon Sep 17 00:00:00 2001 From: Kur01234 Date: Tue, 4 Jun 2024 11:44:48 +0200 Subject: [PATCH 08/23] feat: config changes --- music_kraken/utils/config/config_files/main_config.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/music_kraken/utils/config/config_files/main_config.py b/music_kraken/utils/config/config_files/main_config.py index decdf3b..8ed3ef1 100644 --- a/music_kraken/utils/config/config_files/main_config.py +++ b/music_kraken/utils/config/config_files/main_config.py @@ -18,6 +18,7 @@ config = Config(( AudioFormatAttribute(name="audio_format", default_value="mp3", description="""Music Kraken will stream the audio into this format. You can use Audio formats which support ID3.2 and ID3.1, but you will have cleaner Metadata using ID3.2."""), + Attribute(name="image_format", default_value="jpg", description="This Changes the format in which images are getting downloaded") Attribute(name="result_history", default_value=True, description="""If enabled, you can go back to the previous results. The consequence is a higher meory consumption, because every result is saved."""), @@ -28,6 +29,7 @@ The further you choose to be able to go back, the higher the memory usage. EmptyLine(), Attribute(name="preferred_artwork_resolution", default_value=1000), + Attribute(name="download_artist_artworks", default_value=True, description="Changes if the artists Profile picture is being downloaded."), EmptyLine(), @@ -44,6 +46,7 @@ This means for example, the Studio Albums and EP's are always in front of Single - album_type The folder music kraken should put the songs into."""), Attribute(name="download_file", default_value="{song}.{audio_format}", description="The filename of the audio file."), + Attribute(name="artist_artwork_path" default_value="{genre}/{artist}/{artist}.{image_format}", description="The Path to download artist images to."), SelectAttribute(name="album_type_blacklist", default_value=[ "Compilation Album", "Live Album", @@ -152,10 +155,13 @@ class SettingsStructure(TypedDict): # artwork preferred_artwork_resolution: int + image_format: str + download_artist_artworks: bool # paths music_directory: Path temp_directory: Path + artist_artwork_path: Path log_file: Path not_a_genre_regex: List[str] ffmpeg_binary: Path From 7d23ecac066dd96f83080f15f526920ed7be0a16 Mon Sep 17 00:00:00 2001 From: Kur01234 Date: Wed, 5 Jun 2024 08:34:37 +0200 Subject: [PATCH 09/23] feat: bandcamp artist artwork --- music_kraken/download/page_attributes.py | 81 +++++++++++++++++-- music_kraken/objects/artwork.py | 6 +- music_kraken/pages/bandcamp.py | 6 +- music_kraken/utils/config/__init__.py | 7 +- .../utils/config/config_files/main_config.py | 6 +- 5 files changed, 89 insertions(+), 17 deletions(-) diff --git a/music_kraken/download/page_attributes.py b/music_kraken/download/page_attributes.py index 1db24be..eba3274 100644 --- a/music_kraken/download/page_attributes.py +++ b/music_kraken/download/page_attributes.py @@ -4,6 +4,8 @@ from pathlib import Path import re import logging +from PIL import Image + from . import FetchOptions, DownloadOptions from .results import SearchResults from ..objects import ( @@ -17,6 +19,7 @@ from ..objects import ( Artist, Label, ) +from ..objects.artwork import ArtworkVariant from ..audio import write_metadata_to_target, correct_codec from ..utils import output, BColors from ..utils.string_processing import fit_to_file_system @@ -29,9 +32,11 @@ from ..utils.support_classes.download_result import DownloadResult from ..utils.exception import MKMissingNameException from ..utils.exception.download import UrlNotFoundException from ..utils.shared import DEBUG_PAGES +from ..connection import Connection from ..pages import Page, EncyclopaediaMetallum, Musify, YouTube, YoutubeMusic, Bandcamp, Genius, INDEPENDENT_DB_OBJECTS +artwork_connection: Connection = Connection() ALL_PAGES: Set[Type[Page]] = { # EncyclopaediaMetallum, @@ -162,11 +167,64 @@ class Pages: return False + def download_artwork_variant_to_target(self, artwork_variant: ArtworkVariant, target: Target): + + r = artwork_connection.get( + url=artwork_variant["url"], + name=artwork_variant["url"], + ) + + temp_target: Target = Target.temp() + with temp_target.open("wb") as f: + f.write(r.content) + + converted_target: Target = Target.temp(file_extension=main_settings["image_format"]) + with Image.open(temp_target.file_path) as img: + # crop the image if it isn't square in the middle with minimum data loss + width, height = img.size + if width != height: + if width > height: + img = img.crop((width // 2 - height // 2, 0, width // 2 + height // 2, height)) + else: + img = img.crop((0, height // 2 - width // 2, width, height // 2 + width // 2)) + + # resize the image to the preferred resolution + img.thumbnail((main_settings["preferred_artwork_resolution"], main_settings["preferred_artwork_resolution"])) + + # https://stackoverflow.com/a/59476938/16804841 + if img.mode != 'RGB': + img = img.convert('RGB') + + img.save(converted_target.file_path, main_settings["image_format"]) + + + + def _fetch_artist_artwork(self, artist: Artist, naming: dict): + naming: Dict[str, List[str]] = defaultdict(list, naming) + naming["artist"].append(artist.name) + naming["label"].extend([l.title_value for l in artist.label_collection]) + # removing duplicates from the naming, and process the strings + for key, value in naming.items(): + # https://stackoverflow.com/a/17016257 + naming[key] = list(dict.fromkeys(value)) + + artwork: Artwork = artist.artwork + for image_number, variant in enumerate(artwork): + naming["image_number"] = [str(image_number)] + + url: str = variant["url"] + + target = Target( + relative_to_music_dir=True, + file_path=Path(self._parse_path_template(main_settings["artist_artwork_path"], naming=naming)) + ) + self.download_artwork_variant_to_target(variant, target) + def download(self, data_object: DataObject, genre: str, **kwargs) -> DownloadResult: # fetch the given object self.fetch_details(data_object) output(f"\nDownloading {data_object.option_string}...", color=BColors.BOLD) - + # fetching all parent objects (e.g. if you only download a song) if not kwargs.get("fetched_upwards", False): to_fetch: List[DataObject] = [data_object] @@ -185,7 +243,17 @@ class Pages: to_fetch = new_to_fetch kwargs["fetched_upwards"] = True - + + naming = kwargs.get("naming", { + "genre": [genre], + "audio_format": [main_settings["audio_format"]], + "image_format": [main_settings["image_format"]] + }) + + # download artist artwork + if isinstance(data_object, Artist): + self._fetch_artist_artwork(artist=data_object, naming=naming) + # download all children download_result: DownloadResult = DownloadResult() for c in data_object.get_child_collections(): @@ -203,10 +271,7 @@ class Pages: I am able to do that, because duplicate values are removed later on. """ - self._download_song(data_object, naming={ - "genre": [genre], - "audio_format": [main_settings["audio_format"]], - }) + self._download_song(data_object, naming=naming) return download_result @@ -325,4 +390,6 @@ class Pages: _actual_page = self._source_to_page[source.source_type] - return _actual_page, self._page_instances[_actual_page].fetch_object_from_source(source=source, stop_at_level=stop_at_level) \ No newline at end of file + return _actual_page, self._page_instances[_actual_page].fetch_object_from_source(source=source, stop_at_level=stop_at_level) + + \ No newline at end of file diff --git a/music_kraken/objects/artwork.py b/music_kraken/objects/artwork.py index 1ff9edc..6b5c096 100644 --- a/music_kraken/objects/artwork.py +++ b/music_kraken/objects/artwork.py @@ -10,7 +10,6 @@ from .metadata import Mapping as id3Mapping from .metadata import Metadata from .parents import OuterProxy as Base - class ArtworkVariant(TypedDict): url: str width: int @@ -76,3 +75,8 @@ class Artwork: if not isinstance(other, Artwork): return False return any(a == b for a, b in zip(self._variant_mapping.keys(), other._variant_mapping.keys())) + + def __iter__(self) -> Generator[ArtworkVariant, None, None]: + yield from self._variant_mapping.values() + + \ No newline at end of file diff --git a/music_kraken/pages/bandcamp.py b/music_kraken/pages/bandcamp.py index 1caf803..0eae4ac 100644 --- a/music_kraken/pages/bandcamp.py +++ b/music_kraken/pages/bandcamp.py @@ -239,6 +239,11 @@ class Bandcamp(Page): for subsoup in html_music_grid.find_all("li"): artist.album_collection.append(self._parse_album(soup=subsoup, initial_source=source)) + # artist artwork + artist_artwork: BeautifulSoup = soup.find("img", {"class":"band-photo"}) + if artist_artwork is not None: + artist.artwork.append(artist_artwork.get("data-src", artist_artwork.get("src"))) + for i, data_blob_soup in enumerate(soup.find_all("div", {"id": ["pagedata", "collectors-data"]})): data_blob = data_blob_soup["data-blob"] @@ -316,7 +321,6 @@ class Bandcamp(Page): artwork.append(url=_artwork_url, width=350, height=350) break - for i, track_json in enumerate(data.get("track", {}).get("itemListElement", [])): if DEBUG: dump_to_file(f"album_track_{i}.json", json.dumps(track_json), is_json=True, exit_after_dump=False) diff --git a/music_kraken/utils/config/__init__.py b/music_kraken/utils/config/__init__.py index e1def0a..2543d8a 100644 --- a/music_kraken/utils/config/__init__.py +++ b/music_kraken/utils/config/__init__.py @@ -1,11 +1,8 @@ from typing import Tuple from .config import Config -from .config_files import ( - main_config, - logging_config, - youtube_config, -) +from .config_files import main_config, logging_config, youtube_config + _sections: Tuple[Config, ...] = ( main_config.config, diff --git a/music_kraken/utils/config/config_files/main_config.py b/music_kraken/utils/config/config_files/main_config.py index 8ed3ef1..a7b2ae9 100644 --- a/music_kraken/utils/config/config_files/main_config.py +++ b/music_kraken/utils/config/config_files/main_config.py @@ -18,7 +18,7 @@ config = Config(( AudioFormatAttribute(name="audio_format", default_value="mp3", description="""Music Kraken will stream the audio into this format. You can use Audio formats which support ID3.2 and ID3.1, but you will have cleaner Metadata using ID3.2."""), - Attribute(name="image_format", default_value="jpg", description="This Changes the format in which images are getting downloaded") + Attribute(name="image_format", default_value="jpeg", description="This Changes the format in which images are getting downloaded"), Attribute(name="result_history", default_value=True, description="""If enabled, you can go back to the previous results. The consequence is a higher meory consumption, because every result is saved."""), @@ -29,7 +29,7 @@ The further you choose to be able to go back, the higher the memory usage. EmptyLine(), Attribute(name="preferred_artwork_resolution", default_value=1000), - Attribute(name="download_artist_artworks", default_value=True, description="Changes if the artists Profile picture is being downloaded."), + Attribute(name="download_artist_artworks", default_value=True, description="Enables the fetching of artist galleries."), EmptyLine(), @@ -46,7 +46,7 @@ This means for example, the Studio Albums and EP's are always in front of Single - album_type The folder music kraken should put the songs into."""), Attribute(name="download_file", default_value="{song}.{audio_format}", description="The filename of the audio file."), - Attribute(name="artist_artwork_path" default_value="{genre}/{artist}/{artist}.{image_format}", description="The Path to download artist images to."), + Attribute(name="artist_artwork_path", default_value="{genre}/{artist}/{artist}_{image_number}.{image_format}", description="The Path to download artist images to."), SelectAttribute(name="album_type_blacklist", default_value=[ "Compilation Album", "Live Album", From 3118140f0f2d8c52c7940982216f1f5d0bcaa027 Mon Sep 17 00:00:00 2001 From: Kur01234 Date: Wed, 5 Jun 2024 09:47:02 +0200 Subject: [PATCH 10/23] feat: fix saving img in tmp --- music_kraken/download/page_attributes.py | 3 ++- music_kraken/pages/bandcamp.py | 26 ++++++++++++++++++++---- 2 files changed, 24 insertions(+), 5 deletions(-) diff --git a/music_kraken/download/page_attributes.py b/music_kraken/download/page_attributes.py index eba3274..320192f 100644 --- a/music_kraken/download/page_attributes.py +++ b/music_kraken/download/page_attributes.py @@ -174,6 +174,7 @@ class Pages: name=artwork_variant["url"], ) + temp_target: Target = Target.temp() with temp_target.open("wb") as f: f.write(r.content) @@ -195,7 +196,7 @@ class Pages: if img.mode != 'RGB': img = img.convert('RGB') - img.save(converted_target.file_path, main_settings["image_format"]) + img.save(target.file_path, main_settings["image_format"]) diff --git a/music_kraken/pages/bandcamp.py b/music_kraken/pages/bandcamp.py index 0eae4ac..2125c6e 100644 --- a/music_kraken/pages/bandcamp.py +++ b/music_kraken/pages/bandcamp.py @@ -292,9 +292,15 @@ class Bandcamp(Page): artist_source_list = [] if "@id" in artist_data: artist_source_list = [Source(self.SOURCE_TYPE, _parse_artist_url(artist_data["@id"]))] + + + source_list: List[Source] = [source] + if "mainEntityOfPage" in data or "@id" in data: + source_list.append(Source(self.SOURCE_TYPE, data.get("mainEntityOfPage", data["@id"]))) + album = Album( title=data["name"].strip(), - source_list=[Source(self.SOURCE_TYPE, data.get("mainEntityOfPage", data["@id"]))], + source_list=source_list, date=ID3Timestamp.strptime(data["datePublished"], "%d %b %Y %H:%M:%S %Z"), artist_list=[Artist( name=artist_data["name"].strip(), @@ -366,17 +372,29 @@ class Bandcamp(Page): for key, value in other_data.get("trackinfo", [{}])[0].get("file", {"": None}).items(): mp3_url = value + source_list: List[Source] = [source] + if "mainEntityOfPage" in data or "@id" in data: + source_list.append(Source(self.SOURCE_TYPE, data.get("mainEntityOfPage", data["@id"]), audio_url=mp3_url)) + + source_list_album: List[Source] = [source] + if "@id" in album_data: + source_list_album.append(Source(self.SOURCE_TYPE, album_data["@id"])) + + source_list_artist: List[Source] = [source] + if "@id" in artist_data: + source_list_artist.append(Source(self.SOURCE_TYPE, _parse_artist_url(artist_data["@id"]))) + song = Song( title=clean_song_title(data["name"], artist_name=artist_data["name"]), - source_list=[source, Source(self.SOURCE_TYPE, data.get("mainEntityOfPage", data["@id"]), audio_url=mp3_url)], + source_list=source_list, album_list=[Album( title=album_data["name"].strip(), date=ID3Timestamp.strptime(data["datePublished"], "%d %b %Y %H:%M:%S %Z"), - source_list=[Source(self.SOURCE_TYPE, album_data["@id"])] + source_list=source_list_album )], artist_list=[Artist( name=artist_data["name"].strip(), - source_list=[Source(self.SOURCE_TYPE, _parse_artist_url(artist_data["@id"]))] + source_list=source_list_artist )], lyrics_list=self._fetch_lyrics(soup=soup) ) From df98a70717cd1dea82e1658025762f1580a0abea Mon Sep 17 00:00:00 2001 From: Lars Noack Date: Wed, 5 Jun 2024 12:05:38 +0200 Subject: [PATCH 11/23] feat: renamed artwork --- music_kraken/objects/__init__.py | 33 ++++------- music_kraken/objects/artwork.py | 13 ++-- music_kraken/objects/song.py | 59 +++++++++---------- music_kraken/pages/bandcamp.py | 41 +++++-------- music_kraken/pages/genius.py | 41 +++++-------- music_kraken/pages/musify.py | 34 ++++------- .../pages/youtube_music/youtube_music.py | 47 ++++++--------- 7 files changed, 105 insertions(+), 163 deletions(-) diff --git a/music_kraken/objects/__init__.py b/music_kraken/objects/__init__.py index 7c7515d..2a85abf 100644 --- a/music_kraken/objects/__init__.py +++ b/music_kraken/objects/__init__.py @@ -1,27 +1,16 @@ from typing_extensions import TypeVar + +from .artwork import ArtworkCollection +from .collection import Collection +from .contact import Contact +from .country import Country +from .formatted_text import FormattedText +from .metadata import ID3Timestamp +from .metadata import Mapping as ID3Mapping +from .metadata import Metadata from .option import Options - -from .metadata import Metadata, Mapping as ID3Mapping, ID3Timestamp - +from .parents import OuterProxy +from .song import Album, Artist, Label, Lyrics, Song, Target from .source import Source, SourceType -from .song import ( - Song, - Album, - Artist, - Target, - Lyrics, - Label -) - -from .formatted_text import FormattedText -from .collection import Collection - -from .country import Country -from .contact import Contact - -from .parents import OuterProxy - -from .artwork import Artwork - DatabaseObject = OuterProxy diff --git a/music_kraken/objects/artwork.py b/music_kraken/objects/artwork.py index 6b5c096..eb5fa51 100644 --- a/music_kraken/objects/artwork.py +++ b/music_kraken/objects/artwork.py @@ -10,6 +10,7 @@ from .metadata import Mapping as id3Mapping from .metadata import Metadata from .parents import OuterProxy as Base + class ArtworkVariant(TypedDict): url: str width: int @@ -17,10 +18,10 @@ class ArtworkVariant(TypedDict): deviation: float -class Artwork: - def __init__(self, *variants: List[ArtworkVariant], parent_artworks: Set[Artwork] = None, crop_images: bool = True) -> None: +class ArtworkCollection: + def __init__(self, *variants: List[ArtworkVariant], parent_artworks: Set[ArtworkCollection] = None, crop_images: bool = True) -> None: self.crop_images: bool = crop_images - self.parent_artworks: Set[Artwork] = parent_artworks or set() + self.parent_artworks: Set[ArtworkCollection] = parent_artworks or set() self._variant_mapping: Dict[str, ArtworkVariant] = {} @@ -58,7 +59,7 @@ class Artwork: def get_variant_name(self, variant: ArtworkVariant) -> str: return f"artwork_{variant['width']}x{variant['height']}_{hash_url(variant['url']).replace('/', '_')}" - def __merge__(self, other: Artwork, **kwargs) -> None: + def __merge__(self, other: ArtworkCollection, **kwargs) -> None: self.parent_artworks.update(other.parent_artworks) for key, value in other._variant_mapping.items(): @@ -68,11 +69,11 @@ class Artwork: def __hash__(self) -> int: return id(self) - def __eq__(self, other: Artwork) -> bool: + def __eq__(self, other: ArtworkCollection) -> bool: if hash(self) == hash(other): return True - if not isinstance(other, Artwork): + if not isinstance(other, ArtworkCollection): return False return any(a == b for a, b in zip(self._variant_mapping.keys(), other._variant_mapping.keys())) diff --git a/music_kraken/objects/song.py b/music_kraken/objects/song.py index 48b52c4..567fdd5 100644 --- a/music_kraken/objects/song.py +++ b/music_kraken/objects/song.py @@ -1,35 +1,32 @@ from __future__ import annotations +import copy import random from collections import defaultdict -from typing import List, Optional, Dict, Tuple, Type, Union -import copy +from typing import Dict, List, Optional, Tuple, Type, Union import pycountry -from ..utils.enums.album import AlbumType, AlbumStatus -from .collection import Collection, AppendHookArguments -from .formatted_text import FormattedText -from .lyrics import Lyrics -from .contact import Contact -from .artwork import Artwork -from .metadata import ( - Mapping as id3Mapping, - ID3Timestamp, - Metadata -) -from .option import Options -from .parents import OuterProxy, P -from .source import Source, SourceCollection -from .target import Target -from .country import Language, Country +from ..utils.config import main_settings +from ..utils.enums.album import AlbumStatus, AlbumType +from ..utils.enums.colors import BColors from ..utils.shared import DEBUG_PRINT_ID from ..utils.string_processing import unify - +from .artwork import ArtworkCollection +from .collection import AppendHookArguments, Collection +from .contact import Contact +from .country import Country, Language +from .formatted_text import FormattedText +from .lyrics import Lyrics +from .metadata import ID3Timestamp +from .metadata import Mapping as id3Mapping +from .metadata import Metadata +from .option import Options +from .parents import OuterProxy from .parents import OuterProxy as Base - -from ..utils.config import main_settings -from ..utils.enums.colors import BColors +from .parents import P +from .source import Source, SourceCollection +from .target import Target """ All Objects dependent @@ -89,7 +86,7 @@ class Song(Base): genre: str note: FormattedText tracksort: int - artwork: Artwork + artwork: ArtworkCollection source_collection: SourceCollection target_collection: Collection[Target] @@ -105,7 +102,7 @@ class Song(Base): "source_collection": SourceCollection, "target_collection": Collection, "lyrics_collection": Collection, - "artwork": Artwork, + "artwork": ArtworkCollection, "album_collection": Collection, "artist_collection": Collection, @@ -133,7 +130,7 @@ class Song(Base): feature_artist_list: List[Artist] = None, album_list: List[Album] = None, tracksort: int = 0, - artwork: Optional[Artwork] = None, + artwork: Optional[ArtworkCollection] = None, **kwargs ) -> None: real_kwargs = copy.copy(locals()) @@ -258,7 +255,7 @@ class Album(Base): albumsort: int notes: FormattedText - artwork: Artwork + artwork: ArtworkCollection source_collection: SourceCollection song_collection: Collection[Song] @@ -278,7 +275,7 @@ class Album(Base): "date": ID3Timestamp, "notes": FormattedText, - "artwork": lambda: Artwork(crop_images=False), + "artwork": lambda: ArtworkCollection(crop_images=False), "source_collection": SourceCollection, "song_collection": Collection, @@ -301,7 +298,7 @@ class Album(Base): barcode: str = None, albumsort: int = None, notes: FormattedText = None, - artwork: Artwork = None, + artwork: ArtworkCollection = None, source_list: List[Source] = None, artist_list: List[Artist] = None, song_list: List[Song] = None, @@ -498,7 +495,7 @@ class Artist(Base): general_genre: str unformatted_location: str - artwork: Artwork + artwork: ArtworkCollection source_collection: SourceCollection contact_collection: Collection[Contact] @@ -516,7 +513,7 @@ class Artist(Base): "lyrical_themes": list, "general_genre": lambda: "", - "artwork": Artwork, + "artwork": ArtworkCollection, "source_collection": SourceCollection, "album_collection": Collection, @@ -536,7 +533,7 @@ class Artist(Base): notes: FormattedText = None, lyrical_themes: List[str] = None, general_genre: str = None, - artwork: Artwork = None, + artwork: ArtworkCollection = None, unformatted_location: str = None, source_list: List[Source] = None, contact_list: List[Contact] = None, diff --git a/music_kraken/pages/bandcamp.py b/music_kraken/pages/bandcamp.py index 2125c6e..0941f5e 100644 --- a/music_kraken/pages/bandcamp.py +++ b/music_kraken/pages/bandcamp.py @@ -1,33 +1,22 @@ -from typing import List, Optional, Type -from urllib.parse import urlparse, urlunparse import json from enum import Enum -from bs4 import BeautifulSoup -import pycountry +from typing import List, Optional, Type +from urllib.parse import urlparse, urlunparse + +import pycountry +from bs4 import BeautifulSoup -from ..objects import Source, DatabaseObject -from .abstract import Page -from ..objects import ( - Artist, - Source, - SourceType, - Song, - Album, - Label, - Target, - Contact, - ID3Timestamp, - Lyrics, - FormattedText, - Artwork, -) from ..connection import Connection +from ..objects import (Album, Artist, ArtworkCollection, Contact, + DatabaseObject, FormattedText, ID3Timestamp, Label, + Lyrics, Song, Source, SourceType, Target) from ..utils import dump_to_file -from ..utils.enums import SourceType, ALL_SOURCE_TYPES -from ..utils.support_classes.download_result import DownloadResult -from ..utils.string_processing import clean_song_title -from ..utils.config import main_settings, logging_settings +from ..utils.config import logging_settings, main_settings +from ..utils.enums import ALL_SOURCE_TYPES, SourceType from ..utils.shared import DEBUG +from ..utils.string_processing import clean_song_title +from ..utils.support_classes.download_result import DownloadResult +from .abstract import Page if DEBUG: from ..utils import dump_to_file @@ -258,7 +247,7 @@ class Bandcamp(Page): artist.source_collection.append(source) return artist - def _parse_track_element(self, track: dict, artwork: Artwork) -> Optional[Song]: + def _parse_track_element(self, track: dict, artwork: ArtworkCollection) -> Optional[Song]: lyrics_list: List[Lyrics] = [] _lyrics: Optional[str] = track.get("item", {}).get("recordingOf", {}).get("lyrics", {}).get("text") @@ -308,7 +297,7 @@ class Bandcamp(Page): )] ) - artwork: Artwork = Artwork() + artwork: ArtworkCollection = ArtworkCollection() def _get_artwork_url(_data: dict) -> Optional[str]: if "image" in _data: diff --git a/music_kraken/pages/genius.py b/music_kraken/pages/genius.py index 5afa556..df7bafc 100644 --- a/music_kraken/pages/genius.py +++ b/music_kraken/pages/genius.py @@ -1,33 +1,22 @@ -from typing import List, Optional, Type -from urllib.parse import urlparse, urlunparse, urlencode import json from enum import Enum -from bs4 import BeautifulSoup -import pycountry +from typing import List, Optional, Type +from urllib.parse import urlencode, urlparse, urlunparse + +import pycountry +from bs4 import BeautifulSoup -from ..objects import Source, DatabaseObject -from .abstract import Page -from ..objects import ( - Artist, - Source, - SourceType, - Song, - Album, - Label, - Target, - Contact, - ID3Timestamp, - Lyrics, - FormattedText, - Artwork, -) from ..connection import Connection +from ..objects import (Album, Artist, ArtworkCollection, Contact, + DatabaseObject, FormattedText, ID3Timestamp, Label, + Lyrics, Song, Source, SourceType, Target) from ..utils import dump_to_file, traverse_json_path -from ..utils.enums import SourceType, ALL_SOURCE_TYPES -from ..utils.support_classes.download_result import DownloadResult -from ..utils.string_processing import clean_song_title -from ..utils.config import main_settings, logging_settings +from ..utils.config import logging_settings, main_settings +from ..utils.enums import ALL_SOURCE_TYPES, SourceType from ..utils.shared import DEBUG +from ..utils.string_processing import clean_song_title +from ..utils.support_classes.download_result import DownloadResult +from .abstract import Page if DEBUG: from ..utils import dump_to_file @@ -56,7 +45,7 @@ class Genius(Page): return Song - def add_to_artwork(self, artwork: Artwork, url: str): + def add_to_artwork(self, artwork: ArtworkCollection, url: str): if url is None: return @@ -83,7 +72,7 @@ class Genius(Page): return None object_type = data.get("_type") - artwork = Artwork() + artwork = ArtworkCollection() self.add_to_artwork(artwork, data.get("header_image_url")) self.add_to_artwork(artwork, data.get("image_url")) diff --git a/music_kraken/pages/musify.py b/music_kraken/pages/musify.py index c8e51d1..d636bce 100644 --- a/music_kraken/pages/musify.py +++ b/music_kraken/pages/musify.py @@ -1,34 +1,24 @@ from collections import defaultdict from dataclasses import dataclass from enum import Enum -from typing import List, Optional, Type, Union, Generator, Dict, Any +from typing import Any, Dict, Generator, List, Optional, Type, Union from urllib.parse import urlparse import pycountry from bs4 import BeautifulSoup from ..connection import Connection -from .abstract import Page -from ..utils.enums import SourceType, ALL_SOURCE_TYPES -from ..utils.enums.album import AlbumType, AlbumStatus -from ..objects import ( - Artist, - Source, - Song, - Album, - ID3Timestamp, - FormattedText, - Label, - Target, - DatabaseObject, - Lyrics, - Artwork -) +from ..objects import (Album, Artist, ArtworkCollection, DatabaseObject, + FormattedText, ID3Timestamp, Label, Lyrics, Song, + Source, Target) +from ..utils import shared, string_processing from ..utils.config import logging_settings, main_settings -from ..utils import string_processing, shared +from ..utils.enums import ALL_SOURCE_TYPES, SourceType +from ..utils.enums.album import AlbumStatus, AlbumType from ..utils.string_processing import clean_song_title -from ..utils.support_classes.query import Query from ..utils.support_classes.download_result import DownloadResult +from ..utils.support_classes.query import Query +from .abstract import Page """ https://musify.club/artist/ghost-bath-280348?_pjax=#bodyContent @@ -486,7 +476,7 @@ class Musify(Page): track_name = list_points[4].text.strip() # album artwork - artwork: Artwork = Artwork() + artwork: ArtworkCollection = ArtworkCollection() album_image_element_list: List[BeautifulSoup] = soup.find_all("img", {"class": "album-img"}) for album_image_element in album_image_element_list: artwork.append(url=album_image_element.get("data-src", album_image_element.get("src"))) @@ -755,7 +745,7 @@ class Musify(Page): self.LOGGER.debug(f"Raw datetime doesn't match time format %Y-%m-%d: {raw_datetime}") # album artwork - album_artwork: Artwork = Artwork() + album_artwork: ArtworkCollection = ArtworkCollection() album_artwork_list: List[BeautifulSoup] = soup.find_all("img", {"class":"artist-img"}) for album_artwork in album_artwork_list: album_artwork.append(url=album_artwork.get("data-src", album_artwork.get("src"))) @@ -924,7 +914,7 @@ class Musify(Page): notes.html = note_soup.decode_contents() # get artist profile artwork - main_artist_artwork: Artwork = Artwork() + main_artist_artwork: ArtworkCollection = ArtworkCollection() artist_image_element_list: List[BeautifulSoup] = soup.find_all("img", {"class":"artist-img"}) for artist_image_element in artist_image_element_list: main_artist_artwork.append(url=artist_image_element.get("data-src", artist_image_element.get("src"))) diff --git a/music_kraken/pages/youtube_music/youtube_music.py b/music_kraken/pages/youtube_music/youtube_music.py index 08e2207..9780977 100644 --- a/music_kraken/pages/youtube_music/youtube_music.py +++ b/music_kraken/pages/youtube_music/youtube_music.py @@ -1,46 +1,33 @@ -from __future__ import unicode_literals, annotations +from __future__ import annotations, unicode_literals -from typing import Dict, List, Optional, Set, Type -from urllib.parse import urlparse, urlunparse, quote, parse_qs, urlencode +import json import logging import random -import json -from dataclasses import dataclass import re -from functools import lru_cache from collections import defaultdict +from dataclasses import dataclass +from functools import lru_cache +from typing import Dict, List, Optional, Set, Type +from urllib.parse import parse_qs, quote, urlencode, urlparse, urlunparse import youtube_dl from youtube_dl.extractor.youtube import YoutubeIE from youtube_dl.utils import DownloadError +from ...connection import Connection +from ...objects import Album, Artist, ArtworkCollection +from ...objects import DatabaseObject as DataObject +from ...objects import (FormattedText, ID3Timestamp, Label, Lyrics, Song, + Source, Target) +from ...utils import dump_to_file, get_current_millis, traverse_json_path +from ...utils.config import logging_settings, main_settings, youtube_settings +from ...utils.enums import ALL_SOURCE_TYPES, SourceType +from ...utils.enums.album import AlbumType from ...utils.exception.config import SettingValueError -from ...utils.config import main_settings, youtube_settings, logging_settings from ...utils.shared import DEBUG, DEBUG_YOUTUBE_INITIALIZING from ...utils.string_processing import clean_song_title -from ...utils import get_current_millis, traverse_json_path - -from ...utils import dump_to_file - -from ..abstract import Page -from ...objects import ( - DatabaseObject as DataObject, - Source, - FormattedText, - ID3Timestamp, - Artwork, - Artist, - Song, - Album, - Label, - Target, - Lyrics, -) -from ...connection import Connection -from ...utils.enums import SourceType, ALL_SOURCE_TYPES -from ...utils.enums.album import AlbumType from ...utils.support_classes.download_result import DownloadResult - +from ..abstract import Page from ._list_render import parse_renderer from ._music_object_render import parse_run_element from .super_youtube import SuperYouTube @@ -646,7 +633,7 @@ class YoutubeMusic(SuperYouTube): note=ydl_res.get("descriptions"), album_list=album_list, length=int(ydl_res.get("duration", 0)) * 1000, - artwork=Artwork(*ydl_res.get("thumbnails", [])), + artwork=ArtworkCollection(*ydl_res.get("thumbnails", [])), artist_list=artist_list, source_list=[Source( self.SOURCE_TYPE, From d447b103803b8a95e4833d0b2f31d9c6ad746ab7 Mon Sep 17 00:00:00 2001 From: Kur01234 Date: Wed, 5 Jun 2024 13:33:18 +0200 Subject: [PATCH 12/23] feat: youtube music album and artist artwork --- music_kraken/pages/youtube_music/youtube_music.py | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/music_kraken/pages/youtube_music/youtube_music.py b/music_kraken/pages/youtube_music/youtube_music.py index 08e2207..334bf15 100644 --- a/music_kraken/pages/youtube_music/youtube_music.py +++ b/music_kraken/pages/youtube_music/youtube_music.py @@ -438,6 +438,7 @@ class YoutubeMusic(SuperYouTube): data: dict = r.json() header = data.get("header", {}) musicDetailHeaderRenderer = header.get("musicDetailHeaderRenderer", {}) + musicImmersiveHeaderRenderer = header.get("musicImmersiveHeaderRenderer", {}) title_runs: List[dict] = musicDetailHeaderRenderer.get("title", {}).get("runs", []) subtitle_runs: List[dict] = musicDetailHeaderRenderer.get("subtitle", {}).get("runs", []) @@ -450,6 +451,11 @@ class YoutubeMusic(SuperYouTube): renderer_list = r.json().get("contents", {}).get("singleColumnBrowseResultsRenderer", {}).get("tabs", [{}])[ 0].get("tabRenderer", {}).get("content", {}).get("sectionListRenderer", {}).get("contents", []) + # fetch artist artwork + artist_thumbnails = musicImmersiveHeaderRenderer.get("thumbnail", {}).get("musicThumbnailRenderer", {}).get("thumbnail", {}).get("thumbnails", {}) + for artist_thumbnail in artist_thumbnails: + artist.artwork.append(**artist_thumbnail) + if DEBUG: for i, content in enumerate(renderer_list): dump_to_file(f"{i}-artists-renderer.json", json.dumps(content), is_json=True, exit_after_dump=False) @@ -496,7 +502,12 @@ class YoutubeMusic(SuperYouTube): # album details header = data.get("header", {}) musicDetailHeaderRenderer = header.get("musicDetailHeaderRenderer", {}) - + + # album artwork + album_thumbnails = musicDetailHeaderRenderer.get("thumbnail", {}).get("croppedSquareThumbnailRenderer", {}).get("thumbnail", {}).get("thumbnails", {}) + for album_thumbnail in album_thumbnails: + album.artwork.append(**album_thumbnail) + title_runs: List[dict] = musicDetailHeaderRenderer.get("title", {}).get("runs", []) subtitle_runs: List[dict] = musicDetailHeaderRenderer.get("subtitle", {}).get("runs", []) From 4e50bb1fba5aeb4b9e631230ecdbd9dbe6b10261 Mon Sep 17 00:00:00 2001 From: Lars Noack Date: Thu, 6 Jun 2024 17:53:17 +0200 Subject: [PATCH 13/23] draft implemented add_data --- .vscode/settings.json | 1 + music_kraken/audio/metadata.py | 14 +- music_kraken/objects/artwork.py | 130 ++++++++++++++---- music_kraken/objects/song.py | 4 + music_kraken/pages/bandcamp.py | 6 +- music_kraken/pages/genius.py | 8 +- music_kraken/pages/musify.py | 8 +- .../pages/youtube_music/youtube_music.py | 2 +- music_kraken/utils/__init__.py | 47 ++++++- music_kraken/utils/enums/__init__.py | 76 +++++++++- music_kraken/utils/string_processing.py | 20 ++- 11 files changed, 255 insertions(+), 61 deletions(-) diff --git a/.vscode/settings.json b/.vscode/settings.json index fbc21fa..48df21d 100644 --- a/.vscode/settings.json +++ b/.vscode/settings.json @@ -20,6 +20,7 @@ "APIC", "Bandcamp", "bitrate", + "CALLSTACK", "DEEZER", "dotenv", "encyclopaedia", diff --git a/music_kraken/audio/metadata.py b/music_kraken/audio/metadata.py index bceb775..e68ea24 100644 --- a/music_kraken/audio/metadata.py +++ b/music_kraken/audio/metadata.py @@ -1,21 +1,21 @@ -import mutagen -from mutagen.id3 import ID3, Frame, APIC, USLT +import logging from pathlib import Path from typing import List -import logging + +import mutagen +from mutagen.id3 import APIC, ID3, USLT, Frame from PIL import Image -from ..utils.config import logging_settings, main_settings -from ..objects import Song, Target, Metadata -from ..objects.metadata import Mapping from ..connection import Connection +from ..objects import Metadata, Song, Target +from ..objects.metadata import Mapping +from ..utils.config import logging_settings, main_settings LOGGER = logging_settings["tagging_logger"] artwork_connection: Connection = Connection() - class AudioMetadata: def __init__(self, file_location: str = None) -> None: self._file_location = None diff --git a/music_kraken/objects/artwork.py b/music_kraken/objects/artwork.py index eb5fa51..94396fc 100644 --- a/music_kraken/objects/artwork.py +++ b/music_kraken/objects/artwork.py @@ -1,8 +1,12 @@ from __future__ import annotations +from copy import copy +from dataclasses import dataclass, field from typing import Dict, List, Optional, Set, Tuple, Type, TypedDict, Union +from ..utils import create_dataclass_instance, custom_hash from ..utils.config import main_settings +from ..utils.enums import PictureType from ..utils.string_processing import hash_url, unify from .collection import Collection from .metadata import ID3Timestamp @@ -11,37 +15,113 @@ from .metadata import Metadata from .parents import OuterProxy as Base -class ArtworkVariant(TypedDict): +@dataclass +class ArtworkVariant: url: str - width: int - height: int - deviation: float + width: Optional[int] = None + height: Optional[int] = None + image_format: Optional[str] = "" + + def __hash__(self) -> int: + return custom_hash(self.url) + + def __eq__(self, other: ArtworkVariant) -> bool: + return hash(self) == hash(other) + + def __contains__(self, other: str) -> bool: + return custom_hash(other) == hash(self.url) + + +@dataclass +class Artwork: + variants: List[ArtworkVariant] = field(default_factory=list) + + artwork_type: PictureType = PictureType.OTHER + + def search_variant(self, url: str) -> Optional[ArtworkVariant]: + if url is None: + return None + + for variant in self.variants: + if url in variant: + return variant + + return None + + def __contains__(self, other: str) -> bool: + return self.search_variant(other) is not None + + def add_data(self, **kwargs) -> None: + variant = self.search_variant(kwargs.get("url")) + + if variant is None: + variant, kwargs = create_dataclass_instance(ArtworkVariant, **kwargs) + self.variants.append(variant) + + variant.url = url + variant.__dict__.update(kwargs) class ArtworkCollection: - def __init__(self, *variants: List[ArtworkVariant], parent_artworks: Set[ArtworkCollection] = None, crop_images: bool = True) -> None: - self.crop_images: bool = crop_images + """ + Stores all the images/artworks for one data object. + + There could be duplicates before calling ArtworkCollection.compile() + _this is called before one object is downloaded automatically._ + """ + + artwork_type: PictureType = PictureType.OTHER + + def __init__( + self, + *data: List[Union[Artwork, ArtworkVariant, dict]], + parent_artworks: Set[ArtworkCollection] = None, + crop_images: bool = True + ) -> None: + # this is used for the song artwork, to fall back to the song artwork self.parent_artworks: Set[ArtworkCollection] = parent_artworks or set() + self.crop_images: bool = crop_images - self._variant_mapping: Dict[str, ArtworkVariant] = {} + self._data = [] - for variant in variants: - self.append(**variant) + def search_artwork(self, url: str) -> Optional[ArtworkVariant]: + for artwork in self._data: + if url in artwork: + return artwork - @staticmethod - def _calculate_deviation(*dimensions: List[int]) -> float: - return sum(abs(d - main_settings["preferred_artwork_resolution"]) for d in dimensions) / len(dimensions) + return None + + def __contains__(self, other: str) -> bool: + return self.search_artwork(other) is not None - def append(self, url: str, width: int = main_settings["preferred_artwork_resolution"], height: int = main_settings["preferred_artwork_resolution"], **kwargs) -> None: - if url is None: + def _create_new_artwork(self, **kwargs) -> Tuple[Artwork, dict]: + kwargs["artwork_type"] = kwargs.get("artwork_type", self.artwork_type) + + return create_dataclass_instance(Artwork, **kwargs) + + def add_data(self, url: str, **kwargs) -> None: + kwargs["url"] = url + + artwork = self.search_artwork(url) + + if artwork is None: + artwork, kwargs = self._create_new_artwork(url=url, **kwargs) + self._data.append(artwork) + + artwork.add_data(url, **kwargs) + + def append(self, value: Union[Artwork, ArtworkVariant, dict], **kwargs): + if isinstance(value, dict): + kwargs.update(value) + value, kwargs = create_dataclass_instance(ArtworkVariant, kwargs) + + if isinstance(value, ArtworkVariant): + kwargs["variants"] = [value] + value, kwargs = create_dataclass_instance(Artwork, kwargs) + + if isinstance(value, Artwork): + self._data.append(value) return - - self._variant_mapping[hash_url(url=url)] = { - "url": url, - "width": width, - "height": height, - "deviation": self._calculate_deviation(width, height), - } @property def flat_empty(self) -> bool: @@ -69,14 +149,6 @@ class ArtworkCollection: def __hash__(self) -> int: return id(self) - def __eq__(self, other: ArtworkCollection) -> bool: - if hash(self) == hash(other): - return True - - if not isinstance(other, ArtworkCollection): - return False - return any(a == b for a, b in zip(self._variant_mapping.keys(), other._variant_mapping.keys())) - def __iter__(self) -> Generator[ArtworkVariant, None, None]: yield from self._variant_mapping.values() diff --git a/music_kraken/objects/song.py b/music_kraken/objects/song.py index 567fdd5..f39aa96 100644 --- a/music_kraken/objects/song.py +++ b/music_kraken/objects/song.py @@ -184,6 +184,10 @@ class Song(Base): self.album_collection.extend(object_list) return + def _compile(self): + self.artwork.compile() + + INDEX_DEPENDS_ON = ("title", "isrc", "source_collection") @property diff --git a/music_kraken/pages/bandcamp.py b/music_kraken/pages/bandcamp.py index 0941f5e..fe58fb3 100644 --- a/music_kraken/pages/bandcamp.py +++ b/music_kraken/pages/bandcamp.py @@ -231,7 +231,7 @@ class Bandcamp(Page): # artist artwork artist_artwork: BeautifulSoup = soup.find("img", {"class":"band-photo"}) if artist_artwork is not None: - artist.artwork.append(artist_artwork.get("data-src", artist_artwork.get("src"))) + artist.artwork.add_data(artist_artwork.get("data-src", artist_artwork.get("src"))) for i, data_blob_soup in enumerate(soup.find_all("div", {"id": ["pagedata", "collectors-data"]})): data_blob = data_blob_soup["data-blob"] @@ -308,12 +308,12 @@ class Bandcamp(Page): _artwork_url = _get_artwork_url(data) if _artwork_url is not None: - artwork.append(url=_artwork_url, width=350, height=350) + artwork.add_data(url=_artwork_url, width=350, height=350) else: for album_release in data.get("albumRelease", []): _artwork_url = _get_artwork_url(album_release) if _artwork_url is not None: - artwork.append(url=_artwork_url, width=350, height=350) + artwork.add_data(url=_artwork_url, width=350, height=350) break for i, track_json in enumerate(data.get("track", {}).get("itemListElement", [])): diff --git a/music_kraken/pages/genius.py b/music_kraken/pages/genius.py index df7bafc..3b8f184 100644 --- a/music_kraken/pages/genius.py +++ b/music_kraken/pages/genius.py @@ -51,21 +51,21 @@ class Genius(Page): url_frags = url.split(".") if len(url_frags) < 2: - artwork.append(url=url) + artwork.add_data(url=url) return dimensions = url_frags[-2].split("x") if len(dimensions) < 2: - artwork.append(url=url) + artwork.add_data(url=url) return if len(dimensions) == 3: dimensions = dimensions[:-1] try: - artwork.append(url=url, width=int(dimensions[0]), height=int(dimensions[1])) + artwork.add_data(url=url, width=int(dimensions[0]), height=int(dimensions[1])) except ValueError: - artwork.append(url=url) + artwork.add_data(url=url) def parse_api_object(self, data: dict) -> Optional[DatabaseObject]: if data is None: diff --git a/music_kraken/pages/musify.py b/music_kraken/pages/musify.py index d636bce..ed1caed 100644 --- a/music_kraken/pages/musify.py +++ b/music_kraken/pages/musify.py @@ -479,7 +479,7 @@ class Musify(Page): artwork: ArtworkCollection = ArtworkCollection() album_image_element_list: List[BeautifulSoup] = soup.find_all("img", {"class": "album-img"}) for album_image_element in album_image_element_list: - artwork.append(url=album_image_element.get("data-src", album_image_element.get("src"))) + artwork.add_data(url=album_image_element.get("data-src", album_image_element.get("src"))) # lyrics lyrics_container: List[BeautifulSoup] = soup.find_all("div", {"id": "tabLyrics"}) @@ -748,7 +748,7 @@ class Musify(Page): album_artwork: ArtworkCollection = ArtworkCollection() album_artwork_list: List[BeautifulSoup] = soup.find_all("img", {"class":"artist-img"}) for album_artwork in album_artwork_list: - album_artwork.append(url=album_artwork.get("data-src", album_artwork.get("src"))) + album_artwork.add_data(url=album_artwork.get("data-src", album_artwork.get("src"))) return Album( title=name, @@ -917,7 +917,7 @@ class Musify(Page): main_artist_artwork: ArtworkCollection = ArtworkCollection() artist_image_element_list: List[BeautifulSoup] = soup.find_all("img", {"class":"artist-img"}) for artist_image_element in artist_image_element_list: - main_artist_artwork.append(url=artist_image_element.get("data-src", artist_image_element.get("src"))) + main_artist_artwork.add_data(url=artist_image_element.get("data-src", artist_image_element.get("src"))) return Artist( name=name, @@ -1069,7 +1069,7 @@ class Musify(Page): gallery_body_content: BeautifulSoup = artwork_gallery.find(id="bodyContent") gallery_image_element_list: List[BeautifulSoup] = gallery_body_content.find_all("img") for gallery_image_element in gallery_image_element_list: - artist.artwork.append(url=gallery_image_element.get("data-src", gallery_image_element.get("src")), width=247, heigth=247) + artist.artwork.add_data(url=gallery_image_element.get("data-src", gallery_image_element.get("src")), width=247, heigth=247) def fetch_artist(self, source: Source, **kwargs) -> Artist: diff --git a/music_kraken/pages/youtube_music/youtube_music.py b/music_kraken/pages/youtube_music/youtube_music.py index 9780977..22b7c7c 100644 --- a/music_kraken/pages/youtube_music/youtube_music.py +++ b/music_kraken/pages/youtube_music/youtube_music.py @@ -672,7 +672,7 @@ class YoutubeMusic(SuperYouTube): for album in song.album_list: album.album_type = AlbumType.LIVE_ALBUM for thumbnail in video_details.get("thumbnails", []): - song.artwork.append(**thumbnail) + song.artwork.add_data(**thumbnail) song.lyrics_collection.append(self.fetch_lyrics(browse_id, playlist_id=request_data.get("playlistId"))) diff --git a/music_kraken/utils/__init__.py b/music_kraken/utils/__init__.py index a8d658b..a0be99c 100644 --- a/music_kraken/utils/__init__.py +++ b/music_kraken/utils/__init__.py @@ -1,15 +1,18 @@ -from datetime import datetime -from pathlib import Path +import inspect import json import logging -import inspect -from typing import List, Union +from datetime import datetime +from functools import lru_cache +from pathlib import Path +from typing import Any, List, Union -from .shared import DEBUG, DEBUG_LOGGING, DEBUG_DUMP, DEBUG_TRACE, DEBUG_OBJECT_TRACE, DEBUG_OBJECT_TRACE_CALLSTACK from .config import config, read_config, write_config from .enums.colors import BColors -from .path_manager import LOCATIONS from .hacking import merge_args +from .path_manager import LOCATIONS +from .shared import (DEBUG, DEBUG_DUMP, DEBUG_LOGGING, DEBUG_OBJECT_TRACE, + DEBUG_OBJECT_TRACE_CALLSTACK, DEBUG_TRACE, URL_PATTERN) +from .string_processing import hash_url, is_url, unify """ IO functions @@ -125,4 +128,34 @@ def get_current_millis() -> int: def get_unix_time() -> int: - return int(datetime.now().timestamp()) \ No newline at end of file + return int(datetime.now().timestamp()) + + +@lru_cache +def custom_hash(value: Any) -> int: + if is_url(value): + value = hash_url(value) + elif isinstance(value, str): + try: + value = int(value) + except ValueError: + value = unify(value) + + return hash(value) + + +def create_dataclass_instance(t, data: dict): + """Creates an instance of a dataclass with the given data. + It filters out all data key, which has no attribute in the dataclass. + + Args: + t (Type): The dataclass type class + data (dict): the attribute to pass into the constructor + + Returns: + Tuple[Type, dict]: The created instance and a dict, containing the data, which was not used in the creation + """ + + data = {k: v for k, v in data.items() if hasattr(t, k)} + removed_data = {k: v for k, v in data.items() if not hasattr(t, k)} + return t(**data), removed_data diff --git a/music_kraken/utils/enums/__init__.py b/music_kraken/utils/enums/__init__.py index 28f0b9f..67e4586 100644 --- a/music_kraken/utils/enums/__init__.py +++ b/music_kraken/utils/enums/__init__.py @@ -1,7 +1,11 @@ from __future__ import annotations from dataclasses import dataclass -from typing import Optional, TYPE_CHECKING, Type +from enum import Enum +from typing import TYPE_CHECKING, Optional, Type + +from mutagen.id3 import PictureType + if TYPE_CHECKING: from ...pages.abstract import Page @@ -52,3 +56,73 @@ class ALL_SOURCE_TYPES: MANUAL = SourceType(name="manual") PRESET = SourceType(name="preset") + + +class PictureType(Enum): + """Enumeration of image types defined by the ID3 standard for the APIC + frame, but also reused in WMA/FLAC/VorbisComment. + + This is copied from mutagen.id3.PictureType + """ + + OTHER = 0 + + FILE_ICON = 1 + """32x32 pixels 'file icon' (PNG only)""" + + OTHER_FILE_ICON = 2 + """Other file icon""" + + COVER_FRONT = 3 + """Cover (front)""" + + COVER_BACK = 4 + """Cover (back)""" + + LEAFLET_PAGE = 5 + """Leaflet page""" + + MEDIA = 6 + """Media (e.g. label side of CD)""" + + LEAD_ARTIST = 7 + """Lead artist/lead performer/soloist""" + + ARTIST = 8 + """Artist/performer""" + + CONDUCTOR = 9 + """Conductor""" + + BAND = 10 + """Band/Orchestra""" + + COMPOSER = 11 + """Composer""" + + LYRICIST = 12 + """Lyricist/text writer""" + + RECORDING_LOCATION = 13 + """Recording Location""" + + DURING_RECORDING = 14 + """During recording""" + + DURING_PERFORMANCE = 15 + """During performance""" + + SCREEN_CAPTURE = 16 + """Movie/video screen capture""" + + FISH = 17 + """A bright colored fish""" + + ILLUSTRATION = 18 + """Illustration""" + + BAND_LOGOTYPE = 19 + """Band/artist logotype""" + + PUBLISHER_LOGOTYPE = 20 + """Publisher/Studio logotype""" diff --git a/music_kraken/utils/string_processing.py b/music_kraken/utils/string_processing.py index b76e3fc..b53d245 100644 --- a/music_kraken/utils/string_processing.py +++ b/music_kraken/utils/string_processing.py @@ -1,13 +1,14 @@ -from typing import Tuple, Union, Optional -from pathlib import Path import string from functools import lru_cache +from pathlib import Path +from typing import Any, Optional, Tuple, Union +from urllib.parse import ParseResult, parse_qs, urlparse -from transliterate.exceptions import LanguageDetectionError -from transliterate import translit from pathvalidate import sanitize_filename -from urllib.parse import urlparse, ParseResult, parse_qs +from transliterate import translit +from transliterate.exceptions import LanguageDetectionError +from .shared import URL_PATTERN COMMON_TITLE_APPENDIX_LIST: Tuple[str, ...] = ( "(official video)", @@ -229,3 +230,12 @@ def shorten_display_url(url: str, max_length: int = 150, chars_at_end: int = 4, return url return url[:max_length] + shorten_string + url[-chars_at_end:] + +def is_url(value: Any) -> bool: + if isinstance(value, ParseResult): + return True + + if not isinstance(value, str): + return True + + return re.match(URL_PATTERN, query) is not None From eef3ea7f079766990efff3c0385fcfdeae8647a7 Mon Sep 17 00:00:00 2001 From: Lars Noack Date: Fri, 7 Jun 2024 11:15:23 +0200 Subject: [PATCH 14/23] feat: removed distracting code --- music_kraken/objects/artwork.py | 40 ++++++++++++++++++++------------- 1 file changed, 25 insertions(+), 15 deletions(-) diff --git a/music_kraken/objects/artwork.py b/music_kraken/objects/artwork.py index 94396fc..a63129d 100644 --- a/music_kraken/objects/artwork.py +++ b/music_kraken/objects/artwork.py @@ -41,7 +41,7 @@ class Artwork: def search_variant(self, url: str) -> Optional[ArtworkVariant]: if url is None: return None - + for variant in self.variants: if url in variant: return variant @@ -61,6 +61,12 @@ class Artwork: variant.url = url variant.__dict__.update(kwargs) + @property + def url(self) -> Optional[str]: + if len(self.variants) <= 0: + return None + return self.variants[0].url + class ArtworkCollection: """ @@ -123,6 +129,23 @@ class ArtworkCollection: self._data.append(value) return + def __merge__(self, other: ArtworkCollection, **kwargs) -> None: + self.parent_artworks.update(other.parent_artworks) + + for key, value in other._variant_mapping.items(): + if key not in self._variant_mapping: + self._variant_mapping[key] = value + + def __hash__(self) -> int: + return id(self) + + def __iter__(self) -> Generator[Artwork, None, None]: + yield from self._data + + def get_urls(self) -> Generator[str, None, None]: + yield from (artwork.url for artwork in self._data if artwork.url is not None) + + """ @property def flat_empty(self) -> bool: return len(self._variant_mapping.keys()) <= 0 @@ -138,18 +161,5 @@ class ArtworkCollection: def get_variant_name(self, variant: ArtworkVariant) -> str: return f"artwork_{variant['width']}x{variant['height']}_{hash_url(variant['url']).replace('/', '_')}" - - def __merge__(self, other: ArtworkCollection, **kwargs) -> None: - self.parent_artworks.update(other.parent_artworks) - - for key, value in other._variant_mapping.items(): - if key not in self._variant_mapping: - self._variant_mapping[key] = value - - def __hash__(self) -> int: - return id(self) - - def __iter__(self) -> Generator[ArtworkVariant, None, None]: - yield from self._variant_mapping.values() - + """ \ No newline at end of file From 346d273201deb62ccb9bb2aeb8d45198065295a6 Mon Sep 17 00:00:00 2001 From: Lars Noack Date: Fri, 7 Jun 2024 11:17:47 +0200 Subject: [PATCH 15/23] feat: added extend --- music_kraken/objects/artwork.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/music_kraken/objects/artwork.py b/music_kraken/objects/artwork.py index a63129d..f98a106 100644 --- a/music_kraken/objects/artwork.py +++ b/music_kraken/objects/artwork.py @@ -89,6 +89,7 @@ class ArtworkCollection: self.crop_images: bool = crop_images self._data = [] + self.extend(data) def search_artwork(self, url: str) -> Optional[ArtworkVariant]: for artwork in self._data: @@ -128,6 +129,10 @@ class ArtworkCollection: if isinstance(value, Artwork): self._data.append(value) return + + def extend(self, values: List[Union[Artwork, ArtworkVariant, dict]], **kwargs): + for value in values: + self.append(value, **kwargs) def __merge__(self, other: ArtworkCollection, **kwargs) -> None: self.parent_artworks.update(other.parent_artworks) From 2da7a48b726982e7e0c1bae9a5fa593531b49f6d Mon Sep 17 00:00:00 2001 From: Lars Noack Date: Fri, 7 Jun 2024 11:27:55 +0200 Subject: [PATCH 16/23] feat: added compile --- music_kraken/objects/artwork.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/music_kraken/objects/artwork.py b/music_kraken/objects/artwork.py index f98a106..dc9a91c 100644 --- a/music_kraken/objects/artwork.py +++ b/music_kraken/objects/artwork.py @@ -134,6 +134,9 @@ class ArtworkCollection: for value in values: self.append(value, **kwargs) + def compile(self) -> None: + pass + def __merge__(self, other: ArtworkCollection, **kwargs) -> None: self.parent_artworks.update(other.parent_artworks) From 4ee6fd213703573e49f6f17d4e652fc83e46bdee Mon Sep 17 00:00:00 2001 From: Kur01234 Date: Mon, 10 Jun 2024 12:23:12 +0200 Subject: [PATCH 17/23] feat:a lot of nonsences --- music_kraken/download/page_attributes.py | 7 ++++++- music_kraken/objects/artwork.py | 20 ++++++++++++++++---- 2 files changed, 22 insertions(+), 5 deletions(-) diff --git a/music_kraken/download/page_attributes.py b/music_kraken/download/page_attributes.py index 320192f..03f8de9 100644 --- a/music_kraken/download/page_attributes.py +++ b/music_kraken/download/page_attributes.py @@ -198,7 +198,12 @@ class Pages: img.save(target.file_path, main_settings["image_format"]) - + def remove_artwork_duplicates(self) -> None: + """ + This will eliminate duplicates within the given threshold + """ + + pass def _fetch_artist_artwork(self, artist: Artist, naming: dict): naming: Dict[str, List[str]] = defaultdict(list, naming) diff --git a/music_kraken/objects/artwork.py b/music_kraken/objects/artwork.py index dc9a91c..1423c3c 100644 --- a/music_kraken/objects/artwork.py +++ b/music_kraken/objects/artwork.py @@ -80,9 +80,9 @@ class ArtworkCollection: def __init__( self, - *data: List[Union[Artwork, ArtworkVariant, dict]], + *data: List[Artwork], parent_artworks: Set[ArtworkCollection] = None, - crop_images: bool = True + crop_images: bool = True, ) -> None: # this is used for the song artwork, to fall back to the song artwork self.parent_artworks: Set[ArtworkCollection] = parent_artworks or set() @@ -91,6 +91,8 @@ class ArtworkCollection: self._data = [] self.extend(data) + + def search_artwork(self, url: str) -> Optional[ArtworkVariant]: for artwork in self._data: if url in artwork: @@ -104,7 +106,7 @@ class ArtworkCollection: def _create_new_artwork(self, **kwargs) -> Tuple[Artwork, dict]: kwargs["artwork_type"] = kwargs.get("artwork_type", self.artwork_type) - return create_dataclass_instance(Artwork, **kwargs) + return create_dataclass_instance(ArtworkVariant, dict(**kwargs)) def add_data(self, url: str, **kwargs) -> None: kwargs["url"] = url @@ -112,12 +114,16 @@ class ArtworkCollection: artwork = self.search_artwork(url) if artwork is None: - artwork, kwargs = self._create_new_artwork(url=url, **kwargs) + artwork, kwargs = self._create_new_artwork(url=url) self._data.append(artwork) artwork.add_data(url, **kwargs) def append(self, value: Union[Artwork, ArtworkVariant, dict], **kwargs): + """ + You can append the types Artwork, ArtworkVariant or dict + the best option would be to use Artwork and avoid the other options. + """ if isinstance(value, dict): kwargs.update(value) value, kwargs = create_dataclass_instance(ArtworkVariant, kwargs) @@ -135,6 +141,12 @@ class ArtworkCollection: self.append(value, **kwargs) def compile(self) -> None: + """ + This will make the artworks ready for download + """ + for artwork in self._data: + for variants in artwork.variants: + pass pass def __merge__(self, other: ArtworkCollection, **kwargs) -> None: From b1a306f3f36f680f43ce10e7381ec2b98c5cf612 Mon Sep 17 00:00:00 2001 From: Lars Noack Date: Tue, 11 Jun 2024 14:34:58 +0200 Subject: [PATCH 18/23] fix: implemented artwork.add_data --- development/actual_donwload.py | 8 +++----- music_kraken/objects/artwork.py | 25 ++++++++++++++++--------- music_kraken/utils/__init__.py | 7 ++++--- music_kraken/utils/string_processing.py | 6 ++++-- 4 files changed, 27 insertions(+), 19 deletions(-) diff --git a/development/actual_donwload.py b/development/actual_donwload.py index ad8f1d0..be10369 100644 --- a/development/actual_donwload.py +++ b/development/actual_donwload.py @@ -1,15 +1,13 @@ +import logging + import music_kraken -import logging print("Setting logging-level to DEBUG") logging.getLogger().setLevel(logging.DEBUG) if __name__ == "__main__": commands = [ - "s: #a Crystal F", - "10", - "1", - "3", + "s: #a Ghost Bath", ] diff --git a/music_kraken/objects/artwork.py b/music_kraken/objects/artwork.py index 1423c3c..40c6115 100644 --- a/music_kraken/objects/artwork.py +++ b/music_kraken/objects/artwork.py @@ -20,7 +20,7 @@ class ArtworkVariant: url: str width: Optional[int] = None height: Optional[int] = None - image_format: Optional[str] = "" + image_format: Optional[str] = None def __hash__(self) -> int: return custom_hash(self.url) @@ -31,6 +31,13 @@ class ArtworkVariant: def __contains__(self, other: str) -> bool: return custom_hash(other) == hash(self.url) + def __merge__(self, other: ArtworkVariant) -> None: + for key, value in other.__dict__.items(): + if value is None: + continue + + if getattr(self, key) is None: + setattr(self, key, value) @dataclass class Artwork: @@ -55,10 +62,9 @@ class Artwork: variant = self.search_variant(kwargs.get("url")) if variant is None: - variant, kwargs = create_dataclass_instance(ArtworkVariant, **kwargs) + variant, kwargs = create_dataclass_instance(ArtworkVariant, kwargs) self.variants.append(variant) - variant.url = url variant.__dict__.update(kwargs) @property @@ -91,8 +97,6 @@ class ArtworkCollection: self._data = [] self.extend(data) - - def search_artwork(self, url: str) -> Optional[ArtworkVariant]: for artwork in self._data: if url in artwork: @@ -106,18 +110,19 @@ class ArtworkCollection: def _create_new_artwork(self, **kwargs) -> Tuple[Artwork, dict]: kwargs["artwork_type"] = kwargs.get("artwork_type", self.artwork_type) - return create_dataclass_instance(ArtworkVariant, dict(**kwargs)) + return create_dataclass_instance(Artwork, dict(**kwargs)) - def add_data(self, url: str, **kwargs) -> None: + def add_data(self, url: str, **kwargs) -> Artwork: kwargs["url"] = url artwork = self.search_artwork(url) if artwork is None: - artwork, kwargs = self._create_new_artwork(url=url) + artwork, kwargs = self._create_new_artwork(**kwargs) self._data.append(artwork) - artwork.add_data(url, **kwargs) + artwork.add_data(**kwargs) + return artwork def append(self, value: Union[Artwork, ArtworkVariant, dict], **kwargs): """ @@ -144,6 +149,8 @@ class ArtworkCollection: """ This will make the artworks ready for download """ + from ..connection import Connection + for artwork in self._data: for variants in artwork.variants: pass diff --git a/music_kraken/utils/__init__.py b/music_kraken/utils/__init__.py index a0be99c..95013e5 100644 --- a/music_kraken/utils/__init__.py +++ b/music_kraken/utils/__init__.py @@ -156,6 +156,7 @@ def create_dataclass_instance(t, data: dict): Tuple[Type, dict]: The created instance and a dict, containing the data, which was not used in the creation """ - data = {k: v for k, v in data.items() if hasattr(t, k)} - removed_data = {k: v for k, v in data.items() if not hasattr(t, k)} - return t(**data), removed_data + needed_data = {k: v for k, v in data.items() if k in t.__dataclass_fields__} + removed_data = {k: v for k, v in data.items() if k not in t.__dataclass_fields__} + + return t(**needed_data), removed_data diff --git a/music_kraken/utils/string_processing.py b/music_kraken/utils/string_processing.py index b53d245..d001c1e 100644 --- a/music_kraken/utils/string_processing.py +++ b/music_kraken/utils/string_processing.py @@ -1,3 +1,4 @@ +import re import string from functools import lru_cache from pathlib import Path @@ -237,5 +238,6 @@ def is_url(value: Any) -> bool: if not isinstance(value, str): return True - - return re.match(URL_PATTERN, query) is not None + + # value has to be a string + return re.match(URL_PATTERN, value) is not None From 274f1bce9030f62e31c9b3a208eba7ff324ce14c Mon Sep 17 00:00:00 2001 From: Lars Noack Date: Tue, 11 Jun 2024 14:54:36 +0200 Subject: [PATCH 19/23] feat: implemented fetching of artworks on compile --- music_kraken/audio/metadata.py | 2 +- music_kraken/connection/connection.py | 17 +++++++++------- music_kraken/objects/artwork.py | 28 ++++++++++++++++++++++----- music_kraken/objects/target.py | 18 ++++++++++++----- 4 files changed, 47 insertions(+), 18 deletions(-) diff --git a/music_kraken/audio/metadata.py b/music_kraken/audio/metadata.py index e68ea24..5da3616 100644 --- a/music_kraken/audio/metadata.py +++ b/music_kraken/audio/metadata.py @@ -107,7 +107,7 @@ def write_metadata_to_target(metadata: Metadata, target: Target, song: Song): mime="image/jpeg", type=3, desc=u"Cover", - data=converted_target.read_bytes(), + data=converted_target.raw_content, ) ) id3_object.frames.delall("USLT") diff --git a/music_kraken/connection/connection.py b/music_kraken/connection/connection.py index 31e4b8a..7b68f2d 100644 --- a/music_kraken/connection/connection.py +++ b/music_kraken/connection/connection.py @@ -1,12 +1,12 @@ from __future__ import annotations +import copy +import inspect import logging import threading import time -from typing import List, Dict, Optional, Set -from urllib.parse import urlparse, urlunsplit, ParseResult -import copy -import inspect +from typing import TYPE_CHECKING, Dict, List, Optional, Set +from urllib.parse import ParseResult, urlparse, urlunsplit import requests import responses @@ -14,12 +14,15 @@ from tqdm import tqdm from .cache import Cache from .rotating import RotatingProxy -from ..objects import Target + +if TYPE_CHECKING: + from ..objects import Target + from ..utils import request_trace -from ..utils.string_processing import shorten_display_url from ..utils.config import main_settings -from ..utils.support_classes.download_result import DownloadResult from ..utils.hacking import merge_args +from ..utils.string_processing import shorten_display_url +from ..utils.support_classes.download_result import DownloadResult class Connection: diff --git a/music_kraken/objects/artwork.py b/music_kraken/objects/artwork.py index 40c6115..29b67c2 100644 --- a/music_kraken/objects/artwork.py +++ b/music_kraken/objects/artwork.py @@ -2,8 +2,10 @@ from __future__ import annotations from copy import copy from dataclasses import dataclass, field +from functools import cached_property from typing import Dict, List, Optional, Set, Tuple, Type, TypedDict, Union +from ..connection import Connection from ..utils import create_dataclass_instance, custom_hash from ..utils.config import main_settings from ..utils.enums import PictureType @@ -13,6 +15,9 @@ from .metadata import ID3Timestamp from .metadata import Mapping as id3Mapping from .metadata import Metadata from .parents import OuterProxy as Base +from .target import Target + +artwork_connection: Connection = Connection(module="artwork") @dataclass @@ -39,6 +44,19 @@ class ArtworkVariant: if getattr(self, key) is None: setattr(self, key, value) + @cached_property + def target(self) -> Target: + return Target.temp() + + def fetch(self) -> None: + global artwork_connection + + r = artwork_connection.get(self.url, name=hash_url(url)) + if r is None: + return + + self.target.raw_content = r.content + @dataclass class Artwork: variants: List[ArtworkVariant] = field(default_factory=list) @@ -73,6 +91,10 @@ class Artwork: return None return self.variants[0].url + def fetch(self) -> None: + for variant in self.variants: + variant.fetch() + class ArtworkCollection: """ @@ -149,12 +171,8 @@ class ArtworkCollection: """ This will make the artworks ready for download """ - from ..connection import Connection - for artwork in self._data: - for variants in artwork.variants: - pass - pass + artwork.fetch() def __merge__(self, other: ArtworkCollection, **kwargs) -> None: self.parent_artworks.update(other.parent_artworks) diff --git a/music_kraken/objects/target.py b/music_kraken/objects/target.py index 2491a5a..ced2ddc 100644 --- a/music_kraken/objects/target.py +++ b/music_kraken/objects/target.py @@ -1,17 +1,17 @@ from __future__ import annotations -from pathlib import Path -from typing import List, Tuple, TextIO, Union, Optional import logging import random +from pathlib import Path +from typing import List, Optional, TextIO, Tuple, Union + import requests from tqdm import tqdm -from .parents import OuterProxy +from ..utils.config import logging_settings, main_settings from ..utils.shared import HIGHEST_ID -from ..utils.config import main_settings, logging_settings from ..utils.string_processing import fit_to_file_system - +from .parents import OuterProxy LOGGER = logging.getLogger("target") @@ -117,3 +117,11 @@ class Target(OuterProxy): def read_bytes(self) -> bytes: return self.file_path.read_bytes() + + @property + def raw_content(self) -> bytes: + return self.file_path.read_bytes() + + @raw_content.setter + def raw_content(self, content: bytes): + self.file_path.write_bytes(content) From dd99e60afddbbe91372c53565a7e4679ac838914 Mon Sep 17 00:00:00 2001 From: Lars Noack Date: Tue, 11 Jun 2024 14:58:04 +0200 Subject: [PATCH 20/23] fix: circular input --- .../utils/support_classes/download_result.py | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/music_kraken/utils/support_classes/download_result.py b/music_kraken/utils/support_classes/download_result.py index 5458a34..67cb1c1 100644 --- a/music_kraken/utils/support_classes/download_result.py +++ b/music_kraken/utils/support_classes/download_result.py @@ -1,9 +1,13 @@ -from dataclasses import dataclass, field -from typing import List, Tuple +from __future__ import annotations -from ...utils.config import main_settings, logging_settings +from dataclasses import dataclass, field +from typing import TYPE_CHECKING, List, Tuple + +if TYPE_CHECKING: + from ...objects import Target + +from ...utils.config import logging_settings, main_settings from ...utils.enums.colors import BColors -from ...objects import Target UNIT_PREFIXES: List[str] = ["", "k", "m", "g", "t"] UNIT_DIVISOR = 1024 From 17c28722fb63fb4b65ee686f2f540a780b3f6d69 Mon Sep 17 00:00:00 2001 From: Kur01234 Date: Mon, 17 Jun 2024 14:50:17 +0200 Subject: [PATCH 21/23] feat: musify ArtworkCollection simple function --- music_kraken/audio/metadata.py | 11 +++--- music_kraken/download/page_attributes.py | 45 ++------------------- music_kraken/objects/artwork.py | 50 ++++++++++++++++++++---- music_kraken/objects/song.py | 2 +- 4 files changed, 52 insertions(+), 56 deletions(-) diff --git a/music_kraken/audio/metadata.py b/music_kraken/audio/metadata.py index 5da3616..5b9f7ac 100644 --- a/music_kraken/audio/metadata.py +++ b/music_kraken/audio/metadata.py @@ -67,13 +67,14 @@ def write_metadata_to_target(metadata: Metadata, target: Target, song: Song): id3_object = AudioMetadata(file_location=target.file_path) LOGGER.info(str(metadata)) - - if song.artwork.best_variant is not None: - best_variant = song.artwork.best_variant + ## REWRITE COMPLETLY !!!!!!!!!!!! + if len(song.artwork._data) != 0: + variants = song.artwork._data.__getitem__(0) + best_variant = variants.variants.__getitem__(0) r = artwork_connection.get( - url=best_variant["url"], - name=song.artwork.get_variant_name(best_variant), + url=best_variant.url, + name=best_variant.url, ) temp_target: Target = Target.temp() diff --git a/music_kraken/download/page_attributes.py b/music_kraken/download/page_attributes.py index 03f8de9..5582bc9 100644 --- a/music_kraken/download/page_attributes.py +++ b/music_kraken/download/page_attributes.py @@ -36,8 +36,6 @@ from ..connection import Connection from ..pages import Page, EncyclopaediaMetallum, Musify, YouTube, YoutubeMusic, Bandcamp, Genius, INDEPENDENT_DB_OBJECTS -artwork_connection: Connection = Connection() - ALL_PAGES: Set[Type[Page]] = { # EncyclopaediaMetallum, Genius, @@ -167,43 +165,6 @@ class Pages: return False - def download_artwork_variant_to_target(self, artwork_variant: ArtworkVariant, target: Target): - - r = artwork_connection.get( - url=artwork_variant["url"], - name=artwork_variant["url"], - ) - - - temp_target: Target = Target.temp() - with temp_target.open("wb") as f: - f.write(r.content) - - converted_target: Target = Target.temp(file_extension=main_settings["image_format"]) - with Image.open(temp_target.file_path) as img: - # crop the image if it isn't square in the middle with minimum data loss - width, height = img.size - if width != height: - if width > height: - img = img.crop((width // 2 - height // 2, 0, width // 2 + height // 2, height)) - else: - img = img.crop((0, height // 2 - width // 2, width, height // 2 + width // 2)) - - # resize the image to the preferred resolution - img.thumbnail((main_settings["preferred_artwork_resolution"], main_settings["preferred_artwork_resolution"])) - - # https://stackoverflow.com/a/59476938/16804841 - if img.mode != 'RGB': - img = img.convert('RGB') - - img.save(target.file_path, main_settings["image_format"]) - - def remove_artwork_duplicates(self) -> None: - """ - This will eliminate duplicates within the given threshold - """ - - pass def _fetch_artist_artwork(self, artist: Artist, naming: dict): naming: Dict[str, List[str]] = defaultdict(list, naming) @@ -214,17 +175,17 @@ class Pages: # https://stackoverflow.com/a/17016257 naming[key] = list(dict.fromkeys(value)) - artwork: Artwork = artist.artwork + artwork: ArtworkCollection = artist.artwork for image_number, variant in enumerate(artwork): naming["image_number"] = [str(image_number)] - url: str = variant["url"] + url: str = variant.url target = Target( relative_to_music_dir=True, file_path=Path(self._parse_path_template(main_settings["artist_artwork_path"], naming=naming)) ) - self.download_artwork_variant_to_target(variant, target) + artwork.compile(target) def download(self, data_object: DataObject, genre: str, **kwargs) -> DownloadResult: # fetch the given object diff --git a/music_kraken/objects/artwork.py b/music_kraken/objects/artwork.py index 29b67c2..2417405 100644 --- a/music_kraken/objects/artwork.py +++ b/music_kraken/objects/artwork.py @@ -16,6 +16,7 @@ from .metadata import Mapping as id3Mapping from .metadata import Metadata from .parents import OuterProxy as Base from .target import Target +from PIL import Image artwork_connection: Connection = Connection(module="artwork") @@ -24,7 +25,7 @@ artwork_connection: Connection = Connection(module="artwork") class ArtworkVariant: url: str width: Optional[int] = None - height: Optional[int] = None + heigth: Optional[int] = None image_format: Optional[str] = None def __hash__(self) -> int: @@ -51,7 +52,7 @@ class ArtworkVariant: def fetch(self) -> None: global artwork_connection - r = artwork_connection.get(self.url, name=hash_url(url)) + r = artwork_connection.get(self.url, name=hash_url(self.url)) if r is None: return @@ -167,19 +168,52 @@ class ArtworkCollection: for value in values: self.append(value, **kwargs) - def compile(self) -> None: + def compile(self, target: Target, **kwargs) -> None: """ This will make the artworks ready for download """ for artwork in self._data: - artwork.fetch() + for artwork_variant in artwork.variants: + r = artwork_connection.get( + url=artwork_variant.url, + name=artwork_variant.url, + ) + + temp_target: Target = Target.temp() + with temp_target.open("wb") as f: + f.write(r.content) + + converted_target: Target = Target.temp(file_extension=main_settings["image_format"]) + with Image.open(temp_target.file_path) as img: + # crop the image if it isn't square in the middle with minimum data loss + width, height = img.size + if width != height: + if width > height: + img = img.crop((width // 2 - height // 2, 0, width // 2 + height // 2, height)) + else: + img = img.crop((0, height // 2 - width // 2, width, height // 2 + width // 2)) + + # resize the image to the preferred resolution + img.thumbnail((main_settings["preferred_artwork_resolution"], main_settings["preferred_artwork_resolution"])) + + # https://stackoverflow.com/a/59476938/16804841 + if img.mode != 'RGB': + img = img.convert('RGB') + if target is not None: + img.save(target.file_path, main_settings["image_format"]) + + def __merge__(self, other: ArtworkCollection, **kwargs) -> None: self.parent_artworks.update(other.parent_artworks) - - for key, value in other._variant_mapping.items(): - if key not in self._variant_mapping: - self._variant_mapping[key] = value + for other_artwork in other._data: + for other_variant in other_artwork.variants: + if len(self._data) != 0: + for artwork in self._data: + for variant in artwork.variants: + variant.__merge__(other_variant) + else: + self.add_data(other_variant.url) def __hash__(self) -> int: return id(self) diff --git a/music_kraken/objects/song.py b/music_kraken/objects/song.py index f39aa96..d66bb12 100644 --- a/music_kraken/objects/song.py +++ b/music_kraken/objects/song.py @@ -185,7 +185,7 @@ class Song(Base): return def _compile(self): - self.artwork.compile() + self.artwork.compile(self.target_collection.get(0)) INDEX_DEPENDS_ON = ("title", "isrc", "source_collection") From 93c9a367a257647fb0bf2758dea3f20c5edafeab Mon Sep 17 00:00:00 2001 From: Kur01234 Date: Mon, 1 Jul 2024 14:59:51 +0200 Subject: [PATCH 22/23] feat: image hash implemented --- music_kraken/download/page_attributes.py | 23 +++--- music_kraken/objects/artwork.py | 74 +++++++++---------- music_kraken/objects/target.py | 3 +- music_kraken/pages/musify.py | 5 +- .../pages/youtube_music/youtube_music.py | 4 +- 5 files changed, 52 insertions(+), 57 deletions(-) diff --git a/music_kraken/download/page_attributes.py b/music_kraken/download/page_attributes.py index 5582bc9..248ce6c 100644 --- a/music_kraken/download/page_attributes.py +++ b/music_kraken/download/page_attributes.py @@ -175,17 +175,18 @@ class Pages: # https://stackoverflow.com/a/17016257 naming[key] = list(dict.fromkeys(value)) - artwork: ArtworkCollection = artist.artwork - for image_number, variant in enumerate(artwork): - naming["image_number"] = [str(image_number)] - - url: str = variant.url - - target = Target( - relative_to_music_dir=True, - file_path=Path(self._parse_path_template(main_settings["artist_artwork_path"], naming=naming)) - ) - artwork.compile(target) + artwork_collection: ArtworkCollection = artist.artwork + artwork_collection.compile() + for image_number, artwork in enumerate(artwork_collection): + for artwork_variant in artwork.variants: + naming["image_number"] = [str(image_number)] + target = Target( + relative_to_music_dir=True, + file_path=Path(self._parse_path_template(main_settings["artist_artwork_path"], naming=naming)) + ) + with Image.open(artwork_variant.target.file_path) as img: + img.save(target.file_path, main_settings["image_format"]) + artwork_variant.target = Target def download(self, data_object: DataObject, genre: str, **kwargs) -> DownloadResult: # fetch the given object diff --git a/music_kraken/objects/artwork.py b/music_kraken/objects/artwork.py index 2417405..fcd69cf 100644 --- a/music_kraken/objects/artwork.py +++ b/music_kraken/objects/artwork.py @@ -18,6 +18,8 @@ from .parents import OuterProxy as Base from .target import Target from PIL import Image +import imagehash + artwork_connection: Connection = Connection(module="artwork") @@ -168,24 +170,37 @@ class ArtworkCollection: for value in values: self.append(value, **kwargs) - def compile(self, target: Target, **kwargs) -> None: + def compile(self, **kwargs) -> None: """ - This will make the artworks ready for download + This will make the artworks ready for download and delete duplicates. """ + artwork_hashes: list = list() for artwork in self._data: + index = 0 for artwork_variant in artwork.variants: r = artwork_connection.get( url=artwork_variant.url, name=artwork_variant.url, ) - - temp_target: Target = Target.temp() - with temp_target.open("wb") as f: + target: Target = artwork_variant.target + with target.open("wb") as f: f.write(r.content) - converted_target: Target = Target.temp(file_extension=main_settings["image_format"]) - with Image.open(temp_target.file_path) as img: - # crop the image if it isn't square in the middle with minimum data loss + with Image.open(target.file_path) as img: + # https://stackoverflow.com/a/59476938/16804841 + if img.mode != 'RGB': + img = img.convert('RGB') + + try: + image_hash = imagehash.crop_resistant_hash(img) + except Exception as e: + continue + + if image_hash in artwork_hashes: + artwork.variants.pop(index) + target.delete() + continue + artwork_hashes.append(image_hash) width, height = img.size if width != height: if width > height: @@ -193,27 +208,20 @@ class ArtworkCollection: else: img = img.crop((0, height // 2 - width // 2, width, height // 2 + width // 2)) - # resize the image to the preferred resolution - img.thumbnail((main_settings["preferred_artwork_resolution"], main_settings["preferred_artwork_resolution"])) - - # https://stackoverflow.com/a/59476938/16804841 - if img.mode != 'RGB': - img = img.convert('RGB') - if target is not None: - img.save(target.file_path, main_settings["image_format"]) - - + # resize the image to the preferred resolution + img.thumbnail((main_settings["preferred_artwork_resolution"], main_settings["preferred_artwork_resolution"])) + index =+ 1 + + def __merge__(self, other: ArtworkCollection, **kwargs) -> None: self.parent_artworks.update(other.parent_artworks) for other_artwork in other._data: for other_variant in other_artwork.variants: - if len(self._data) != 0: - for artwork in self._data: - for variant in artwork.variants: - variant.__merge__(other_variant) - else: - self.add_data(other_variant.url) + if self.__contains__(other_variant.url): + continue + self.append(ArtworkVariant(other_variant.url)) + def __hash__(self) -> int: return id(self) @@ -224,21 +232,5 @@ class ArtworkCollection: def get_urls(self) -> Generator[str, None, None]: yield from (artwork.url for artwork in self._data if artwork.url is not None) - """ - @property - def flat_empty(self) -> bool: - return len(self._variant_mapping.keys()) <= 0 - - def _get_best_from_list(self, artwork_variants: List[ArtworkVariant]) -> Optional[ArtworkVariant]: - return min(artwork_variants, key=lambda x: x["deviation"]) - - @property - def best_variant(self) -> ArtworkVariant: - if self.flat_empty: - return self._get_best_from_list([parent.best_variant for parent in self.parent_artworks]) - return self._get_best_from_list(self._variant_mapping.values()) - - def get_variant_name(self, variant: ArtworkVariant) -> str: - return f"artwork_{variant['width']}x{variant['height']}_{hash_url(variant['url']).replace('/', '_')}" - """ + \ No newline at end of file diff --git a/music_kraken/objects/target.py b/music_kraken/objects/target.py index ced2ddc..28bdb26 100644 --- a/music_kraken/objects/target.py +++ b/music_kraken/objects/target.py @@ -31,7 +31,8 @@ class Target(OuterProxy): } @classmethod - def temp(cls, name: str = str(random.randint(0, HIGHEST_ID)), file_extension: Optional[str] = None) -> P: + def temp(cls, name: str = None, file_extension: Optional[str] = None) -> P: + name = name or str(random.randint(0, HIGHEST_ID)) if file_extension is not None: name = f"{name}.{file_extension}" diff --git a/music_kraken/pages/musify.py b/music_kraken/pages/musify.py index ed1caed..bfb19ff 100644 --- a/music_kraken/pages/musify.py +++ b/music_kraken/pages/musify.py @@ -8,9 +8,10 @@ import pycountry from bs4 import BeautifulSoup from ..connection import Connection -from ..objects import (Album, Artist, ArtworkCollection, DatabaseObject, +from ..objects import (Album, Artist, DatabaseObject, FormattedText, ID3Timestamp, Label, Lyrics, Song, Source, Target) +from ..objects.artwork import (Artwork, ArtworkVariant, ArtworkCollection) from ..utils import shared, string_processing from ..utils.config import logging_settings, main_settings from ..utils.enums import ALL_SOURCE_TYPES, SourceType @@ -1069,7 +1070,7 @@ class Musify(Page): gallery_body_content: BeautifulSoup = artwork_gallery.find(id="bodyContent") gallery_image_element_list: List[BeautifulSoup] = gallery_body_content.find_all("img") for gallery_image_element in gallery_image_element_list: - artist.artwork.add_data(url=gallery_image_element.get("data-src", gallery_image_element.get("src")), width=247, heigth=247) + artist.artwork.append(ArtworkVariant(url=gallery_image_element.get("data-src", gallery_image_element.get("src")), width=247, heigth=247)) def fetch_artist(self, source: Source, **kwargs) -> Artist: diff --git a/music_kraken/pages/youtube_music/youtube_music.py b/music_kraken/pages/youtube_music/youtube_music.py index c709f5c..96006ea 100644 --- a/music_kraken/pages/youtube_music/youtube_music.py +++ b/music_kraken/pages/youtube_music/youtube_music.py @@ -441,7 +441,7 @@ class YoutubeMusic(SuperYouTube): # fetch artist artwork artist_thumbnails = musicImmersiveHeaderRenderer.get("thumbnail", {}).get("musicThumbnailRenderer", {}).get("thumbnail", {}).get("thumbnails", {}) for artist_thumbnail in artist_thumbnails: - artist.artwork.append(**artist_thumbnail) + artist.artwork.append(artist_thumbnail) if DEBUG: for i, content in enumerate(renderer_list): @@ -493,7 +493,7 @@ class YoutubeMusic(SuperYouTube): # album artwork album_thumbnails = musicDetailHeaderRenderer.get("thumbnail", {}).get("croppedSquareThumbnailRenderer", {}).get("thumbnail", {}).get("thumbnails", {}) for album_thumbnail in album_thumbnails: - album.artwork.append(**album_thumbnail) + album.artwork.append(value=album_thumbnail) title_runs: List[dict] = musicDetailHeaderRenderer.get("title", {}).get("runs", []) subtitle_runs: List[dict] = musicDetailHeaderRenderer.get("subtitle", {}).get("runs", []) From 5ce76c758e9554185e4d09a9e3fe91194c80e2e9 Mon Sep 17 00:00:00 2001 From: Kur01234 Date: Tue, 2 Jul 2024 17:20:25 +0200 Subject: [PATCH 23/23] feat: genius fixes and duplicate detection --- music_kraken/download/page_attributes.py | 109 +++++++++++++---------- music_kraken/objects/artwork.py | 7 ++ music_kraken/objects/song.py | 2 +- music_kraken/pages/genius.py | 8 +- 4 files changed, 77 insertions(+), 49 deletions(-) diff --git a/music_kraken/download/page_attributes.py b/music_kraken/download/page_attributes.py index 248ce6c..788c13f 100644 --- a/music_kraken/download/page_attributes.py +++ b/music_kraken/download/page_attributes.py @@ -3,6 +3,7 @@ from collections import defaultdict from pathlib import Path import re import logging +import subprocess from PIL import Image @@ -76,33 +77,37 @@ if DEBUG_PAGES: class Pages: def __init__(self, exclude_pages: Set[Type[Page]] = None, exclude_shady: bool = False, download_options: DownloadOptions = None, fetch_options: FetchOptions = None): self.LOGGER = logging.getLogger("download") - + self.download_options: DownloadOptions = download_options or DownloadOptions() self.fetch_options: FetchOptions = fetch_options or FetchOptions() # initialize all page instances self._page_instances: Dict[Type[Page], Page] = dict() self._source_to_page: Dict[SourceType, Type[Page]] = dict() - + exclude_pages = exclude_pages if exclude_pages is not None else set() - + if exclude_shady: exclude_pages = exclude_pages.union(SHADY_PAGES) - + if not exclude_pages.issubset(ALL_PAGES): - raise ValueError(f"The excluded pages have to be a subset of all pages: {exclude_pages} | {ALL_PAGES}") - + raise ValueError( + f"The excluded pages have to be a subset of all pages: {exclude_pages} | {ALL_PAGES}") + def _set_to_tuple(page_set: Set[Type[Page]]) -> Tuple[Type[Page], ...]: return tuple(sorted(page_set, key=lambda page: page.__name__)) - + self._pages_set: Set[Type[Page]] = ALL_PAGES.difference(exclude_pages) self.pages: Tuple[Type[Page], ...] = _set_to_tuple(self._pages_set) - self._audio_pages_set: Set[Type[Page]] = self._pages_set.intersection(AUDIO_PAGES) - self.audio_pages: Tuple[Type[Page], ...] = _set_to_tuple(self._audio_pages_set) - + self._audio_pages_set: Set[Type[Page] + ] = self._pages_set.intersection(AUDIO_PAGES) + self.audio_pages: Tuple[Type[Page], ...] = _set_to_tuple( + self._audio_pages_set) + for page_type in self.pages: - self._page_instances[page_type] = page_type(fetch_options=self.fetch_options, download_options=self.download_options) + self._page_instances[page_type] = page_type( + fetch_options=self.fetch_options, download_options=self.download_options) self._source_to_page[page_type.SOURCE_TYPE] = page_type def _get_page_from_enum(self, source_page: SourceType) -> Page: @@ -112,24 +117,26 @@ class Pages: def search(self, query: Query) -> SearchResults: result = SearchResults() - + for page_type in self.pages: result.add( page=page_type, - search_result=self._page_instances[page_type].search(query=query) + search_result=self._page_instances[page_type].search( + query=query) ) - + return result - + def fetch_details(self, data_object: DataObject, stop_at_level: int = 1, **kwargs) -> DataObject: if not isinstance(data_object, INDEPENDENT_DB_OBJECTS): return data_object - + source: Source for source in data_object.source_collection.get_sources(source_type_sorting={ "only_with_page": True, }): - new_data_object = self.fetch_from_source(source=source, stop_at_level=stop_at_level) + new_data_object = self.fetch_from_source( + source=source, stop_at_level=stop_at_level) if new_data_object is not None: data_object.merge(new_data_object) @@ -138,14 +145,14 @@ class Pages: def fetch_from_source(self, source: Source, **kwargs) -> Optional[DataObject]: if not source.has_page: return None - + source_type = source.page.get_source_type(source=source) if source_type is None: self.LOGGER.debug(f"Could not determine source type for {source}.") return None func = getattr(source.page, fetch_map[source_type]) - + # fetching the data object and marking it as fetched data_object: DataObject = func(source=source, **kwargs) data_object.mark_as_fetched(source.hash_url) @@ -155,21 +162,21 @@ class Pages: source = Source.match_url(url, ALL_SOURCE_TYPES.MANUAL) if source is None: return None - + return self.fetch_from_source(source=source) - + def _skip_object(self, data_object: DataObject) -> bool: if isinstance(data_object, Album): if not self.download_options.download_all and data_object.album_type in self.download_options.album_type_blacklist: return True - - return False + return False def _fetch_artist_artwork(self, artist: Artist, naming: dict): naming: Dict[str, List[str]] = defaultdict(list, naming) naming["artist"].append(artist.name) - naming["label"].extend([l.title_value for l in artist.label_collection]) + naming["label"].extend( + [l.title_value for l in artist.label_collection]) # removing duplicates from the naming, and process the strings for key, value in naming.items(): # https://stackoverflow.com/a/17016257 @@ -182,8 +189,12 @@ class Pages: naming["image_number"] = [str(image_number)] target = Target( relative_to_music_dir=True, - file_path=Path(self._parse_path_template(main_settings["artist_artwork_path"], naming=naming)) + file_path=Path(self._parse_path_template( + main_settings["artist_artwork_path"], naming=naming)) ) + if not target.file_path.parent.exists(): + target.create_path() + subprocess.Popen(["gio", "set", target.file_path.parent, "metadata::custom-icon", "file://"+str(target.file_path)]) with Image.open(artwork_variant.target.file_path) as img: img.save(target.file_path, main_settings["image_format"]) artwork_variant.target = Target @@ -191,7 +202,8 @@ class Pages: def download(self, data_object: DataObject, genre: str, **kwargs) -> DownloadResult: # fetch the given object self.fetch_details(data_object) - output(f"\nDownloading {data_object.option_string}...", color=BColors.BOLD) + output( + f"\nDownloading {data_object.option_string}...", color=BColors.BOLD) # fetching all parent objects (e.g. if you only download a song) if not kwargs.get("fetched_upwards", False): @@ -209,7 +221,7 @@ class Pages: new_to_fetch.extend(c) to_fetch = new_to_fetch - + kwargs["fetched_upwards"] = True naming = kwargs.get("naming", { @@ -247,13 +259,15 @@ class Pages: return set(re.findall(r"{([^}]+)}", path_template)) def _parse_path_template(self, path_template: str, naming: Dict[str, List[str]]) -> str: - field_names: Set[str] = self._extract_fields_from_template(path_template) - + field_names: Set[str] = self._extract_fields_from_template( + path_template) + for field in field_names: if len(naming[field]) == 0: raise MKMissingNameException(f"Missing field for {field}.") - path_template = path_template.replace(f"{{{field}}}", naming[field][0]) + path_template = path_template.replace( + f"{{{field}}}", naming[field][0]) return path_template @@ -263,16 +277,17 @@ class Pages: Search the song in the file system. """ r = DownloadResult(total=1) - + # pre process the data recursively song.compile() - + # manage the naming naming: Dict[str, List[str]] = defaultdict(list, naming) naming["song"].append(song.title_value) naming["isrc"].append(song.isrc) naming["album"].extend(a.title_value for a in song.album_collection) - naming["album_type"].extend(a.album_type.value for a in song.album_collection) + naming["album_type"].extend( + a.album_type.value for a in song.album_collection) naming["artist"].extend(a.name for a in song.artist_collection) naming["artist"].extend(a.name for a in song.feature_artist_collection) for a in song.album_collection: @@ -289,13 +304,16 @@ class Pages: song.target_collection.append(Target( relative_to_music_dir=True, file_path=Path( - self._parse_path_template(main_settings["download_path"], naming=naming), - self._parse_path_template(main_settings["download_file"], naming=naming), + self._parse_path_template( + main_settings["download_path"], naming=naming), + self._parse_path_template( + main_settings["download_file"], naming=naming), ) )) for target in song.target_collection: if target.exists: - output(f'{target.file_path} {BColors.OKGREEN.value}[already exists]', color=BColors.GREY) + output( + f'{target.file_path} {BColors.OKGREEN.value}[already exists]', color=BColors.GREY) r.found_on_disk += 1 if not self.download_options.download_again_if_found: @@ -316,8 +334,10 @@ class Pages: break used_source = source - streaming_results = source.page.download_song_to_target(source=source, target=tmp, desc="download") - skip_intervals = source.page.get_skip_intervals(song=song, source=source) + streaming_results = source.page.download_song_to_target( + source=source, target=tmp, desc="download") + skip_intervals = source.page.get_skip_intervals( + song=song, source=source) # if something has been downloaded but it somehow failed, delete the file if streaming_results.is_fatal_error and tmp.exists: @@ -341,7 +361,8 @@ class Pages: used_source.page.post_process_hook(song=song, temp_target=tmp) if not found_on_disk or self.download_options.process_metadata_if_found: - write_metadata_to_target(metadata=song.metadata, target=tmp, song=song) + write_metadata_to_target( + metadata=song.metadata, target=tmp, song=song) # copy the tmp target to the final locations for target in song.target_collection: @@ -352,12 +373,10 @@ class Pages: def fetch_url(self, url: str, stop_at_level: int = 2) -> Tuple[Type[Page], DataObject]: source = Source.match_url(url, ALL_SOURCE_TYPES.MANUAL) - + if source is None: raise UrlNotFoundException(url=url) - - _actual_page = self._source_to_page[source.source_type] - - return _actual_page, self._page_instances[_actual_page].fetch_object_from_source(source=source, stop_at_level=stop_at_level) - \ No newline at end of file + _actual_page = self._source_to_page[source.source_type] + + return _actual_page, self._page_instances[_actual_page].fetch_object_from_source(source=source, stop_at_level=stop_at_level) diff --git a/music_kraken/objects/artwork.py b/music_kraken/objects/artwork.py index fcd69cf..9effaba 100644 --- a/music_kraken/objects/artwork.py +++ b/music_kraken/objects/artwork.py @@ -175,6 +175,7 @@ class ArtworkCollection: This will make the artworks ready for download and delete duplicates. """ artwork_hashes: list = list() + artwork_urls: list = list() for artwork in self._data: index = 0 for artwork_variant in artwork.variants: @@ -182,6 +183,12 @@ class ArtworkCollection: url=artwork_variant.url, name=artwork_variant.url, ) + + if artwork_variant.url in artwork_urls: + artwork.variants.pop(index) + continue + artwork_urls.append(artwork_variant.url) + target: Target = artwork_variant.target with target.open("wb") as f: f.write(r.content) diff --git a/music_kraken/objects/song.py b/music_kraken/objects/song.py index d66bb12..f39aa96 100644 --- a/music_kraken/objects/song.py +++ b/music_kraken/objects/song.py @@ -185,7 +185,7 @@ class Song(Base): return def _compile(self): - self.artwork.compile(self.target_collection.get(0)) + self.artwork.compile() INDEX_DEPENDS_ON = ("title", "isrc", "source_collection") diff --git a/music_kraken/pages/genius.py b/music_kraken/pages/genius.py index 3b8f184..c6414ba 100644 --- a/music_kraken/pages/genius.py +++ b/music_kraken/pages/genius.py @@ -1,4 +1,5 @@ -import json +import simplejson as json +from json_unescape import escape_json, unescape_json from enum import Enum from typing import List, Optional, Type from urllib.parse import urlencode, urlparse, urlunparse @@ -268,8 +269,9 @@ class Genius(Page): # get the contents that are between `JSON.parse('` and `');` content = self.get_json_content_from_response(r, start="window.__PRELOADED_STATE__ = JSON.parse('", end="');\n window.__APP_CONFIG__ = ") if content is not None: - content = content.replace("\\\\", "\\").replace('\\"', '"').replace("\\'", "'") - data = json.loads(content) + #IMPLEMENT FIX FROM HAZEL + content = escape_json(content) + data = json.loads(content) lyrics_html = traverse_json_path(data, "songPage.lyricsData.body.html", default=None) if lyrics_html is not None: