From 1ad62df0abad47baf8df2e36892ecbc5ac9b08fa Mon Sep 17 00:00:00 2001 From: Lars Noack Date: Mon, 29 Apr 2024 13:43:34 +0200 Subject: [PATCH 01/32] feat: default implementation for options that should be sufficient --- .vscode/settings.json | 1 + music_kraken/objects/parents.py | 23 ++++++++++++++++++----- music_kraken/objects/song.py | 4 ++-- music_kraken/utils/__init__.py | 6 ++++++ 4 files changed, 27 insertions(+), 7 deletions(-) diff --git a/.vscode/settings.json b/.vscode/settings.json index 662ba25..fa0b6f7 100644 --- a/.vscode/settings.json +++ b/.vscode/settings.json @@ -16,6 +16,7 @@ }, "python.formatting.provider": "none", "cSpell.words": [ + "albumsort", "APIC", "Bandcamp", "dotenv", diff --git a/music_kraken/objects/parents.py b/music_kraken/objects/parents.py index 59a3d10..2f04b45 100644 --- a/music_kraken/objects/parents.py +++ b/music_kraken/objects/parents.py @@ -9,7 +9,7 @@ from pathlib import Path import inspect from .metadata import Metadata -from ..utils import get_unix_time, object_trace +from ..utils import get_unix_time, object_trace, generate_id from ..utils.config import logging_settings, main_settings from ..utils.shared import HIGHEST_ID from ..utils.hacking import MetaClass @@ -29,6 +29,10 @@ class InnerData: """ _refers_to_instances: set = None + """ + Attribute versions keep track, of if the attribute has been changed. 
+ """ + _attribute_versions: Dict[str, int] = None def __init__(self, object_type, **kwargs): self._refers_to_instances = set() @@ -84,8 +88,6 @@ class OuterProxy: DOWNWARDS_COLLECTION_STRING_ATTRIBUTES = tuple() UPWARDS_COLLECTION_STRING_ATTRIBUTES = tuple() - TITEL = "id" - def __init__(self, _id: int = None, dynamic: bool = False, **kwargs): _automatic_id: bool = False @@ -94,7 +96,7 @@ class OuterProxy: generates a random integer id the range is defined in the config """ - _id = random.randint(0, HIGHEST_ID) + _id = generate_id() _automatic_id = True kwargs["automatic_id"] = _automatic_id @@ -235,7 +237,17 @@ class OuterProxy: @property def options(self) -> List[P]: - return [self] + r = [] + + for collection_string_attribute in self.UPWARDS_COLLECTION_STRING_ATTRIBUTES: + r.extend(self.__getattribute__(collection_string_attribute)) + + r.append(self) + + for collection_string_attribute in self.DOWNWARDS_COLLECTION_STRING_ATTRIBUTES: + r.extend(self.__getattribute__(collection_string_attribute)) + + return r @property def indexing_values(self) -> List[Tuple[str, object]]: @@ -267,6 +279,7 @@ class OuterProxy: return r + TITEL = "id" @property def title_string(self) -> str: return str(self.__getattribute__(self.TITEL)) diff --git a/music_kraken/objects/song.py b/music_kraken/objects/song.py index be6d751..4b4abce 100644 --- a/music_kraken/objects/song.py +++ b/music_kraken/objects/song.py @@ -117,7 +117,7 @@ class Song(Base): Base.__init__(**locals()) - UPWARDS_COLLECTION_STRING_ATTRIBUTES = ("album_collection", "main_artist_collection", "feature_artist_collection") + UPWARDS_COLLECTION_STRING_ATTRIBUTES = ("main_artist_collection", "feature_artist_collection", "album_collection") TITEL = "title" def __init_collections__(self) -> None: @@ -269,7 +269,7 @@ class Album(Base): **kwargs) DOWNWARDS_COLLECTION_STRING_ATTRIBUTES = ("song_collection",) - UPWARDS_COLLECTION_STRING_ATTRIBUTES = ("artist_collection", "label_collection") + 
UPWARDS_COLLECTION_STRING_ATTRIBUTES = ("label_collection", "artist_collection") def __init_collections__(self): self.song_collection.append_object_to_attribute = { diff --git a/music_kraken/utils/__init__.py b/music_kraken/utils/__init__.py index 9226441..fcfb9a5 100644 --- a/music_kraken/utils/__init__.py +++ b/music_kraken/utils/__init__.py @@ -71,6 +71,12 @@ def object_trace(obj): misc functions """ +_auto_increment = 0 +def generate_id() -> int: + global _auto_increment + _auto_increment += 1 + return _auto_increment + def get_current_millis() -> int: dt = datetime.now() return int(dt.microsecond / 1_000) From ee1aaa13b09c2d66528287f9f32f87965ba444c7 Mon Sep 17 00:00:00 2001 From: Lars Noack Date: Mon, 29 Apr 2024 13:49:16 +0200 Subject: [PATCH 02/32] feat: cleaned data objects --- music_kraken/objects/song.py | 86 +++--------------------------------- 1 file changed, 6 insertions(+), 80 deletions(-) diff --git a/music_kraken/objects/song.py b/music_kraken/objects/song.py index 4b4abce..1e66565 100644 --- a/music_kraken/objects/song.py +++ b/music_kraken/objects/song.py @@ -340,12 +340,6 @@ class Album(Base): if len(self.song_collection) > 0: r += f" with {len(self.song_collection)} songs" return r - - @property - def options(self) -> List[P]: - options = [*self.artist_collection, self, *self.song_collection] - - return options def update_tracksort(self): """ @@ -372,18 +366,6 @@ class Album(Base): tracksort_map[i] = existing_list.pop(0) tracksort_map[i].tracksort = i - def compile(self, merge_into: bool = False): - """ - compiles the recursive structures, - and does depending on the object some other stuff. - - no need to override if only the recursive structure should be built. 
- override self.build_recursive_structures() instead - """ - - self.update_tracksort() - self._build_recursive_structures(build_version=random.randint(0, 99999), merge=merge_into) - @property def copyright(self) -> str: if self.date is None: @@ -429,7 +411,7 @@ class Artist(Base): lyrical_themes: List[str] general_genre: str - unformated_location: str + unformatted_location: str source_collection: SourceCollection contact_collection: Collection[Contact] @@ -442,7 +424,7 @@ class Artist(Base): "name": str, "unified_name": lambda: None, "country": lambda: None, - "unformated_location": lambda: None, + "unformatted_location": lambda: None, "formed_in": ID3Timestamp, "notes": FormattedText, @@ -461,17 +443,17 @@ class Artist(Base): # This is automatically generated def __init__(self, name: str = "", unified_name: str = None, country: Country = None, formed_in: ID3Timestamp = None, notes: FormattedText = None, lyrical_themes: List[str] = None, - general_genre: str = None, unformated_location: str = None, source_list: List[Source] = None, + general_genre: str = None, unformatted_location: str = None, source_list: List[Source] = None, contact_list: List[Contact] = None, feature_song_list: List[Song] = None, main_album_list: List[Album] = None, label_list: List[Label] = None, **kwargs) -> None: - + super().__init__(name=name, unified_name=unified_name, country=country, formed_in=formed_in, notes=notes, lyrical_themes=lyrical_themes, general_genre=general_genre, - unformated_location=unformated_location, source_list=source_list, contact_list=contact_list, + unformatted_location=unformatted_location, source_list=source_list, contact_list=contact_list, feature_song_list=feature_song_list, main_album_list=main_album_list, label_list=label_list, **kwargs) - DOWNWARDS_COLLECTION_STRING_ATTRIBUTES = ("feature_song_collection", "main_album_collection") + DOWNWARDS_COLLECTION_STRING_ATTRIBUTES = ("main_album_collection", "feature_song_collection") 
UPWARDS_COLLECTION_STRING_ATTRIBUTES = ("label_collection",) def __init_collections__(self): @@ -504,12 +486,6 @@ class Artist(Base): self.label_collection.extend(object_list) return - @property - def options(self) -> List[P]: - options = [self, *self.main_album_collection.shallow_list, *self.feature_album] - print(options) - return options - def update_albumsort(self): """ This updates the albumsort attributes, of the albums in @@ -585,19 +561,6 @@ class Artist(Base): return metadata - """ - def __str__(self, include_notes: bool = False): - string = self.name or "" - if include_notes: - plaintext_notes = self.notes.get_plaintext() - if plaintext_notes is not None: - string += "\n" + plaintext_notes - return string - """ - - def __repr__(self): - return f"Artist(\"{self.name}\")" - @property def option_string(self) -> str: r = OPTION_FOREGROUND.value + self.name + BColors.ENDC.value + OPTION_BACKGROUND.value @@ -613,43 +576,6 @@ class Artist(Base): return r - @property - def options(self) -> List[P]: - options = [self] - options.extend(self.main_album_collection) - options.extend(self.feature_song_collection) - return options - - @property - def feature_album(self) -> Album: - return Album( - title="features", - album_status=AlbumStatus.UNRELEASED, - album_type=AlbumType.COMPILATION_ALBUM, - is_split=True, - albumsort=666, - dynamic=True, - song_list=self.feature_song_collection.shallow_list - ) - - def get_all_songs(self) -> List[Song]: - """ - returns a list of all Songs. 
- probably not that useful, because it is unsorted - """ - collection = self.feature_song_collection.copy() - for album in self.discography: - collection.extend(album.song_collection) - - return collection - - @property - def discography(self) -> List[Album]: - flat_copy_discography = self.main_album_collection.copy() - flat_copy_discography.append(self.feature_album) - - return flat_copy_discography - """ Label From 3eba8e90f49f5b4b26882b670407ec2b9f8b9431 Mon Sep 17 00:00:00 2001 From: Lars Noack Date: Mon, 29 Apr 2024 13:49:41 +0200 Subject: [PATCH 03/32] feat: cleaned data objects --- music_kraken/objects/song.py | 1 - 1 file changed, 1 deletion(-) diff --git a/music_kraken/objects/song.py b/music_kraken/objects/song.py index 1e66565..b227f64 100644 --- a/music_kraken/objects/song.py +++ b/music_kraken/objects/song.py @@ -628,7 +628,6 @@ class Label(Base): @property def indexing_values(self) -> List[Tuple[str, object]]: return [ - ('id', self.id), ('name', unify(self.name)), *[('url', source.url) for source in self.source_collection] ] From aa50d2cf20dc741ba379caef3de7bdbee5cf595b Mon Sep 17 00:00:00 2001 From: Lars Noack Date: Mon, 29 Apr 2024 13:51:43 +0200 Subject: [PATCH 04/32] feat: renamed referrer page fixing typo --- music_kraken/objects/source.py | 38 ++++++++++---------- music_kraken/pages/bandcamp.py | 2 +- music_kraken/pages/encyclopaedia_metallum.py | 4 +-- music_kraken/pages/musify.py | 4 +-- 4 files changed, 25 insertions(+), 23 deletions(-) diff --git a/music_kraken/objects/source.py b/music_kraken/objects/source.py index bb2e9e3..64cd433 100644 --- a/music_kraken/objects/source.py +++ b/music_kraken/objects/source.py @@ -4,6 +4,7 @@ from collections import defaultdict from enum import Enum from typing import List, Dict, Set, Tuple, Optional, Iterable from urllib.parse import urlparse +from dataclasses import dataclass from ..utils.enums.source import SourcePages, SourceTypes from ..utils.config import youtube_settings @@ -14,11 +15,12 @@ from 
.parents import OuterProxy from .collection import Collection + +@dataclass class Source(OuterProxy): url: str - page_enum: SourcePages - referer_page: SourcePages + referrer_page: SourcePages audio_url: str @@ -27,16 +29,16 @@ class Source(OuterProxy): } # This is automatically generated - def __init__(self, page_enum: SourcePages, url: str, referer_page: SourcePages = None, audio_url: str = None, + def __init__(self, page_enum: SourcePages, url: str, referrer_page: SourcePages = None, audio_url: str = None, **kwargs) -> None: - if referer_page is None: - referer_page = page_enum + if referrer_page is None: + referrer_page = page_enum - super().__init__(url=url, page_enum=page_enum, referer_page=referer_page, audio_url=audio_url, **kwargs) + super().__init__(url=url, page_enum=page_enum, referrer_page=referrer_page, audio_url=audio_url, **kwargs) @classmethod - def match_url(cls, url: str, referer_page: SourcePages) -> Optional["Source"]: + def match_url(cls, url: str, referrer_page: SourcePages) -> Optional["Source"]: """ this shouldn't be used, unlesse you are not certain what the source is for the reason is that it is more inefficient @@ -45,38 +47,38 @@ class Source(OuterProxy): url = parsed.geturl() if "musify" in parsed.netloc: - return cls(SourcePages.MUSIFY, url, referer_page=referer_page) + return cls(SourcePages.MUSIFY, url, referrer_page=referrer_page) if parsed.netloc in [_url.netloc for _url in youtube_settings['youtube_url']]: - return cls(SourcePages.YOUTUBE, url, referer_page=referer_page) + return cls(SourcePages.YOUTUBE, url, referrer_page=referrer_page) if url.startswith("https://www.deezer"): - return cls(SourcePages.DEEZER, url, referer_page=referer_page) + return cls(SourcePages.DEEZER, url, referrer_page=referrer_page) if url.startswith("https://open.spotify.com"): - return cls(SourcePages.SPOTIFY, url, referer_page=referer_page) + return cls(SourcePages.SPOTIFY, url, referrer_page=referrer_page) if "bandcamp" in url: - return 
cls(SourcePages.BANDCAMP, url, referer_page=referer_page) + return cls(SourcePages.BANDCAMP, url, referrer_page=referrer_page) if "wikipedia" in parsed.netloc: - return cls(SourcePages.WIKIPEDIA, url, referer_page=referer_page) + return cls(SourcePages.WIKIPEDIA, url, referrer_page=referrer_page) if url.startswith("https://www.metal-archives.com/"): - return cls(SourcePages.ENCYCLOPAEDIA_METALLUM, url, referer_page=referer_page) + return cls(SourcePages.ENCYCLOPAEDIA_METALLUM, url, referrer_page=referrer_page) # the less important once if url.startswith("https://www.facebook"): - return cls(SourcePages.FACEBOOK, url, referer_page=referer_page) + return cls(SourcePages.FACEBOOK, url, referrer_page=referrer_page) if url.startswith("https://www.instagram"): - return cls(SourcePages.INSTAGRAM, url, referer_page=referer_page) + return cls(SourcePages.INSTAGRAM, url, referrer_page=referrer_page) if url.startswith("https://twitter"): - return cls(SourcePages.TWITTER, url, referer_page=referer_page) + return cls(SourcePages.TWITTER, url, referrer_page=referrer_page) if url.startswith("https://myspace.com"): - return cls(SourcePages.MYSPACE, url, referer_page=referer_page) + return cls(SourcePages.MYSPACE, url, referrer_page=referrer_page) def get_song_metadata(self) -> Metadata: return Metadata({ diff --git a/music_kraken/pages/bandcamp.py b/music_kraken/pages/bandcamp.py index 90064db..dcfebbf 100644 --- a/music_kraken/pages/bandcamp.py +++ b/music_kraken/pages/bandcamp.py @@ -185,7 +185,7 @@ class Bandcamp(Page): if li is None and li['href'] is not None: continue - source_list.append(Source.match_url(_parse_artist_url(li['href']), referer_page=self.SOURCE_TYPE)) + source_list.append(Source.match_url(_parse_artist_url(li['href']), referrer_page=self.SOURCE_TYPE)) return Artist( name=name, diff --git a/music_kraken/pages/encyclopaedia_metallum.py b/music_kraken/pages/encyclopaedia_metallum.py index d9ce0ca..dba4527 100644 --- a/music_kraken/pages/encyclopaedia_metallum.py 
+++ b/music_kraken/pages/encyclopaedia_metallum.py @@ -486,7 +486,7 @@ class EncyclopaediaMetallum(Page): href = anchor["href"] if href is not None: - source_list.append(Source.match_url(href, referer_page=self.SOURCE_TYPE)) + source_list.append(Source.match_url(href, referrer_page=self.SOURCE_TYPE)) # The following code is only legacy code, which I just kep because it doesn't harm. # The way ma returns sources changed. @@ -504,7 +504,7 @@ class EncyclopaediaMetallum(Page): if url is None: continue - source_list.append(Source.match_url(url, referer_page=self.SOURCE_TYPE)) + source_list.append(Source.match_url(url, referrer_page=self.SOURCE_TYPE)) return source_list diff --git a/music_kraken/pages/musify.py b/music_kraken/pages/musify.py index 28ac0a9..0e99f75 100644 --- a/music_kraken/pages/musify.py +++ b/music_kraken/pages/musify.py @@ -503,7 +503,7 @@ class Musify(Page): source_list.append(Source( SourcePages.YOUTUBE, iframe["src"], - referer_page=self.SOURCE_TYPE + referrer_page=self.SOURCE_TYPE )) return Song( @@ -812,7 +812,7 @@ class Musify(Page): href = additional_source.get("href") if href is None: continue - new_src = Source.match_url(href, referer_page=self.SOURCE_TYPE) + new_src = Source.match_url(href, referrer_page=self.SOURCE_TYPE) if new_src is None: continue source_list.append(new_src) From c6bdf724e3ceb7804e8bd1211998debbc70b157f Mon Sep 17 00:00:00 2001 From: Lars Noack Date: Mon, 29 Apr 2024 14:40:49 +0200 Subject: [PATCH 05/32] draft: string processing --- music_kraken/objects/source.py | 51 ++++++++++++++----------- music_kraken/utils/string_processing.py | 25 +++++++++++- 2 files changed, 51 insertions(+), 25 deletions(-) diff --git a/music_kraken/objects/source.py b/music_kraken/objects/source.py index 64cd433..c122d11 100644 --- a/music_kraken/objects/source.py +++ b/music_kraken/objects/source.py @@ -2,10 +2,12 @@ from __future__ import annotations from collections import defaultdict from enum import Enum -from typing import List, Dict, 
Set, Tuple, Optional, Iterable -from urllib.parse import urlparse -from dataclasses import dataclass +from typing import List, Dict, Set, Tuple, Optional, Iterable, Generator +from urllib.parse import urlparse, ParseResult +from dataclasses import dataclass, field +from functools import cached_property +from ..utils import generate_id from ..utils.enums.source import SourcePages, SourceTypes from ..utils.config import youtube_settings from ..utils.string_processing import hash_url @@ -17,25 +19,21 @@ from .collection import Collection @dataclass -class Source(OuterProxy): +class Source: url: str page_enum: SourcePages referrer_page: SourcePages + audio_url: Optional[str] - audio_url: str + id: int = field(default_factory=generate_id) + additional_data: dict = field(default_factory=dict) - _default_factories = { - "audio_url": lambda: None, - } - - # This is automatically generated - def __init__(self, page_enum: SourcePages, url: str, referrer_page: SourcePages = None, audio_url: str = None, - **kwargs) -> None: - - if referrer_page is None: - referrer_page = page_enum - - super().__init__(url=url, page_enum=page_enum, referrer_page=referrer_page, audio_url=audio_url, **kwargs) + def __post_init__(self): + self.referrer_page = self.referrer_page or self.page_enum + + @cached_property + def parsed_url(self) -> ParseResult: + return urlparse(self.url) @classmethod def match_url(cls, url: str, referrer_page: SourcePages) -> Optional["Source"]: @@ -122,16 +120,23 @@ class Source(OuterProxy): homepage = property(fget=lambda self: SourcePages.get_homepage(self.page_enum)) -class SourceCollection(Collection): +class SourceCollection: + _page_to_source_list: Dict[SourcePages, List[Source]] + + def __init__(self, data: Optional[Iterable[Source]] = None, **kwargs): - self._page_to_source_list: Dict[SourcePages, List[Source]] = defaultdict(list) + self._page_to_source_list = defaultdict(list) - super().__init__(data=data, **kwargs) + def get_sources(self, *source_pages: 
List[Source]) -> Generator[Source]: + for page in source_pages: + yield from self._page_to_source_list[page] - def _map_element(self, __object: Source, **kwargs): - super()._map_element(__object, **kwargs) + def append(self, source: Source): + pass - self._page_to_source_list[__object.page_enum].append(__object) + def extend(self, sources: Iterable[Source]): + for source in sources: + self.append(source) @property def source_pages(self) -> Set[SourcePages]: diff --git a/music_kraken/utils/string_processing.py b/music_kraken/utils/string_processing.py index 9acd3c8..0b45c6f 100644 --- a/music_kraken/utils/string_processing.py +++ b/music_kraken/utils/string_processing.py @@ -6,6 +6,7 @@ from functools import lru_cache from transliterate.exceptions import LanguageDetectionError from transliterate import translit from pathvalidate import sanitize_filename +from urllib.parse import urlparse, ParseResult, parse_qs COMMON_TITLE_APPENDIX_LIST: Tuple[str, ...] = ( @@ -21,6 +22,7 @@ def unify(string: str) -> str: returns a unified str, to make comparisons easy. a unified string has the following attributes: - is lowercase + - is transliterated to Latin characters from e.g. 
Cyrillic """ if string is None: @@ -132,8 +134,27 @@ def unify_punctuation(to_unify: str) -> str: to_unify = to_unify.replace(char, UNIFY_TO) return to_unify -def hash_url(url: str) -> int: - return url.strip().lower().lstrip("https://").lstrip("http://") +def hash_url(url: Union[str, ParseResult]) -> str: + if isinstance(url, str): + url = urlparse(url) + + query = url.query + query_dict: Optional[dict] = None + try: + query_dict: dict = parse_qs(url.query, strict_parsing=True) + except ValueError: + # the query couldn't be parsed + pass + + if isinstance(query_dict, dict): + # sort keys alphabetically + query = "" + for key, value in sorted(query_dict.items(), key=lambda i: i[0]): + query += f"_{key.strip()}_{''.join(i.strip() for i in value)}" + + r = f"{url.netloc}_{url.path.replace('/', '_')}{query}" + r = r.lower().strip() + return r def remove_feature_part_from_track(title: str) -> str: From 1971982d27de98e5236f78f89bdd51b6eab6e15e Mon Sep 17 00:00:00 2001 From: Lars Noack Date: Mon, 29 Apr 2024 15:31:32 +0200 Subject: [PATCH 06/32] feat: added tests --- music_kraken/utils/string_processing.py | 20 +++++++++++--- tests/__init__.py | 0 tests/test_hash_url.py | 35 +++++++++++++++++++++++++ 3 files changed, 51 insertions(+), 4 deletions(-) create mode 100644 tests/__init__.py create mode 100644 tests/test_hash_url.py diff --git a/music_kraken/utils/string_processing.py b/music_kraken/utils/string_processing.py index 0b45c6f..d9d4c70 100644 --- a/music_kraken/utils/string_processing.py +++ b/music_kraken/utils/string_processing.py @@ -129,15 +129,27 @@ UNIFY_TO = " " ALLOWED_LENGTH_DISTANCE = 20 -def unify_punctuation(to_unify: str) -> str: +def unify_punctuation(to_unify: str, unify_to: str = UNIFY_TO) -> str: for char in string.punctuation: - to_unify = to_unify.replace(char, UNIFY_TO) + to_unify = to_unify.replace(char, unify_to) return to_unify def hash_url(url: Union[str, ParseResult]) -> str: if isinstance(url, str): url = urlparse(url) + unify_to = "-" + 
+ def unify_part(part: str) -> str: + nonlocal unify_to + return unify_punctuation(part.lower(), unify_to=unify_to).strip(unify_to) + + # netloc + netloc = unify_part(url.netloc) + if netloc.startswith("www" + unify_to): + netloc = netloc[3 + len(unify_to):] + + # query query = url.query query_dict: Optional[dict] = None try: @@ -150,9 +162,9 @@ def hash_url(url: Union[str, ParseResult]) -> str: # sort keys alphabetically query = "" for key, value in sorted(query_dict.items(), key=lambda i: i[0]): - query += f"_{key.strip()}_{''.join(i.strip() for i in value)}" + query += f"{key.strip()}-{''.join(i.strip() for i in value)}" - r = f"{url.netloc}_{url.path.replace('/', '_')}{query}" + r = f"{netloc}_{unify_part(url.path)}_{unify_part(query)}" r = r.lower().strip() return r diff --git a/tests/__init__.py b/tests/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/tests/test_hash_url.py b/tests/test_hash_url.py new file mode 100644 index 0000000..f87b2ff --- /dev/null +++ b/tests/test_hash_url.py @@ -0,0 +1,35 @@ +import unittest + +from music_kraken.utils.string_processing import hash_url + + +class TestCollection(unittest.TestCase): + def test_remove_schema(self): + self.assertFalse(hash_url("https://www.youtube.com/watch?v=3jZ_D3ELwOQ").startswith("https")) + self.assertFalse(hash_url("ftp://www.youtube.com/watch?v=3jZ_D3ELwOQ").startswith("https")) + self.assertFalse(hash_url("sftp://www.youtube.com/watch?v=3jZ_D3ELwOQ").startswith("https")) + self.assertFalse(hash_url("http://www.youtube.com/watch?v=3jZ_D3ELwOQ").startswith("https")) + + def test_no_punctuation(self): + self.assertNotIn(hash_url("https://www.you_tube.com/watch?v=3jZ_D3ELwOQ"), "you_tube") + self.assertNotIn(hash_url("https://docs.gitea.com/next/install.ation/comparison"), ".") + + def test_three_parts(self): + """ + The url is parsed into three parts [netloc; path; query] + Which are then appended to each other with an underscore between. 
+ """ + + self.assertTrue(hash_url("https://duckduckgo.com/?t=h_&q=dfasf&ia=web").count("_") == 2) + + def test_sort_query(self): + """ + The query is sorted alphabetically + """ + hashed = hash_url("https://duckduckgo.com/?t=h_&q=dfasf&ia=web") + sorted_keys = ["ia-", "q-", "t-"] + + self.assertTrue(hashed.index(sorted_keys[0]) < hashed.index(sorted_keys[1]) < hashed.index(sorted_keys[2])) + +if __name__ == "__main__": + unittest.main() From 8f9858da6031f8ab192bc1e75a668addc76fce4f Mon Sep 17 00:00:00 2001 From: Lars Noack Date: Mon, 29 Apr 2024 17:06:31 +0200 Subject: [PATCH 07/32] draft: no metadata function for source --- music_kraken/objects/song.py | 5 +- music_kraken/objects/source.py | 97 +++++++++++++------------ music_kraken/pages/abstract.py | 4 +- music_kraken/pages/musify.py | 2 +- music_kraken/utils/enums/source.py | 56 ++++++-------- music_kraken/utils/string_processing.py | 1 + 6 files changed, 81 insertions(+), 84 deletions(-) diff --git a/music_kraken/objects/song.py b/music_kraken/objects/song.py index b227f64..e396a32 100644 --- a/music_kraken/objects/song.py +++ b/music_kraken/objects/song.py @@ -169,6 +169,8 @@ class Song(Base): id3Mapping.GENRE: [self.genre], id3Mapping.TRACKNUMBER: [self.tracksort_str], id3Mapping.COMMENT: [self.note.markdown], + id3Mapping.FILE_WEBPAGE_URL: self.source_collection.url_list, + id3Mapping.SOURCE_WEBPAGE_URL: self.source_collection.homepage_list, }) # metadata.merge_many([s.get_song_metadata() for s in self.source_collection]) album sources have no relevant metadata for id3 @@ -555,7 +557,8 @@ class Artist(Base): @property def metadata(self) -> Metadata: metadata = Metadata({ - id3Mapping.ARTIST: [self.name] + id3Mapping.ARTIST: [self.name], + id3Mapping.ARTIST_WEBPAGE_URL: self.source_collection.url_list, }) metadata.merge_many([s.get_artist_metadata() for s in self.source_collection]) diff --git a/music_kraken/objects/source.py b/music_kraken/objects/source.py index c122d11..710f7ee 100644 --- 
a/music_kraken/objects/source.py +++ b/music_kraken/objects/source.py @@ -7,10 +7,9 @@ from urllib.parse import urlparse, ParseResult from dataclasses import dataclass, field from functools import cached_property -from ..utils import generate_id from ..utils.enums.source import SourcePages, SourceTypes from ..utils.config import youtube_settings -from ..utils.string_processing import hash_url +from ..utils.string_processing import hash_url, shorten_display_url from .metadata import Mapping, Metadata from .parents import OuterProxy @@ -20,12 +19,11 @@ from .collection import Collection @dataclass class Source: - url: str page_enum: SourcePages - referrer_page: SourcePages - audio_url: Optional[str] + url: str + referrer_page: SourcePages = None + audio_url: Optional[str] = None - id: int = field(default_factory=generate_id) additional_data: dict = field(default_factory=dict) def __post_init__(self): @@ -36,7 +34,7 @@ class Source: return urlparse(self.url) @classmethod - def match_url(cls, url: str, referrer_page: SourcePages) -> Optional["Source"]: + def match_url(cls, url: str, referrer_page: SourcePages) -> Optional[Source]: """ this shouldn't be used, unlesse you are not certain what the source is for the reason is that it is more inefficient @@ -78,73 +76,78 @@ class Source: if url.startswith("https://myspace.com"): return cls(SourcePages.MYSPACE, url, referrer_page=referrer_page) - def get_song_metadata(self) -> Metadata: - return Metadata({ - Mapping.FILE_WEBPAGE_URL: [self.url], - Mapping.SOURCE_WEBPAGE_URL: [self.homepage] - }) - - def get_artist_metadata(self) -> Metadata: - return Metadata({ - Mapping.ARTIST_WEBPAGE_URL: [self.url] - }) - @property def hash_url(self) -> str: return hash_url(self.url) @property - def metadata(self) -> Metadata: - return self.get_song_metadata() - - @property - def indexing_values(self) -> List[Tuple[str, object]]: - return [ - ('id', self.id), - ('url', self.url), - ('audio_url', self.audio_url), - ] - - def __str__(self): 
- return self.__repr__() + def indexing_values(self) -> list: + r = [hash_url(self.url)] + if self.audio_url: + r.append(hash_url(self.audio_url)) + return r def __repr__(self) -> str: - return f"Src({self.page_enum.value}: {self.url}, {self.audio_url})" + return f"Src({self.page_enum.value}: {shorten_display_url(self.url)})" - @property - def title_string(self) -> str: - return self.url + def __merge__(self, other: Source, override: bool = False): + if self.audio_url is None: + self.audio_url = other.audio_url + self.additional_data.update(other.additional_data) page_str = property(fget=lambda self: self.page_enum.value) - type_str = property(fget=lambda self: self.type_enum.value) - homepage = property(fget=lambda self: SourcePages.get_homepage(self.page_enum)) class SourceCollection: + _indexed_sources: Dict[str, Source] _page_to_source_list: Dict[SourcePages, List[Source]] - def __init__(self, data: Optional[Iterable[Source]] = None, **kwargs): self._page_to_source_list = defaultdict(list) + self._indexed_sources = {} + + self.extend(data or []) def get_sources(self, *source_pages: List[Source]) -> Generator[Source]: + if not len(source_pages): + source_pages = self.source_pages + for page in source_pages: yield from self._page_to_source_list[page] def append(self, source: Source): - pass + existing_source = None + for key in source.indexing_values: + if key in self._indexed_sources: + existing_source = self._indexed_sources[key] + break + + if existing_source is not None: + existing_source.__merge__(source) + source = existing_source + + for key in source.indexing_values: + self._indexed_sources[key] = source + self._page_to_source_list[source.page_enum].append(source) def extend(self, sources: Iterable[Source]): for source in sources: self.append(source) + + def __iter__(self): + yield from self.get_sources() + + def __merge__(self, other: SourceCollection, override: bool = False): + self.extend(other) @property - def source_pages(self) -> Set[SourcePages]: - 
return set(source.page_enum for source in self._data) + def source_pages(self) -> Iterable[SourcePages]: + return sorted(self._page_to_source_list.keys(), key=lambda page: page.value) - def get_sources_from_page(self, source_page: SourcePages) -> List[Source]: - """ - getting the sources for a specific page like - YouTube or musify - """ - return self._page_to_source_list[source_page].copy() + @property + def url_list(self) -> List[str]: + return [source.url for source in self.get_sources(SourcePages.ARTIST)] + + @property + def homepage_list(self) -> List[str]: + return [source.homepage for source in self.source_pages] \ No newline at end of file diff --git a/music_kraken/pages/abstract.py b/music_kraken/pages/abstract.py index 468067b..712c371 100644 --- a/music_kraken/pages/abstract.py +++ b/music_kraken/pages/abstract.py @@ -246,7 +246,7 @@ class Page: # only certain database objects, have a source list if isinstance(music_object, INDEPENDENT_DB_OBJECTS): source: Source - for source in music_object.source_collection.get_sources_from_page(self.SOURCE_TYPE): + for source in music_object.source_collection.get_sources(self.SOURCE_TYPE): if music_object.already_fetched_from(source.hash_url): continue @@ -419,7 +419,7 @@ class Page: if song.target_collection.empty: song.target_collection.append(new_target) - sources = song.source_collection.get_sources_from_page(self.SOURCE_TYPE) + sources = song.source_collection.get_sources(self.SOURCE_TYPE) if len(sources) == 0: return DownloadResult(error_message=f"No source found for {song.title} as {self.__class__.__name__}.") diff --git a/music_kraken/pages/musify.py b/music_kraken/pages/musify.py index 0e99f75..4646385 100644 --- a/music_kraken/pages/musify.py +++ b/music_kraken/pages/musify.py @@ -693,7 +693,7 @@ class Musify(Page): if stop_at_level > 1: song: Song for song in album.song_collection: - sources = song.source_collection.get_sources_from_page(self.SOURCE_TYPE) + sources = 
song.source_collection.get_sources(self.SOURCE_TYPE) for source in sources: song.merge(self.fetch_song(source=source)) diff --git a/music_kraken/utils/enums/source.py b/music_kraken/utils/enums/source.py index a5e213e..be3171f 100644 --- a/music_kraken/utils/enums/source.py +++ b/music_kraken/utils/enums/source.py @@ -9,42 +9,32 @@ class SourceTypes(Enum): class SourcePages(Enum): - YOUTUBE = "youtube" - MUSIFY = "musify" - YOUTUBE_MUSIC = "youtube music" - GENIUS = "genius" - MUSICBRAINZ = "musicbrainz" + YOUTUBE = "youtube", "https://www.youtube.com/" + MUSIFY = "musify", "https://musify.club/" + YOUTUBE_MUSIC = "youtube music", "https://music.youtube.com/" + GENIUS = "genius", "https://genius.com/" + MUSICBRAINZ = "musicbrainz", "https://musicbrainz.org/" ENCYCLOPAEDIA_METALLUM = "encyclopaedia metallum" - BANDCAMP = "bandcamp" - DEEZER = "deezer" - SPOTIFY = "spotify" + BANDCAMP = "bandcamp", "https://bandcamp.com/" + DEEZER = "deezer", "https://www.deezer.com/" + SPOTIFY = "spotify", "https://open.spotify.com/" # This has nothing to do with audio, but bands can be here - WIKIPEDIA = "wikipedia" - INSTAGRAM = "instagram" - FACEBOOK = "facebook" - TWITTER = "twitter" # I will use nitter though lol - MYSPACE = "myspace" # Yes somehow this ancient site is linked EVERYWHERE + WIKIPEDIA = "wikipedia", "https://en.wikipedia.org/wiki/Main_Page" + INSTAGRAM = "instagram", "https://www.instagram.com/" + FACEBOOK = "facebook", "https://www.facebook.com/" + TWITTER = "twitter", "https://twitter.com/" + MYSPACE = "myspace", "https://myspace.com/" # Yes somehow this ancient site is linked EVERYWHERE - MANUAL = "manual" + MANUAL = "manual", "" - PRESET = "preset" + PRESET = "preset", "" - @classmethod - def get_homepage(cls, attribute) -> str: - homepage_map = { - cls.YOUTUBE: "https://www.youtube.com/", - cls.MUSIFY: "https://musify.club/", - cls.MUSICBRAINZ: "https://musicbrainz.org/", - cls.ENCYCLOPAEDIA_METALLUM: "https://www.metal-archives.com/", - cls.GENIUS: 
"https://genius.com/", - cls.BANDCAMP: "https://bandcamp.com/", - cls.DEEZER: "https://www.deezer.com/", - cls.INSTAGRAM: "https://www.instagram.com/", - cls.FACEBOOK: "https://www.facebook.com/", - cls.SPOTIFY: "https://open.spotify.com/", - cls.TWITTER: "https://twitter.com/", - cls.MYSPACE: "https://myspace.com/", - cls.WIKIPEDIA: "https://en.wikipedia.org/wiki/Main_Page" - } - return homepage_map[attribute] \ No newline at end of file + def __new__(cls, value, homepage = None): + member = object.__new__(cls) + + member._value_ = value + member.homepage = homepage + + return member + \ No newline at end of file diff --git a/music_kraken/utils/string_processing.py b/music_kraken/utils/string_processing.py index d9d4c70..c81c71f 100644 --- a/music_kraken/utils/string_processing.py +++ b/music_kraken/utils/string_processing.py @@ -134,6 +134,7 @@ def unify_punctuation(to_unify: str, unify_to: str = UNIFY_TO) -> str: to_unify = to_unify.replace(char, unify_to) return to_unify +@lru_cache(maxsize=128) def hash_url(url: Union[str, ParseResult]) -> str: if isinstance(url, str): url = urlparse(url) From 67f475076c0e73d0ccf741a5656fca766850de7f Mon Sep 17 00:00:00 2001 From: Lars Noack Date: Mon, 29 Apr 2024 17:19:09 +0200 Subject: [PATCH 08/32] feat: cleaned downloading --- music_kraken/objects/song.py | 1 - music_kraken/objects/source.py | 5 ++++- music_kraken/pages/abstract.py | 22 ++++++++++++++-------- 3 files changed, 18 insertions(+), 10 deletions(-) diff --git a/music_kraken/objects/song.py b/music_kraken/objects/song.py index e396a32..37f1a6e 100644 --- a/music_kraken/objects/song.py +++ b/music_kraken/objects/song.py @@ -560,7 +560,6 @@ class Artist(Base): id3Mapping.ARTIST: [self.name], id3Mapping.ARTIST_WEBPAGE_URL: self.source_collection.url_list, }) - metadata.merge_many([s.get_artist_metadata() for s in self.source_collection]) return metadata diff --git a/music_kraken/objects/source.py b/music_kraken/objects/source.py index 710f7ee..130cacd 100644 --- 
a/music_kraken/objects/source.py +++ b/music_kraken/objects/source.py @@ -108,6 +108,9 @@ class SourceCollection: self.extend(data or []) + def has_source_page(self, *source_pages: SourcePages) -> bool: + return any(source_page in self._page_to_source_list for source_page in source_pages) + def get_sources(self, *source_pages: List[Source]) -> Generator[Source]: if not len(source_pages): source_pages = self.source_pages @@ -146,7 +149,7 @@ class SourceCollection: @property def url_list(self) -> List[str]: - return [source.url for source in self.get_sources(SourcePages.ARTIST)] + return [source.url for source in self.get_sources()] @property def homepage_list(self) -> List[str]: diff --git a/music_kraken/pages/abstract.py b/music_kraken/pages/abstract.py index 712c371..c405c60 100644 --- a/music_kraken/pages/abstract.py +++ b/music_kraken/pages/abstract.py @@ -419,9 +419,10 @@ class Page: if song.target_collection.empty: song.target_collection.append(new_target) + if not song.source_collection.has_source_page(self.SOURCE_TYPE): + return DownloadResult(error_message=f"No {self.__class__.__name__} source found for {song.option_string}.") + sources = song.source_collection.get_sources(self.SOURCE_TYPE) - if len(sources) == 0: - return DownloadResult(error_message=f"No source found for {song.title} as {self.__class__.__name__}.") temp_target: Target = Target( relative_to_music_dir=False, @@ -448,14 +449,19 @@ class Page: self.LOGGER.info(f"{song.option_string} already exists, thus not downloading again.") return r - source = sources[0] - if not found_on_disc: - r = self.download_song_to_target(source=source, target=temp_target, desc=song.option_string) + for source in sources: + r = self.download_song_to_target(source=source, target=temp_target, desc=song.option_string) - if not r.is_fatal_error: - r.merge(self._post_process_targets(song, temp_target, - [] if found_on_disc else self.get_skip_intervals(song, source))) + if not r.is_fatal_error: + break + + if 
temp_target.exists: + r.merge(self._post_process_targets( + song=song, + temp_target=temp_target, + interval_list=[] if found_on_disc else self.get_skip_intervals(song, source) + )) return r From 415210522ff5b8dafe1b230b5b607279b65c6a12 Mon Sep 17 00:00:00 2001 From: Lars Noack Date: Mon, 29 Apr 2024 17:27:12 +0200 Subject: [PATCH 09/32] fix: not directly adding all sources --- development/actual_donwload.py | 4 ++-- music_kraken/objects/source.py | 3 ++- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/development/actual_donwload.py b/development/actual_donwload.py index a8eb732..4788eb5 100644 --- a/development/actual_donwload.py +++ b/development/actual_donwload.py @@ -6,8 +6,8 @@ logging.getLogger().setLevel(logging.DEBUG) if __name__ == "__main__": commands = [ - "s: #a Crystal F", - "d: 20", + "s: #a Ghost Bath", + "d: 4", ] diff --git a/music_kraken/objects/source.py b/music_kraken/objects/source.py index 130cacd..8680cde 100644 --- a/music_kraken/objects/source.py +++ b/music_kraken/objects/source.py @@ -128,10 +128,11 @@ class SourceCollection: if existing_source is not None: existing_source.__merge__(source) source = existing_source + else: + self._page_to_source_list[source.page_enum].append(source) for key in source.indexing_values: self._indexed_sources[key] = source - self._page_to_source_list[source.page_enum].append(source) def extend(self, sources: Iterable[Source]): for source in sources: From 95d1df3530b26c6c64de64013d5a9e0f06b63ad8 Mon Sep 17 00:00:00 2001 From: Lars Noack Date: Mon, 29 Apr 2024 17:29:55 +0200 Subject: [PATCH 10/32] fix: not directly adding all sources --- music_kraken/objects/source.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/music_kraken/objects/source.py b/music_kraken/objects/source.py index 8680cde..66a012e 100644 --- a/music_kraken/objects/source.py +++ b/music_kraken/objects/source.py @@ -36,16 +36,16 @@ class Source: @classmethod def match_url(cls, url: str, 
referrer_page: SourcePages) -> Optional[Source]: """ - this shouldn't be used, unlesse you are not certain what the source is for + this shouldn't be used, unless you are not certain what the source is for the reason is that it is more inefficient """ - parsed = urlparse(url) - url = parsed.geturl() + parsed_url = urlparse(url) + url = parsed_url.geturl() - if "musify" in parsed.netloc: + if "musify" in parsed_url.netloc: return cls(SourcePages.MUSIFY, url, referrer_page=referrer_page) - if parsed.netloc in [_url.netloc for _url in youtube_settings['youtube_url']]: + if parsed_url.netloc in [_url.netloc for _url in youtube_settings['youtube_url']]: return cls(SourcePages.YOUTUBE, url, referrer_page=referrer_page) if url.startswith("https://www.deezer"): @@ -57,7 +57,7 @@ class Source: if "bandcamp" in url: return cls(SourcePages.BANDCAMP, url, referrer_page=referrer_page) - if "wikipedia" in parsed.netloc: + if "wikipedia" in parsed_url.netloc: return cls(SourcePages.WIKIPEDIA, url, referrer_page=referrer_page) if url.startswith("https://www.metal-archives.com/"): From 8e1dfd0be6c691704be2ecd9cc29d2d9f7e93bb9 Mon Sep 17 00:00:00 2001 From: Lars Noack Date: Mon, 29 Apr 2024 17:36:43 +0200 Subject: [PATCH 11/32] draft: added canged version --- music_kraken/objects/source.py | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/music_kraken/objects/source.py b/music_kraken/objects/source.py index 66a012e..26425a8 100644 --- a/music_kraken/objects/source.py +++ b/music_kraken/objects/source.py @@ -7,6 +7,7 @@ from urllib.parse import urlparse, ParseResult from dataclasses import dataclass, field from functools import cached_property +from ..utils import generate_id from ..utils.enums.source import SourcePages, SourceTypes from ..utils.config import youtube_settings from ..utils.string_processing import hash_url, shorten_display_url @@ -99,6 +100,8 @@ class Source: class SourceCollection: + __change_version__ = generate_id() + _indexed_sources: Dict[str, 
Source] _page_to_source_list: Dict[SourcePages, List[Source]] @@ -131,9 +134,15 @@ class SourceCollection: else: self._page_to_source_list[source.page_enum].append(source) + changed = False for key in source.indexing_values: + if key not in self._indexed_sources: + changed = True self._indexed_sources[key] = source + if changed: + self.__change_version__ = generate_id() + def extend(self, sources: Iterable[Source]): for source in sources: self.append(source) @@ -148,6 +157,10 @@ class SourceCollection: def source_pages(self) -> Iterable[SourcePages]: return sorted(self._page_to_source_list.keys(), key=lambda page: page.value) + @property + def hash_url_list(self) -> List[str]: + return [hash_url(source.url) for source in self.get_sources()] + @property def url_list(self) -> List[str]: return [source.url for source in self.get_sources()] From 3737e0dc81880e80208f72c7a2b65283424d5255 Mon Sep 17 00:00:00 2001 From: Lars Noack Date: Mon, 29 Apr 2024 18:18:57 +0200 Subject: [PATCH 12/32] feat: added id possibility to output --- development/actual_donwload.py | 2 +- music_kraken/objects/collection.py | 14 +++++++------- music_kraken/objects/parents.py | 3 ++- music_kraken/objects/song.py | 28 +++++++++++++++++++--------- music_kraken/objects/source.py | 6 +++++- 5 files changed, 34 insertions(+), 19 deletions(-) diff --git a/development/actual_donwload.py b/development/actual_donwload.py index 4788eb5..8161548 100644 --- a/development/actual_donwload.py +++ b/development/actual_donwload.py @@ -7,7 +7,7 @@ logging.getLogger().setLevel(logging.DEBUG) if __name__ == "__main__": commands = [ "s: #a Ghost Bath", - "d: 4", + "d: 14", ] diff --git a/music_kraken/objects/collection.py b/music_kraken/objects/collection.py index 02bff19..0842f58 100644 --- a/music_kraken/objects/collection.py +++ b/music_kraken/objects/collection.py @@ -13,8 +13,8 @@ class Collection(Generic[T]): _data: List[T] - _indexed_values: Dict[str, set] - _indexed_to_objects: Dict[any, list] + 
_indexed_from_id: Dict[int, Dict[str, Any]] + _indexed_values: Dict[str, Dict[Any, T]] shallow_list = property(fget=lambda self: self.data) @@ -74,6 +74,10 @@ class Collection(Generic[T]): del self._indexed_from_id[obj_id] + def _remap(self): + for e in self: + self._map_element(e) + def _find_object(self, __object: T) -> Optional[T]: for name, value in __object.indexing_values: if value in self._indexed_values[name]: @@ -94,6 +98,7 @@ class Collection(Generic[T]): if __object is None: return + self._remap() existing_object = self._find_object(__object) if existing_object is None: @@ -112,15 +117,10 @@ class Collection(Generic[T]): b = __object.__getattribute__(attribute) object_trace(f"Syncing [{a}{id(a)}] = [{b}{id(b)}]") - data_to_extend = b.data - a._collection_for.update(b._collection_for) for synced_with, key in b._collection_for.items(): synced_with.__setattr__(key, a) - a.extend(data_to_extend) - - else: # merge only if the two objects are not the same if existing_object.id == __object.id: diff --git a/music_kraken/objects/parents.py b/music_kraken/objects/parents.py index 2f04b45..4db562f 100644 --- a/music_kraken/objects/parents.py +++ b/music_kraken/objects/parents.py @@ -32,7 +32,6 @@ class InnerData: """ Attribute versions keep track, of if the attribute has been changed. 
""" - _attribute_versions: Dict[str, int] = None def __init__(self, object_type, **kwargs): self._refers_to_instances = set() @@ -249,6 +248,8 @@ class OuterProxy: return r + INDEX_DEPENDS_ON: List[str] = [] + @property def indexing_values(self) -> List[Tuple[str, object]]: """ diff --git a/music_kraken/objects/song.py b/music_kraken/objects/song.py index 37f1a6e..8da5e16 100644 --- a/music_kraken/objects/song.py +++ b/music_kraken/objects/song.py @@ -43,7 +43,8 @@ def get_collection_string( template: str, ignore_titles: Set[str] = None, background: BColors = OPTION_BACKGROUND, - foreground: BColors = OPTION_FOREGROUND + foreground: BColors = OPTION_FOREGROUND, + add_id: bool = False, ) -> str: if collection.empty: return "" @@ -55,8 +56,15 @@ def get_collection_string( r = background + def get_element_str(element) -> str: + nonlocal add_id + r = element.title_string.strip() + if add_id: + r += " " + str(element.id) + return r + element: Base - titel_list: List[str] = [element.title_string.strip() for element in collection if element.title_string not in ignore_titles] + titel_list: List[str] = [get_element_str(element) for element in collection if element.title_string not in ignore_titles] for i, titel in enumerate(titel_list): delimiter = ", " @@ -151,13 +159,14 @@ class Song(Base): self.album_collection.extend(object_list) return + INDEX_DEPENDS_ON = ("title", "isrc", "source_collection") + @property def indexing_values(self) -> List[Tuple[str, object]]: return [ - ('id', self.id), ('title', unify(self.title)), ('isrc', self.isrc), - *[('url', source.url) for source in self.source_collection] + *self.source_collection.indexing_values(), ] @property @@ -304,13 +313,14 @@ class Album(Base): self.label_collection.extend(object_list) return + INDEX_DEPENDS_ON = ("title", "barcode", "source_collection") + @property def indexing_values(self) -> List[Tuple[str, object]]: return [ - ('id', self.id), ('title', unify(self.title)), ('barcode', self.barcode), - *[('url', 
source.url) for source in self.source_collection] + *self.source_collection.indexing_values(), ] @property @@ -545,13 +555,13 @@ class Artist(Base): # replace the old collection with the new one self.main_album_collection: Collection = Collection(data=album_list, element_type=Album) + INDEX_DEPENDS_ON = ("name", "source_collection", "contact_collection") @property def indexing_values(self) -> List[Tuple[str, object]]: return [ - ('id', self.id), ('name', unify(self.name)), - *[('url', source.url) for source in self.source_collection], - *[('contact', contact.value) for contact in self.contact_collection] + *[('contact', contact.value) for contact in self.contact_collection], + *self.source_collection.indexing_values(), ] @property diff --git a/music_kraken/objects/source.py b/music_kraken/objects/source.py index 26425a8..b605aec 100644 --- a/music_kraken/objects/source.py +++ b/music_kraken/objects/source.py @@ -167,4 +167,8 @@ class SourceCollection: @property def homepage_list(self) -> List[str]: - return [source.homepage for source in self.source_pages] \ No newline at end of file + return [source.homepage for source in self.source_pages] + + def indexing_values(self) -> Generator[Tuple[str, str], None, None]: + for index in self._indexed_sources: + yield "url", index \ No newline at end of file From 3e29e1d3224a2f1a37bbe09e1f062e59b504a0d0 Mon Sep 17 00:00:00 2001 From: Hellow <74311245+HeIIow2@users.noreply.github.com> Date: Mon, 29 Apr 2024 22:37:07 +0200 Subject: [PATCH 13/32] draft: fix collection appending --- development/actual_donwload.py | 4 ++-- music_kraken/objects/artwork.py | 4 ++-- music_kraken/objects/collection.py | 36 ++++++++++++++---------------- music_kraken/objects/parents.py | 18 +++++---------- music_kraken/objects/song.py | 2 +- music_kraken/objects/source.py | 4 ++-- music_kraken/utils/shared.py | 2 +- 7 files changed, 31 insertions(+), 39 deletions(-) diff --git a/development/actual_donwload.py b/development/actual_donwload.py index 
8161548..a8eb732 100644 --- a/development/actual_donwload.py +++ b/development/actual_donwload.py @@ -6,8 +6,8 @@ logging.getLogger().setLevel(logging.DEBUG) if __name__ == "__main__": commands = [ - "s: #a Ghost Bath", - "d: 14", + "s: #a Crystal F", + "d: 20", ] diff --git a/music_kraken/objects/artwork.py b/music_kraken/objects/artwork.py index 43ea87e..d5ba54b 100644 --- a/music_kraken/objects/artwork.py +++ b/music_kraken/objects/artwork.py @@ -53,9 +53,9 @@ class Artwork: def get_variant_name(self, variant: ArtworkVariant) -> str: return f"artwork_{variant['width']}x{variant['height']}_{hash_url(variant['url']).replace('/', '_')}" - def __merge__(self, other: Artwork, override: bool = False) -> None: + def __merge__(self, other: Artwork, **kwargs) -> None: for key, value in other._variant_mapping.items(): - if key not in self._variant_mapping or override: + if key not in self._variant_mapping: self._variant_mapping[key] = value def __eq__(self, other: Artwork) -> bool: diff --git a/music_kraken/objects/collection.py b/music_kraken/objects/collection.py index 0842f58..6f8be7e 100644 --- a/music_kraken/objects/collection.py +++ b/music_kraken/objects/collection.py @@ -79,26 +79,25 @@ class Collection(Generic[T]): self._map_element(e) def _find_object(self, __object: T) -> Optional[T]: + self._remap() + for name, value in __object.indexing_values: if value in self._indexed_values[name]: return self._indexed_values[name][value] - def append(self, __object: Optional[T], already_is_parent: bool = False, from_map: bool = False): + def append(self, __object: Optional[T], **kwargs): """ If an object, that represents the same entity exists in a relevant collection, merge into this object. (and remap) Else append to this collection. 
:param __object: - :param already_is_parent: - :param from_map: :return: """ if __object is None: return - self._remap() existing_object = self._find_object(__object) if existing_object is None: @@ -107,40 +106,39 @@ class Collection(Generic[T]): self._map_element(__object) for collection_attribute, child_collection in self.extend_object_to_attribute.items(): - __object.__getattribute__(collection_attribute).extend(child_collection) + __object.__getattribute__(collection_attribute).extend(child_collection, **kwargs) for attribute, new_object in self.append_object_to_attribute.items(): - __object.__getattribute__(attribute).append(new_object) + __object.__getattribute__(attribute).append(new_object, **kwargs) # only modify collections if the object actually has been appended for attribute, a in self.sync_on_append.items(): b = __object.__getattribute__(attribute) - object_trace(f"Syncing [{a}{id(a)}] = [{b}{id(b)}]") + if a is b: + continue + + object_trace(f"Syncing [{a}] = [{b}]") - a._collection_for.update(b._collection_for) for synced_with, key in b._collection_for.items(): synced_with.__setattr__(key, a) + a._collection_for.update(b._collection_for) + + a.extend(b.data, **kwargs) else: # merge only if the two objects are not the same if existing_object.id == __object.id: return - old_id = existing_object.id - - existing_object.merge(__object) - - if existing_object.id != old_id: - self._unmap_element(old_id) - + existing_object.merge(__object, **kwargs) self._map_element(existing_object) - def extend(self, __iterable: Optional[Generator[T, None, None]]): + def extend(self, __iterable: Optional[Generator[T, None, None]], **kwargs): if __iterable is None: return for __object in __iterable: - self.append(__object) + self.append(__object, **kwargs) @property def data(self) -> List[T]: @@ -156,8 +154,8 @@ class Collection(Generic[T]): def __iter__(self) -> Iterator[T]: yield from self._data - def __merge__(self, __other: Collection, override: bool = False): - 
self.extend(__other) + def __merge__(self, __other: Collection, **kwargs): + self.extend(__other, **kwargs) def __getitem__(self, item: int): return self._data[item] diff --git a/music_kraken/objects/parents.py b/music_kraken/objects/parents.py index 4db562f..c6f2138 100644 --- a/music_kraken/objects/parents.py +++ b/music_kraken/objects/parents.py @@ -50,10 +50,9 @@ class InnerData: def __hash__(self): return self.id - def __merge__(self, __other: InnerData, override: bool = False): + def __merge__(self, __other: InnerData, **kwargs): """ :param __other: - :param override: :return: """ @@ -68,13 +67,9 @@ class InnerData: # if the object of value implemented __merge__, it merges existing = self.__getattribute__(key) if hasattr(type(existing), "__merge__"): - existing.__merge__(value, override) + existing.__merge__(value, **kwargs) continue - # override the existing value if requested - if override: - self.__setattr__(key, value) - class OuterProxy: """ @@ -174,13 +169,12 @@ class OuterProxy: def __eq__(self, other: Any): return self.__hash__() == other.__hash__() - def merge(self, __other: Optional[OuterProxy], override: bool = False): + def merge(self, __other: Optional[OuterProxy], **kwargs): """ 1. merges the data of __other in self 2. 
replaces the data of __other with the data of self :param __other: - :param override: :return: """ if __other is None: @@ -205,11 +199,11 @@ class OuterProxy: instance._inner = a._inner a._inner._refers_to_instances.add(instance) - a._inner.__merge__(old_inner, override=override) + a._inner.__merge__(old_inner, **kwargs) del old_inner - def __merge__(self, __other: Optional[OuterProxy], override: bool = False): - self.merge(__other, override) + def __merge__(self, __other: Optional[OuterProxy], **kwargs): + self.merge(__other, **kwargs) def mark_as_fetched(self, *url_hash_list: List[str]): for url_hash in url_hash_list: diff --git a/music_kraken/objects/song.py b/music_kraken/objects/song.py index 8da5e16..8ccbc23 100644 --- a/music_kraken/objects/song.py +++ b/music_kraken/objects/song.py @@ -44,7 +44,7 @@ def get_collection_string( ignore_titles: Set[str] = None, background: BColors = OPTION_BACKGROUND, foreground: BColors = OPTION_FOREGROUND, - add_id: bool = False, + add_id: bool = True, ) -> str: if collection.empty: return "" diff --git a/music_kraken/objects/source.py b/music_kraken/objects/source.py index b605aec..d20d571 100644 --- a/music_kraken/objects/source.py +++ b/music_kraken/objects/source.py @@ -91,7 +91,7 @@ class Source: def __repr__(self) -> str: return f"Src({self.page_enum.value}: {shorten_display_url(self.url)})" - def __merge__(self, other: Source, override: bool = False): + def __merge__(self, other: Source, **kwargs): if self.audio_url is None: self.audio_url = other.audio_url self.additional_data.update(other.additional_data) @@ -150,7 +150,7 @@ class SourceCollection: def __iter__(self): yield from self.get_sources() - def __merge__(self, other: SourceCollection, override: bool = False): + def __merge__(self, other: SourceCollection, **kwargs): self.extend(other) @property diff --git a/music_kraken/utils/shared.py b/music_kraken/utils/shared.py index a2b06b8..401b051 100644 --- a/music_kraken/utils/shared.py +++ 
b/music_kraken/utils/shared.py @@ -15,7 +15,7 @@ __stage__ = os.getenv("STAGE", "prod") DEBUG = (__stage__ == "dev") and True DEBUG_LOGGING = DEBUG and False DEBUG_TRACE = DEBUG and True -DEBUG_OBJECT_TRACE = DEBUG and False +DEBUG_OBJECT_TRACE = DEBUG and True DEBUG_OBJECT_TRACE_CALLSTACK = DEBUG_OBJECT_TRACE and False DEBUG_YOUTUBE_INITIALIZING = DEBUG and False DEBUG_PAGES = DEBUG and False From e9b1a12aa1a347dc0c8bc7b07909361b98d1e36e Mon Sep 17 00:00:00 2001 From: Hellow <74311245+HeIIow2@users.noreply.github.com> Date: Mon, 29 Apr 2024 23:40:48 +0200 Subject: [PATCH 14/32] draft: the problem is in _list_renderer.py --- music_kraken/objects/collection.py | 22 ++++++++++---- music_kraken/objects/parents.py | 1 + music_kraken/pages/abstract.py | 46 ------------------------------ 3 files changed, 18 insertions(+), 51 deletions(-) diff --git a/music_kraken/objects/collection.py b/music_kraken/objects/collection.py index 6f8be7e..f3139cd 100644 --- a/music_kraken/objects/collection.py +++ b/music_kraken/objects/collection.py @@ -1,7 +1,7 @@ from __future__ import annotations from collections import defaultdict -from typing import TypeVar, Generic, Dict, Optional, Iterable, List, Iterator, Tuple, Generator, Union, Any +from typing import TypeVar, Generic, Dict, Optional, Iterable, List, Iterator, Tuple, Generator, Union, Any, Set from .parents import OuterProxy from ..utils import object_trace @@ -117,14 +117,26 @@ class Collection(Generic[T]): if a is b: continue - object_trace(f"Syncing [{a}] = [{b}]") + no_sync_collection: Set[Collection] = kwargs.get("no_sync_collection", set()) + object_trace(f"Syncing [{a}] = [{b}]; {no_sync_collection}") + if id(b) in no_sync_collection: + continue - for synced_with, key in b._collection_for.items(): + + b_data = b.data.copy() + b_collection_for = b._collection_for.copy() + no_sync_collection.add(id(b)) + kwargs["no_sync_collection"] = no_sync_collection + del b + + for synced_with, key in b_collection_for.items(): 
synced_with.__setattr__(key, a) - a._collection_for.update(b._collection_for) + a._collection_for[synced_with] = key - a.extend(b.data, **kwargs) + print(synced_with, key) + a.extend(b_data, **kwargs) + else: # merge only if the two objects are not the same if existing_object.id == __object.id: diff --git a/music_kraken/objects/parents.py b/music_kraken/objects/parents.py index c6f2138..c0669c0 100644 --- a/music_kraken/objects/parents.py +++ b/music_kraken/objects/parents.py @@ -45,6 +45,7 @@ class InnerData: for key, value in kwargs.items(): if hasattr(value, "__is_collection__"): value._collection_for[self] = key + self.__setattr__(key, value) def __hash__(self): diff --git a/music_kraken/pages/abstract.py b/music_kraken/pages/abstract.py index c405c60..0ea15db 100644 --- a/music_kraken/pages/abstract.py +++ b/music_kraken/pages/abstract.py @@ -89,52 +89,6 @@ class NamingDict(dict): return self.default_value_for_name(attribute_name) -def _clean_music_object(music_object: INDEPENDENT_DB_OBJECTS, collections: Dict[INDEPENDENT_DB_TYPES, Collection]): - if type(music_object) == Label: - return _clean_label(label=music_object, collections=collections) - if type(music_object) == Artist: - return _clean_artist(artist=music_object, collections=collections) - if type(music_object) == Album: - return _clean_album(album=music_object, collections=collections) - if type(music_object) == Song: - return _clean_song(song=music_object, collections=collections) - - -def _clean_collection(collection: Collection, collection_dict: Dict[INDEPENDENT_DB_TYPES, Collection]): - if collection.element_type not in collection_dict: - return - - for i, element in enumerate(collection): - r = collection_dict[collection.element_type].append(element, merge_into_existing=True) - collection[i] = r.current_element - - if not r.was_the_same: - _clean_music_object(r.current_element, collection_dict) - - -def _clean_label(label: Label, collections: Dict[INDEPENDENT_DB_TYPES, Collection]): - 
_clean_collection(label.current_artist_collection, collections) - _clean_collection(label.album_collection, collections) - - -def _clean_artist(artist: Artist, collections: Dict[INDEPENDENT_DB_TYPES, Collection]): - _clean_collection(artist.main_album_collection, collections) - _clean_collection(artist.feature_song_collection, collections) - _clean_collection(artist.label_collection, collections) - - -def _clean_album(album: Album, collections: Dict[INDEPENDENT_DB_TYPES, Collection]): - _clean_collection(album.label_collection, collections) - _clean_collection(album.song_collection, collections) - _clean_collection(album.artist_collection, collections) - - -def _clean_song(song: Song, collections: Dict[INDEPENDENT_DB_TYPES, Collection]): - _clean_collection(song.album_collection, collections) - _clean_collection(song.feature_artist_collection, collections) - _clean_collection(song.main_artist_collection, collections) - - class Page: """ This is an abstract class, laying out the From a3ef671f00441afb18556adbc0e9f0bb01e2f6b9 Mon Sep 17 00:00:00 2001 From: Hellow <74311245+HeIIow2@users.noreply.github.com> Date: Tue, 30 Apr 2024 02:09:52 +0200 Subject: [PATCH 15/32] feat: tried improving fetching --- music_kraken/objects/collection.py | 5 ++++- music_kraken/pages/youtube_music/_list_render.py | 15 ++++++++++++--- 2 files changed, 16 insertions(+), 4 deletions(-) diff --git a/music_kraken/objects/collection.py b/music_kraken/objects/collection.py index f3139cd..4bba8bb 100644 --- a/music_kraken/objects/collection.py +++ b/music_kraken/objects/collection.py @@ -143,7 +143,10 @@ class Collection(Generic[T]): return existing_object.merge(__object, **kwargs) - self._map_element(existing_object) + self._map_element(existing_object) + + def contains(self, __object: T) -> bool: + return self._find_object(__object) is not None def extend(self, __iterable: Optional[Generator[T, None, None]], **kwargs): if __iterable is None: diff --git 
a/music_kraken/pages/youtube_music/_list_render.py b/music_kraken/pages/youtube_music/_list_render.py index 8076e54..cd7f878 100644 --- a/music_kraken/pages/youtube_music/_list_render.py +++ b/music_kraken/pages/youtube_music/_list_render.py @@ -56,15 +56,24 @@ def music_responsive_list_item_renderer(renderer: dict) -> List[DatabaseObject]: for song in song_list: song.album_collection.extend(album_list) - song.main_artist_collection.extend(artist_list) + + for artist in artist_list: + existing_artist = song.main_artist_collection._find_object(artist) + + if existing_artist is None: + song.feature_artist_collection.append(artist) + else: + existing_artist.merge(artist) + + if len(song_list) > 0: + return song_list for album in album_list: album.artist_collection.extend(artist_list) - if len(song_list) > 0: - return song_list if len(album_list) > 0: return album_list + if len(artist_list) > 0: return artist_list From 312e26ec44f7008f60ea7639285cad56f9698e8a Mon Sep 17 00:00:00 2001 From: Hellow <74311245+HeIIow2@users.noreply.github.com> Date: Tue, 30 Apr 2024 08:11:10 +0200 Subject: [PATCH 16/32] feat: implemented push to --- music_kraken/objects/collection.py | 15 ++++++++++----- music_kraken/objects/song.py | 2 ++ 2 files changed, 12 insertions(+), 5 deletions(-) diff --git a/music_kraken/objects/collection.py b/music_kraken/objects/collection.py index 4bba8bb..7090aff 100644 --- a/music_kraken/objects/collection.py +++ b/music_kraken/objects/collection.py @@ -36,6 +36,8 @@ class Collection(Generic[T]): self.append_object_to_attribute: Dict[str, T] = append_object_to_attribute or {} self.extend_object_to_attribute: Dict[str, Collection[T]] = extend_object_to_attribute or {} self.sync_on_append: Dict[str, Collection] = sync_on_append or {} + self.pull_from: List[Collection] = [] + self.push_to: List[Collection] = [] self._id_to_index_values: Dict[int, set] = defaultdict(set) @@ -79,11 +81,16 @@ class Collection(Generic[T]): self._map_element(e) def 
_find_object(self, __object: T) -> Optional[T]: + for c in self.push_to: + found = c._find_object(__object) + if found is not None: + return found, c + self._remap() for name, value in __object.indexing_values: if value in self._indexed_values[name]: - return self._indexed_values[name][value] + return self._indexed_values[name][value], self def append(self, __object: Optional[T], **kwargs): """ @@ -98,7 +105,7 @@ class Collection(Generic[T]): if __object is None: return - existing_object = self._find_object(__object) + existing_object, map_to = self._find_object(__object) if existing_object is None: # append @@ -133,8 +140,6 @@ class Collection(Generic[T]): synced_with.__setattr__(key, a) a._collection_for[synced_with] = key - print(synced_with, key) - a.extend(b_data, **kwargs) else: @@ -143,7 +148,7 @@ class Collection(Generic[T]): return existing_object.merge(__object, **kwargs) - self._map_element(existing_object) + map_to._map_element(existing_object) def contains(self, __object: T) -> bool: return self._find_object(__object) is not None diff --git a/music_kraken/objects/song.py b/music_kraken/objects/song.py index 8ccbc23..687fc1a 100644 --- a/music_kraken/objects/song.py +++ b/music_kraken/objects/song.py @@ -143,6 +143,8 @@ class Song(Base): "feature_song_collection": self } + self.feature_artist_collection.push_to = [self.main_artist_collection] + def _add_other_db_objects(self, object_type: Type[OuterProxy], object_list: List[OuterProxy]): if object_type is Song: return From 796f609d86293b3f546984fbdb6da7a06752ef76 Mon Sep 17 00:00:00 2001 From: Lars Noack Date: Tue, 30 Apr 2024 09:31:38 +0200 Subject: [PATCH 17/32] fix: push to --- music_kraken/objects/collection.py | 6 ++++-- music_kraken/pages/youtube_music/_list_render.py | 9 +-------- 2 files changed, 5 insertions(+), 10 deletions(-) diff --git a/music_kraken/objects/collection.py b/music_kraken/objects/collection.py index 7090aff..d008fb1 100644 --- a/music_kraken/objects/collection.py +++ 
b/music_kraken/objects/collection.py @@ -82,9 +82,9 @@ class Collection(Generic[T]): def _find_object(self, __object: T) -> Optional[T]: for c in self.push_to: - found = c._find_object(__object) + found, found_in = c._find_object(__object) if found is not None: - return found, c + return found, found_in self._remap() @@ -92,6 +92,8 @@ class Collection(Generic[T]): if value in self._indexed_values[name]: return self._indexed_values[name][value], self + return None, self + def append(self, __object: Optional[T], **kwargs): """ If an object, that represents the same entity exists in a relevant collection, diff --git a/music_kraken/pages/youtube_music/_list_render.py b/music_kraken/pages/youtube_music/_list_render.py index cd7f878..a820014 100644 --- a/music_kraken/pages/youtube_music/_list_render.py +++ b/music_kraken/pages/youtube_music/_list_render.py @@ -56,14 +56,7 @@ def music_responsive_list_item_renderer(renderer: dict) -> List[DatabaseObject]: for song in song_list: song.album_collection.extend(album_list) - - for artist in artist_list: - existing_artist = song.main_artist_collection._find_object(artist) - - if existing_artist is None: - song.feature_artist_collection.append(artist) - else: - existing_artist.merge(artist) + song.feature_artist_collection.extend(artist_list) if len(song_list) > 0: return song_list From e93f6d754cb7d92315f4c8e51c335347d705ddb4 Mon Sep 17 00:00:00 2001 From: Lars Noack Date: Tue, 30 Apr 2024 12:32:55 +0200 Subject: [PATCH 18/32] draft --- music_kraken/objects/collection.py | 37 ++++++++++++++----- music_kraken/objects/song.py | 1 + .../pages/youtube_music/youtube_music.py | 26 +++++++++---- music_kraken/utils/__init__.py | 10 ++++- music_kraken/utils/hacking.py | 18 +++++++-- music_kraken/utils/string_processing.py | 5 ++- 6 files changed, 72 insertions(+), 25 deletions(-) diff --git a/music_kraken/objects/collection.py b/music_kraken/objects/collection.py index d008fb1..497f93e 100644 --- a/music_kraken/objects/collection.py +++ 
b/music_kraken/objects/collection.py @@ -4,6 +4,7 @@ from collections import defaultdict from typing import TypeVar, Generic, Dict, Optional, Iterable, List, Iterator, Tuple, Generator, Union, Any, Set from .parents import OuterProxy from ..utils import object_trace +from ..utils import output, BColors T = TypeVar('T', bound=OuterProxy) @@ -80,11 +81,12 @@ class Collection(Generic[T]): for e in self: self._map_element(e) - def _find_object(self, __object: T) -> Optional[T]: - for c in self.push_to: - found, found_in = c._find_object(__object) - if found is not None: - return found, found_in + def _find_object(self, __object: T, no_push_to: bool = False) -> Optional[T]: + if not no_push_to: + for c in self.push_to: + found, found_in = c._find_object(__object, no_push_to=True) + if found is not None: + return found, found_in self._remap() @@ -104,10 +106,20 @@ class Collection(Generic[T]): :return: """ + if __object is None: return - existing_object, map_to = self._find_object(__object) + existing_object, map_to = self._find_object(__object, no_push_to=kwargs.get("no_push_to", False)) + + if map_to is self: + for other, contained in (c._find_object(__object, no_push_to=True) for c in self.pull_from): + output(other, __object, contained, color=BColors.RED) + if other is None: + continue + + __object.__merge__(other, no_push_to=False, **kwargs) + contained.remove(other) if existing_object is None: # append @@ -135,22 +147,27 @@ class Collection(Generic[T]): b_data = b.data.copy() b_collection_for = b._collection_for.copy() no_sync_collection.add(id(b)) - kwargs["no_sync_collection"] = no_sync_collection + # kwargs["no_sync_collection"] = no_sync_collection del b + a.extend(b_data, **kwargs) + for synced_with, key in b_collection_for.items(): synced_with.__setattr__(key, a) a._collection_for[synced_with] = key - a.extend(b_data, **kwargs) - else: # merge only if the two objects are not the same if existing_object.id == __object.id: return existing_object.merge(__object, 
**kwargs) - map_to._map_element(existing_object) + map_to._map_element(existing_object) + + def remove(self, __object: T) -> T: + self._data.remove(__object) + self._unmap_element(__object) + return __object def contains(self, __object: T) -> bool: return self._find_object(__object) is not None diff --git a/music_kraken/objects/song.py b/music_kraken/objects/song.py index 687fc1a..ee9f689 100644 --- a/music_kraken/objects/song.py +++ b/music_kraken/objects/song.py @@ -144,6 +144,7 @@ class Song(Base): } self.feature_artist_collection.push_to = [self.main_artist_collection] + self.main_artist_collection.pull_from = [self.feature_artist_collection] def _add_other_db_objects(self, object_type: Type[OuterProxy], object_list: List[OuterProxy]): if object_type is Song: diff --git a/music_kraken/pages/youtube_music/youtube_music.py b/music_kraken/pages/youtube_music/youtube_music.py index 6ecbeaf..46581f3 100644 --- a/music_kraken/pages/youtube_music/youtube_music.py +++ b/music_kraken/pages/youtube_music/youtube_music.py @@ -498,7 +498,22 @@ class YoutubeMusic(SuperYouTube): self.fetch_media_url(source=source, ydl_res=ydl_res) - artist_name = ydl_res.get("artist", ydl_res.get("uploader", "")).rstrip(" - Topic") + artist_names = [] + uploader = ydl_res.get("uploader", "") + if uploader.endswith(" - Topic"): + artist_names = [uploader.rstrip(" - Topic")] + """ + elif "artist" in ydl_res: + artist_names = ydl_res.get("artist").split(", ") + """ + artist_list = [ + Artist( + name=name, + source_list=[Source( + SourcePages.YOUTUBE_MUSIC, + f"https://music.youtube.com/channel/{ydl_res.get('channel_id', ydl_res.get('uploader_id', ''))}" + )] + ) for name in artist_names] album_list = [] if "album" in ydl_res: @@ -507,19 +522,14 @@ class YoutubeMusic(SuperYouTube): date=ID3Timestamp.strptime(ydl_res.get("upload_date"), "%Y%m%d"), )) + artist_name = artist_names[0] if len(artist_names) > 0 else None return Song( title=ydl_res.get("track", clean_song_title(ydl_res.get("title"), 
artist_name=artist_name)), note=ydl_res.get("descriptions"), album_list=album_list, length=int(ydl_res.get("duration", 0)) * 1000, artwork=Artwork(*ydl_res.get("thumbnails", [])), - main_artist_list=[Artist( - name=artist_name, - source_list=[Source( - SourcePages.YOUTUBE_MUSIC, - f"https://music.youtube.com/channel/{ydl_res.get('channel_id', ydl_res.get('uploader_id', ''))}" - )] - )], + main_artist_list=artist_list, source_list=[Source( SourcePages.YOUTUBE_MUSIC, f"https://music.youtube.com/watch?v={ydl_res.get('id')}" diff --git a/music_kraken/utils/__init__.py b/music_kraken/utils/__init__.py index fcfb9a5..981eb7e 100644 --- a/music_kraken/utils/__init__.py +++ b/music_kraken/utils/__init__.py @@ -3,24 +3,30 @@ from pathlib import Path import json import logging import inspect +from typing import List from .shared import DEBUG, DEBUG_LOGGING, DEBUG_DUMP, DEBUG_TRACE, DEBUG_OBJECT_TRACE, DEBUG_OBJECT_TRACE_CALLSTACK from .config import config, read_config, write_config from .enums.colors import BColors from .path_manager import LOCATIONS +from .hacking import merge_args """ IO functions """ def _apply_color(msg: str, color: BColors) -> str: + if not isinstance(msg, str): + msg = str(msg) + if color is BColors.ENDC: return msg return color.value + msg + BColors.ENDC.value -def output(msg: str, color: BColors = BColors.ENDC): - print(_apply_color(msg, color)) +@merge_args(print) +def output(*msg: List[str], color: BColors = BColors.ENDC, **kwargs): + print(*(_apply_color(s, color) for s in msg), **kwargs) def user_input(msg: str, color: BColors = BColors.ENDC): diff --git a/music_kraken/utils/hacking.py b/music_kraken/utils/hacking.py index e68356e..0e949d8 100644 --- a/music_kraken/utils/hacking.py +++ b/music_kraken/utils/hacking.py @@ -78,7 +78,14 @@ def _merge( drop_args = [] if drop_kwonlyargs is None: drop_kwonlyargs = [] - source_spec = inspect.getfullargspec(source) + + is_builtin = False + try: + source_spec = inspect.getfullargspec(source) + except 
TypeError: + is_builtin = True + source_spec = inspect.FullArgSpec(type(source).__name__, [], [], [], [], [], []) + dest_spec = inspect.getfullargspec(dest) if source_spec.varargs or source_spec.varkw: @@ -128,13 +135,15 @@ def _merge( 'co_kwonlyargcount': len(kwonlyargs_merged), 'co_posonlyargcount': dest.__code__.co_posonlyargcount, 'co_nlocals': len(args_all), - 'co_flags': source.__code__.co_flags, 'co_varnames': args_all, 'co_filename': dest.__code__.co_filename, 'co_name': dest.__code__.co_name, 'co_firstlineno': dest.__code__.co_firstlineno, } + if hasattr(source, "__code__"): + replace_kwargs['co_flags'] = source.__code__.co_flags + if PY310: replace_kwargs['co_linetable'] = dest.__code__.co_linetable else: @@ -151,7 +160,7 @@ def _merge( len(kwonlyargs_merged), _blank.__code__.co_nlocals, _blank.__code__.co_stacksize, - source.__code__.co_flags, + source.__code__.co_flags if hasattr(source, "__code__") else dest.__code__.co_flags, _blank.__code__.co_code, (), (), args_all, dest.__code__.co_filename, dest.__code__.co_name, @@ -171,6 +180,9 @@ def _merge( dest_ret = dest.__annotations__['return'] for v in ('__kwdefaults__', '__annotations__'): + if not hasattr(source, v): + continue + out = getattr(source, v) if out is None: out = {} diff --git a/music_kraken/utils/string_processing.py b/music_kraken/utils/string_processing.py index c81c71f..1e46a58 100644 --- a/music_kraken/utils/string_processing.py +++ b/music_kraken/utils/string_processing.py @@ -32,8 +32,9 @@ def unify(string: str) -> str: string = translit(string, reversed=True) except LanguageDetectionError: pass - - return string.lower() + + string = unify_punctuation(string) + return string.lower().strip() def fit_to_file_system(string: Union[str, Path], hidden_ok: bool = False) -> Union[str, Path]: From 4510520db6e2457c301935c6daa0849b5128de75 Mon Sep 17 00:00:00 2001 From: Lars Noack Date: Tue, 30 Apr 2024 17:24:11 +0200 Subject: [PATCH 19/32] feat: draft better debug --- 
music_kraken/objects/collection.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/music_kraken/objects/collection.py b/music_kraken/objects/collection.py index 497f93e..ec43ab3 100644 --- a/music_kraken/objects/collection.py +++ b/music_kraken/objects/collection.py @@ -82,10 +82,11 @@ class Collection(Generic[T]): self._map_element(e) def _find_object(self, __object: T, no_push_to: bool = False) -> Optional[T]: - if not no_push_to: + if not no_push_to or True: for c in self.push_to: found, found_in = c._find_object(__object, no_push_to=True) if found is not None: + output("push to", found, __object, color=BColors.RED) return found, found_in self._remap() @@ -114,10 +115,10 @@ class Collection(Generic[T]): if map_to is self: for other, contained in (c._find_object(__object, no_push_to=True) for c in self.pull_from): - output(other, __object, contained, color=BColors.RED) if other is None: continue + output("pull from", other, __object, color=BColors.RED) __object.__merge__(other, no_push_to=False, **kwargs) contained.remove(other) From be843f2c10d175b4e38a86420ef187b4f162680d Mon Sep 17 00:00:00 2001 From: Lars Noack Date: Tue, 30 Apr 2024 17:43:00 +0200 Subject: [PATCH 20/32] draft: improved debug even more --- music_kraken/objects/collection.py | 4 +++- music_kraken/objects/parents.py | 4 ++++ music_kraken/objects/song.py | 2 +- music_kraken/objects/source.py | 2 +- 4 files changed, 9 insertions(+), 3 deletions(-) diff --git a/music_kraken/objects/collection.py b/music_kraken/objects/collection.py index ec43ab3..46b0be1 100644 --- a/music_kraken/objects/collection.py +++ b/music_kraken/objects/collection.py @@ -50,7 +50,7 @@ class Collection(Generic[T]): self.extend(data) def __repr__(self) -> str: - return f"Collection({id(self)})" + return f"Collection({' | '.join(self._collection_for.values())} {id(self)})" def _map_element(self, __object: T, from_map: bool = False): self._unmap_element(__object.id) @@ -111,6 +111,8 @@ class 
Collection(Generic[T]): if __object is None: return + object_trace(f"Appending {__object.option_string} {__object.id} to {self}") + existing_object, map_to = self._find_object(__object, no_push_to=kwargs.get("no_push_to", False)) if map_to is self: diff --git a/music_kraken/objects/parents.py b/music_kraken/objects/parents.py index c0669c0..15aed6b 100644 --- a/music_kraken/objects/parents.py +++ b/music_kraken/objects/parents.py @@ -243,6 +243,10 @@ class OuterProxy: return r + @property + def option_string(self) -> str: + return self.title_string + INDEX_DEPENDS_ON: List[str] = [] @property diff --git a/music_kraken/objects/song.py b/music_kraken/objects/song.py index ee9f689..59c67a4 100644 --- a/music_kraken/objects/song.py +++ b/music_kraken/objects/song.py @@ -348,7 +348,7 @@ class Album(Base): @property def option_string(self) -> str: - r = OPTION_FOREGROUND.value + self.title + BColors.ENDC.value + OPTION_BACKGROUND.value + r = OPTION_FOREGROUND.value + self.title_string + BColors.ENDC.value + OPTION_BACKGROUND.value r += get_collection_string(self.artist_collection, " by {}") r += get_collection_string(self.label_collection, " under {}") diff --git a/music_kraken/objects/source.py b/music_kraken/objects/source.py index d20d571..b7e483a 100644 --- a/music_kraken/objects/source.py +++ b/music_kraken/objects/source.py @@ -30,7 +30,7 @@ class Source: def __post_init__(self): self.referrer_page = self.referrer_page or self.page_enum - @cached_property + @property def parsed_url(self) -> ParseResult: return urlparse(self.url) From 9c369b421d9b92dea9ef0bf16746bb754ef41145 Mon Sep 17 00:00:00 2001 From: Lars Noack Date: Fri, 3 May 2024 14:52:12 +0200 Subject: [PATCH 21/32] feat: oh no --- development/actual_donwload.py | 2 +- development/objects_collection.py | 39 +++--- music_kraken/objects/collection.py | 196 ++++++++++++++++++----------- music_kraken/objects/parents.py | 8 +- music_kraken/objects/song.py | 7 +- music_kraken/utils/shared.py | 1 + 6 files 
changed, 148 insertions(+), 105 deletions(-) diff --git a/development/actual_donwload.py b/development/actual_donwload.py index a8eb732..2c9a174 100644 --- a/development/actual_donwload.py +++ b/development/actual_donwload.py @@ -7,7 +7,7 @@ logging.getLogger().setLevel(logging.DEBUG) if __name__ == "__main__": commands = [ "s: #a Crystal F", - "d: 20", + "d: 20" ] diff --git a/development/objects_collection.py b/development/objects_collection.py index 642bb18..d8d8f47 100644 --- a/development/objects_collection.py +++ b/development/objects_collection.py @@ -2,30 +2,27 @@ import music_kraken from music_kraken.objects import Song, Album, Artist, Collection if __name__ == "__main__": - album_1 = Album( - title="album", - song_list=[ - Song(title="song", main_artist_list=[Artist(name="artist")]), - ], - artist_list=[ - Artist(name="artist 3"), - ] + song_1 = Song( + title="song", + main_artist_list=[Artist( + name="main_artist" + )], + feature_artist_list=[Artist( + name="main_artist" + )] ) - album_2 = Album( - title="album", - song_list=[ - Song(title="song", main_artist_list=[Artist(name="artist 2")]), - ], - artist_list=[ - Artist(name="artist"), - ] + other_artist = Artist(name="other_artist") + + song_2 = Song( + title = "song", + main_artist_list=[other_artist] ) - album_1.merge(album_2) + other_artist.name = "main_artist" - print() - print(*(f"{a.title_string} ; {a.id}" for a in album_1.artist_collection.data), sep=" | ") + song_1.merge(song_2) - print(id(album_1.artist_collection), id(album_2.artist_collection)) - print(id(album_1.song_collection[0].main_artist_collection), id(album_2.song_collection[0].main_artist_collection)) \ No newline at end of file + print("#" * 120) + print("main", *song_1.main_artist_collection) + print("feat", *song_1.feature_artist_collection) diff --git a/music_kraken/objects/collection.py b/music_kraken/objects/collection.py index 46b0be1..a257226 100644 --- a/music_kraken/objects/collection.py +++ 
b/music_kraken/objects/collection.py @@ -40,8 +40,6 @@ class Collection(Generic[T]): self.pull_from: List[Collection] = [] self.push_to: List[Collection] = [] - self._id_to_index_values: Dict[int, set] = defaultdict(set) - # This is to cleanly unmap previously mapped items by their id self._indexed_from_id: Dict[int, Dict[str, Any]] = defaultdict(dict) # this is to keep track and look up the actual objects @@ -52,8 +50,9 @@ class Collection(Generic[T]): def __repr__(self) -> str: return f"Collection({' | '.join(self._collection_for.values())} {id(self)})" - def _map_element(self, __object: T, from_map: bool = False): - self._unmap_element(__object.id) + def _map_element(self, __object: T, no_unmap: bool = False, **kwargs): + if not no_unmap: + self._unmap_element(__object.id) self._indexed_from_id[__object.id]["id"] = __object.id self._indexed_values["id"][__object.id] = __object @@ -78,108 +77,153 @@ class Collection(Generic[T]): del self._indexed_from_id[obj_id] def _remap(self): - for e in self: - self._map_element(e) + # reinitialize the mapping to clean it without time consuming operations + self._indexed_from_id: Dict[int, Dict[str, Any]] = defaultdict(dict) + self._indexed_values: Dict[str, Dict[Any, T]] = defaultdict(dict) - def _find_object(self, __object: T, no_push_to: bool = False) -> Optional[T]: - if not no_push_to or True: - for c in self.push_to: - found, found_in = c._find_object(__object, no_push_to=True) - if found is not None: - output("push to", found, __object, color=BColors.RED) - return found, found_in + for e in self._data: + self._map_element(e, no_unmap=True) + def _find_object(self, __object: T, **kwargs) -> Optional[T]: self._remap() for name, value in __object.indexing_values: if value in self._indexed_values[name]: - return self._indexed_values[name][value], self + return self._indexed_values[name][value] - return None, self + return None - def append(self, __object: Optional[T], **kwargs): + def _merge_into_contained_object(self, 
existing: T, other: T, **kwargs): + """ + This function merges the other object into the existing object, which is contained in the current collection. + This also modifies the correct mapping. + """ + + if existing.id == other.id: + return + + self._map_element(existing) + existing.merge(other, **kwargs) + + def _append_new_object(self, other: T, **kwargs): + """ + This function appends the other object to the current collection. + This only works if not another object, which represents the same real life object exists in the collection. + """ + + self._data.append(other) + self._map_element(other) + + # all of the existing hooks to get the defined datastructure + for collection_attribute, generator in self.extend_object_to_attribute.items(): + other.__getattribute__(collection_attribute).extend(generator, **kwargs) + + for attribute, new_object in self.append_object_to_attribute.items(): + other.__getattribute__(attribute).append(new_object, **kwargs) + + for attribute, a in self.sync_on_append.items(): + # syncing two collections by reference + b = other.__getattribute__(attribute) + if a is b: + continue + + """ + no_sync_collection: Set[Collection] = kwargs.get("no_sync_collection", set()) + if id(b) in no_sync_collection: + continue + """ + object_trace(f"Syncing [{a}] = [{b}]") + + + b_data = b.data.copy() + b_collection_for = b._collection_for.copy() + # no_sync_collection.add(id(b)) + + del b + + for synced_with, key in b_collection_for.items(): + synced_with.__setattr__(key, a) + a._collection_for[synced_with] = key + + a.extend(b_data, **kwargs) + + def append(self, other: Optional[T], **kwargs): """ If an object, that represents the same entity exists in a relevant collection, merge into this object. (and remap) Else append to this collection. 
- :param __object: + :param other: :return: """ - if __object is None: + if other is None: + return + if other.id in self._indexed_from_id: return - object_trace(f"Appending {__object.option_string} {__object.id} to {self}") + object_trace(f"Appending {other.option_string} to {self}") - existing_object, map_to = self._find_object(__object, no_push_to=kwargs.get("no_push_to", False)) + push_to: Optional[Tuple[Collection, T]] = None + for c in self.push_to: + r = c._find_object(other) + if r is not None: + push_to_collection = (c, r) + output("found push to", found, other, self, color=BColors.RED, sep="\t") + break - if map_to is self: - for other, contained in (c._find_object(__object, no_push_to=True) for c in self.pull_from): - if other is None: - continue + pull_from: Optional[Tuple[Collection, T]] = None + for c in self.pull_from: + r = c._find_object(other) + if r is not None: + pull_from_collection = (c, r) + output("found pull from", found, other, self, color=BColors.RED, sep="\t") + break - output("pull from", other, __object, color=BColors.RED) - __object.__merge__(other, no_push_to=False, **kwargs) - contained.remove(other) + if pull_from is not None: + pull_from[0].remove(pull_from[1]) + existing_object = self._find_object(other, no_push_to=kwargs.get("no_push_to", False)) + if existing_object is None: - # append - self._data.append(__object) - self._map_element(__object) + if push_to is None: + self._append_new_object(other, **kwargs) + else: + push_to[0]._merge_into_contained_object(push_to[1], other, **kwargs) - for collection_attribute, child_collection in self.extend_object_to_attribute.items(): - __object.__getattribute__(collection_attribute).extend(child_collection, **kwargs) + if pull_from is not None: + self._merge_into_contained_object(other if push_to is None else push_to[1], pull_from[1], **kwargs) + else: + self._merge_into_contained_object(existing_object, other, **kwargs) + if pull_from is not None: + 
self._merge_into_contained_object(existing_object, pull_from[1], **kwargs) + + def remove(self, *other_list: List[T], silent: bool = False): + for other in other_list: + existing: Optional[T] = self._indexed_values["id"].get(other.id, None) + if existing is None: + if not silent: + raise ValueError(f"Object {other} not found in {self}") + return other + + for collection_attribute, generator in self.extend_object_to_attribute.items(): + other.__getattribute__(collection_attribute).remove(*generator, silent=silent, **kwargs) for attribute, new_object in self.append_object_to_attribute.items(): - __object.__getattribute__(attribute).append(new_object, **kwargs) + other.__getattribute__(attribute).remove(new_object, silent=silent, **kwargs) - # only modify collections if the object actually has been appended - for attribute, a in self.sync_on_append.items(): - b = __object.__getattribute__(attribute) - if a is b: - continue + self._data.remove(existing) + self._unmap_element(existing) - no_sync_collection: Set[Collection] = kwargs.get("no_sync_collection", set()) - object_trace(f"Syncing [{a}] = [{b}]; {no_sync_collection}") - if id(b) in no_sync_collection: - continue + def contains(self, other: T) -> bool: + return self._find_object(other) is not None - - b_data = b.data.copy() - b_collection_for = b._collection_for.copy() - no_sync_collection.add(id(b)) - # kwargs["no_sync_collection"] = no_sync_collection - del b - - a.extend(b_data, **kwargs) - - for synced_with, key in b_collection_for.items(): - synced_with.__setattr__(key, a) - a._collection_for[synced_with] = key - - else: - # merge only if the two objects are not the same - if existing_object.id == __object.id: - return - - existing_object.merge(__object, **kwargs) - map_to._map_element(existing_object) - - def remove(self, __object: T) -> T: - self._data.remove(__object) - self._unmap_element(__object) - return __object - - def contains(self, __object: T) -> bool: - return self._find_object(__object) is not 
None - - def extend(self, __iterable: Optional[Generator[T, None, None]], **kwargs): - if __iterable is None: + def extend(self, other_collections: Optional[Generator[T, None, None]], **kwargs): + if other_collections is None: return - for __object in __iterable: + for __object in other_collections: self.append(__object, **kwargs) @property @@ -196,8 +240,8 @@ class Collection(Generic[T]): def __iter__(self) -> Iterator[T]: yield from self._data - def __merge__(self, __other: Collection, **kwargs): - self.extend(__other, **kwargs) + def __merge__(self, other: Collection, **kwargs): + self.extend(other, **kwargs) def __getitem__(self, item: int): return self._data[item] diff --git a/music_kraken/objects/parents.py b/music_kraken/objects/parents.py index 15aed6b..ac04e34 100644 --- a/music_kraken/objects/parents.py +++ b/music_kraken/objects/parents.py @@ -11,7 +11,7 @@ import inspect from .metadata import Metadata from ..utils import get_unix_time, object_trace, generate_id from ..utils.config import logging_settings, main_settings -from ..utils.shared import HIGHEST_ID +from ..utils.shared import HIGHEST_ID, DEBUG_PRINT_ID from ..utils.hacking import MetaClass LOGGER = logging_settings["object_logger"] @@ -113,7 +113,7 @@ class OuterProxy: self._inner: InnerData = InnerData(type(self), **kwargs) self._inner._refers_to_instances.add(self) - object_trace(f"creating {type(self).__name__} [{self.title_string}]") + object_trace(f"creating {type(self).__name__} [{self.option_string}]") self.__init_collections__() @@ -192,7 +192,7 @@ class OuterProxy: if len(b._inner._refers_to_instances) > len(a._inner._refers_to_instances): a, b = b, a - object_trace(f"merging {type(a).__name__} [{a.title_string} | {a.id}] with {type(b).__name__} [{b.title_string} | {b.id}]") + object_trace(f"merging {a.option_string} | {b.option_string}") old_inner = b._inner @@ -282,7 +282,7 @@ class OuterProxy: TITEL = "id" @property def title_string(self) -> str: - return 
str(self.__getattribute__(self.TITEL)) + return str(self.__getattribute__(self.TITEL)) + (f" {self.id}" if DEBUG_PRINT_ID else "") def __repr__(self): return f"{type(self).__name__}({self.title_string})" diff --git a/music_kraken/objects/song.py b/music_kraken/objects/song.py index 59c67a4..2113fcc 100644 --- a/music_kraken/objects/song.py +++ b/music_kraken/objects/song.py @@ -22,6 +22,7 @@ from .parents import OuterProxy, P from .source import Source, SourceCollection from .target import Target from .country import Language, Country +from ..utils.shared import DEBUG_PRINT_ID from ..utils.string_processing import unify from .parents import OuterProxy as Base @@ -44,7 +45,7 @@ def get_collection_string( ignore_titles: Set[str] = None, background: BColors = OPTION_BACKGROUND, foreground: BColors = OPTION_FOREGROUND, - add_id: bool = True, + add_id: bool = DEBUG_PRINT_ID, ) -> str: if collection.empty: return "" @@ -203,7 +204,7 @@ class Song(Base): @property def option_string(self) -> str: - r = OPTION_FOREGROUND.value + self.title + BColors.ENDC.value + OPTION_BACKGROUND.value + r = OPTION_FOREGROUND.value + self.title_string + BColors.ENDC.value + OPTION_BACKGROUND.value r += get_collection_string(self.album_collection, " from {}", ignore_titles={self.title}) r += get_collection_string(self.main_artist_collection, " by {}") r += get_collection_string(self.feature_artist_collection, " feat. 
{}") @@ -578,7 +579,7 @@ class Artist(Base): @property def option_string(self) -> str: - r = OPTION_FOREGROUND.value + self.name + BColors.ENDC.value + OPTION_BACKGROUND.value + r = OPTION_FOREGROUND.value + self.title_string + BColors.ENDC.value + OPTION_BACKGROUND.value r += get_collection_string(self.label_collection, " under {}") r += OPTION_BACKGROUND.value diff --git a/music_kraken/utils/shared.py b/music_kraken/utils/shared.py index 401b051..2a5d4a4 100644 --- a/music_kraken/utils/shared.py +++ b/music_kraken/utils/shared.py @@ -20,6 +20,7 @@ DEBUG_OBJECT_TRACE_CALLSTACK = DEBUG_OBJECT_TRACE and False DEBUG_YOUTUBE_INITIALIZING = DEBUG and False DEBUG_PAGES = DEBUG and False DEBUG_DUMP = DEBUG and False +DEBUG_PRINT_ID = DEBUG and True if DEBUG: print("DEBUG ACTIVE") From a7711761f93ce3c0cc28c6095066e5af3f9333a0 Mon Sep 17 00:00:00 2001 From: Lars Noack Date: Fri, 3 May 2024 14:55:22 +0200 Subject: [PATCH 22/32] dfa --- music_kraken/objects/collection.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/music_kraken/objects/collection.py b/music_kraken/objects/collection.py index a257226..d08415e 100644 --- a/music_kraken/objects/collection.py +++ b/music_kraken/objects/collection.py @@ -84,6 +84,7 @@ class Collection(Generic[T]): for e in self._data: self._map_element(e, no_unmap=True) + def _find_object(self, __object: T, **kwargs) -> Optional[T]: self._remap() @@ -216,8 +217,8 @@ class Collection(Generic[T]): self._data.remove(existing) self._unmap_element(existing) - def contains(self, other: T) -> bool: - return self._find_object(other) is not None + def contains(self, __object: T) -> bool: + return self._find_object(__object) is not None def extend(self, other_collections: Optional[Generator[T, None, None]], **kwargs): if other_collections is None: From d9105fb55a6795291035138740e4e7766d79491f Mon Sep 17 00:00:00 2001 From: Lars Noack Date: Mon, 6 May 2024 10:31:21 +0200 Subject: [PATCH 23/32] fix: some bug --- 
development/objects_collection.py | 3 --- music_kraken/objects/collection.py | 31 +++++++++++------------------- music_kraken/utils/shared.py | 2 +- 3 files changed, 12 insertions(+), 24 deletions(-) diff --git a/development/objects_collection.py b/development/objects_collection.py index d8d8f47..893e2c5 100644 --- a/development/objects_collection.py +++ b/development/objects_collection.py @@ -4,9 +4,6 @@ from music_kraken.objects import Song, Album, Artist, Collection if __name__ == "__main__": song_1 = Song( title="song", - main_artist_list=[Artist( - name="main_artist" - )], feature_artist_list=[Artist( name="main_artist" )] diff --git a/music_kraken/objects/collection.py b/music_kraken/objects/collection.py index d08415e..aa83e9e 100644 --- a/music_kraken/objects/collection.py +++ b/music_kraken/objects/collection.py @@ -166,41 +166,30 @@ class Collection(Generic[T]): object_trace(f"Appending {other.option_string} to {self}") - push_to: Optional[Tuple[Collection, T]] = None for c in self.push_to: r = c._find_object(other) if r is not None: - push_to_collection = (c, r) - output("found push to", found, other, self, color=BColors.RED, sep="\t") - break + output("found push to", r, other, self, color=BColors.RED, sep="\t") + return c.append(other, **kwargs) + pull_from: Optional[Tuple[Collection, T]] = None for c in self.pull_from: r = c._find_object(other) if r is not None: - pull_from_collection = (c, r) - output("found pull from", found, other, self, color=BColors.RED, sep="\t") + output("found pull from", r, other, self, color=BColors.RED, sep="\t") + other.merge(r, **kwargs) + c.remove(r, **kwargs) break - if pull_from is not None: - pull_from[0].remove(pull_from[1]) - existing_object = self._find_object(other, no_push_to=kwargs.get("no_push_to", False)) if existing_object is None: - if push_to is None: - self._append_new_object(other, **kwargs) - else: - push_to[0]._merge_into_contained_object(push_to[1], other, **kwargs) - - if pull_from is not None: - 
self._merge_into_contained_object(other if push_to is None else push_to[1], pull_from[1], **kwargs) + self._append_new_object(other, **kwargs) else: - self._merge_into_contained_object(existing_object, other, **kwargs) - if pull_from is not None: - self._merge_into_contained_object(existing_object, pull_from[1], **kwargs) + existing_object.merge(other, **kwargs) - def remove(self, *other_list: List[T], silent: bool = False): + def remove(self, *other_list: List[T], silent: bool = False, **kwargs): for other in other_list: existing: Optional[T] = self._indexed_values["id"].get(other.id, None) if existing is None: @@ -208,11 +197,13 @@ class Collection(Generic[T]): raise ValueError(f"Object {other} not found in {self}") return other + """ for collection_attribute, generator in self.extend_object_to_attribute.items(): other.__getattribute__(collection_attribute).remove(*generator, silent=silent, **kwargs) for attribute, new_object in self.append_object_to_attribute.items(): other.__getattribute__(attribute).remove(new_object, silent=silent, **kwargs) + """ self._data.remove(existing) self._unmap_element(existing) diff --git a/music_kraken/utils/shared.py b/music_kraken/utils/shared.py index 2a5d4a4..8f671f9 100644 --- a/music_kraken/utils/shared.py +++ b/music_kraken/utils/shared.py @@ -15,7 +15,7 @@ __stage__ = os.getenv("STAGE", "prod") DEBUG = (__stage__ == "dev") and True DEBUG_LOGGING = DEBUG and False DEBUG_TRACE = DEBUG and True -DEBUG_OBJECT_TRACE = DEBUG and True +DEBUG_OBJECT_TRACE = DEBUG and False DEBUG_OBJECT_TRACE_CALLSTACK = DEBUG_OBJECT_TRACE and False DEBUG_YOUTUBE_INITIALIZING = DEBUG and False DEBUG_PAGES = DEBUG and False From 7e5a1f84ae0458c8c36a7c342568e3163e0bd996 Mon Sep 17 00:00:00 2001 From: Lars Noack Date: Mon, 6 May 2024 12:40:06 +0200 Subject: [PATCH 24/32] feat: improved the youtube music album fetching --- .vscode/settings.json | 1 + music_kraken/objects/collection.py | 37 ++++-------- music_kraken/objects/song.py | 2 +- 
.../pages/youtube_music/_list_render.py | 19 ++---- .../youtube_music/_music_object_render.py | 10 +++- .../pages/youtube_music/youtube_music.py | 59 ++++++++++++++++--- music_kraken/utils/shared.py | 2 +- 7 files changed, 76 insertions(+), 54 deletions(-) diff --git a/.vscode/settings.json b/.vscode/settings.json index fa0b6f7..f49d1d9 100644 --- a/.vscode/settings.json +++ b/.vscode/settings.json @@ -29,6 +29,7 @@ "pathvalidate", "Referer", "sponsorblock", + "tracklist", "tracksort", "translit", "unmap", diff --git a/music_kraken/objects/collection.py b/music_kraken/objects/collection.py index aa83e9e..e57cb45 100644 --- a/music_kraken/objects/collection.py +++ b/music_kraken/objects/collection.py @@ -93,18 +93,6 @@ class Collection(Generic[T]): return self._indexed_values[name][value] return None - - def _merge_into_contained_object(self, existing: T, other: T, **kwargs): - """ - This function merges the other object into the existing object, which is contained in the current collection. - This also modifies the correct mapping. 
- """ - - if existing.id == other.id: - return - - self._map_element(existing) - existing.merge(other, **kwargs) def _append_new_object(self, other: T, **kwargs): """ @@ -113,7 +101,6 @@ class Collection(Generic[T]): """ self._data.append(other) - self._map_element(other) # all of the existing hooks to get the defined datastructure for collection_attribute, generator in self.extend_object_to_attribute.items(): @@ -128,17 +115,10 @@ class Collection(Generic[T]): if a is b: continue - """ - no_sync_collection: Set[Collection] = kwargs.get("no_sync_collection", set()) - if id(b) in no_sync_collection: - continue - """ object_trace(f"Syncing [{a}] = [{b}]") - b_data = b.data.copy() b_collection_for = b._collection_for.copy() - # no_sync_collection.add(id(b)) del b @@ -166,6 +146,7 @@ class Collection(Generic[T]): object_trace(f"Appending {other.option_string} to {self}") + # switching collection in the case of push to for c in self.push_to: r = c._find_object(other) if r is not None: @@ -173,25 +154,24 @@ class Collection(Generic[T]): return c.append(other, **kwargs) - pull_from: Optional[Tuple[Collection, T]] = None for c in self.pull_from: r = c._find_object(other) if r is not None: output("found pull from", r, other, self, color=BColors.RED, sep="\t") other.merge(r, **kwargs) - c.remove(r, **kwargs) + c.remove(r, existing=r, **kwargs) break - existing_object = self._find_object(other, no_push_to=kwargs.get("no_push_to", False)) + existing_object = self._find_object(other) if existing_object is None: self._append_new_object(other, **kwargs) else: existing_object.merge(other, **kwargs) - def remove(self, *other_list: List[T], silent: bool = False, **kwargs): + def remove(self, *other_list: List[T], silent: bool = False, existing: Optional[T] = None, **kwargs): for other in other_list: - existing: Optional[T] = self._indexed_values["id"].get(other.id, None) + existing: Optional[T] = existing or self._indexed_values["id"].get(other.id, None) if existing is None: if not 
silent: raise ValueError(f"Object {other} not found in {self}") @@ -233,6 +213,7 @@ class Collection(Generic[T]): yield from self._data def __merge__(self, other: Collection, **kwargs): + object_trace(f"merging {str(self)} | {str(other)}") self.extend(other, **kwargs) def __getitem__(self, item: int): @@ -242,3 +223,9 @@ class Collection(Generic[T]): if item >= len(self._data): return default return self._data[item] + + def __eq__(self, other: Collection) -> bool: + if self.empty and other.empty: + return True + + return self._data == other._data diff --git a/music_kraken/objects/song.py b/music_kraken/objects/song.py index 2113fcc..1528887 100644 --- a/music_kraken/objects/song.py +++ b/music_kraken/objects/song.py @@ -156,7 +156,7 @@ class Song(Base): return if isinstance(object_list, Artist): - self.main_artist_collection.extend(object_list) + self.feature_artist_collection.extend(object_list) return if isinstance(object_list, Album): diff --git a/music_kraken/pages/youtube_music/_list_render.py b/music_kraken/pages/youtube_music/_list_render.py index a820014..bb6f40b 100644 --- a/music_kraken/pages/youtube_music/_list_render.py +++ b/music_kraken/pages/youtube_music/_list_render.py @@ -25,7 +25,6 @@ def music_card_shelf_renderer(renderer: dict) -> List[DatabaseObject]: results.extend(parse_renderer(sub_renderer)) return results - def music_responsive_list_item_flex_column_renderer(renderer: dict) -> List[DatabaseObject]: return parse_run_list(renderer.get("text", {}).get("runs", [])) @@ -54,21 +53,11 @@ def music_responsive_list_item_renderer(renderer: dict) -> List[DatabaseObject]: for result in results: _map[type(result)].append(result) - for song in song_list: - song.album_collection.extend(album_list) + if len(song_list) == 1: + song = song_list[0] song.feature_artist_collection.extend(artist_list) - - if len(song_list) > 0: - return song_list - - for album in album_list: - album.artist_collection.extend(artist_list) - - if len(album_list) > 0: - return 
album_list - - if len(artist_list) > 0: - return artist_list + song.album_collection.extend(album_list) + return [song] return results diff --git a/music_kraken/pages/youtube_music/_music_object_render.py b/music_kraken/pages/youtube_music/_music_object_render.py index f10d11a..831d50d 100644 --- a/music_kraken/pages/youtube_music/_music_object_render.py +++ b/music_kraken/pages/youtube_music/_music_object_render.py @@ -40,7 +40,7 @@ def parse_run_element(run_element: dict) -> Optional[DatabaseObject]: _temp_nav = run_element.get("navigationEndpoint", {}) is_video = "watchEndpoint" in _temp_nav - navigation_endpoint = _temp_nav.get("watchEndpoint" if is_video else "browseEndpoint", {}) + navigation_endpoint = _temp_nav.get("watchEndpoint", _temp_nav.get("browseEndpoint", {})) element_type = PageType.SONG page_type_string = navigation_endpoint.get("watchEndpointMusicSupportedConfigs", {}).get("watchEndpointMusicConfig", {}).get("musicVideoType", "") @@ -51,7 +51,7 @@ def parse_run_element(run_element: dict) -> Optional[DatabaseObject]: except ValueError: return - element_id = navigation_endpoint.get("videoId" if is_video else "browseId") + element_id = navigation_endpoint.get("videoId", navigation_endpoint.get("browseId")) element_text = run_element.get("text") if element_id is None or element_text is None: @@ -60,7 +60,11 @@ def parse_run_element(run_element: dict) -> Optional[DatabaseObject]: if element_type == PageType.SONG or (element_type == PageType.VIDEO and not youtube_settings["youtube_music_clean_data"]) or (element_type == PageType.OFFICIAL_MUSIC_VIDEO and not youtube_settings["youtube_music_clean_data"]): source = Source(SOURCE_PAGE, f"https://music.youtube.com/watch?v={element_id}") - return Song(title=clean_song_title(element_text), source_list=[source]) + + return Song( + title=clean_song_title(element_text), + source_list=[source] + ) if element_type == PageType.ARTIST or (element_type == PageType.CHANNEL and not 
youtube_settings["youtube_music_clean_data"]): source = Source(SOURCE_PAGE, f"https://music.youtube.com/channel/{element_id}") diff --git a/music_kraken/pages/youtube_music/youtube_music.py b/music_kraken/pages/youtube_music/youtube_music.py index 46581f3..9c37ad2 100644 --- a/music_kraken/pages/youtube_music/youtube_music.py +++ b/music_kraken/pages/youtube_music/youtube_music.py @@ -8,6 +8,7 @@ import json from dataclasses import dataclass import re from functools import lru_cache +from collections import defaultdict import youtube_dl from youtube_dl.extractor.youtube import YoutubeIE @@ -33,9 +34,11 @@ from ...objects import ( Target ) from ...connection import Connection +from ...utils.enums.album import AlbumType from ...utils.support_classes.download_result import DownloadResult from ._list_render import parse_renderer +from ._music_object_render import parse_run_element from .super_youtube import SuperYouTube @@ -162,6 +165,12 @@ class MusicKrakenYoutubeIE(YoutubeIE): +ALBUM_TYPE_MAP = { + "Single": AlbumType.SINGLE, + "Album": AlbumType.STUDIO_ALBUM, + "EP": AlbumType.EP, +} + class YoutubeMusic(SuperYouTube): # CHANGE @@ -465,6 +474,46 @@ class YoutubeMusic(SuperYouTube): if DEBUG: dump_to_file(f"{browse_id}.json", r.text, is_json=True, exit_after_dump=False) + data = r.json() + + # album details + header = data.get("header", {}) + musicDetailHeaderRenderer = header.get("musicDetailHeaderRenderer", {}) + + title_runs: List[dict] = musicDetailHeaderRenderer.get("title", {}).get("runs", []) + subtitle_runs: List[dict] = musicDetailHeaderRenderer.get("subtitle", {}).get("runs", []) + + if len(title_runs) > 0: + album.title = title_runs[0].get("text", album.title) + + def other_parse_run(run: dict) -> str: + nonlocal album + + if "text" not in run: + return + text = run["text"] + + is_text_field = len(run.keys()) == 1 + + # regex that text is a year + if is_text_field and re.match(r"\d{4}", text): + album.date = ID3Timestamp.strptime(text, "%Y") + return + + 
if text in ALBUM_TYPE_MAP: + album.album_type = ALBUM_TYPE_MAP[text] + return + + if not is_text_field: + r = parse_run_element(run) + if r is not None: + album.add_list_of_other_objects([r]) + return + + for _run in subtitle_runs: + other_parse_run(_run) + + # tracklist renderer_list = r.json().get("contents", {}).get("singleColumnBrowseResultsRenderer", {}).get("tabs", [{}])[ 0].get("tabRenderer", {}).get("content", {}).get("sectionListRenderer", {}).get("contents", []) @@ -472,17 +521,9 @@ class YoutubeMusic(SuperYouTube): for i, content in enumerate(renderer_list): dump_to_file(f"{i}-album-renderer.json", json.dumps(content), is_json=True, exit_after_dump=False) - results = [] - - """ - cant use fixed indices, because if something has no entries, the list dissappears - instead I have to try parse everything, and just reject community playlists and profiles. - """ for renderer in renderer_list: - results.extend(parse_renderer(renderer)) - - album.add_list_of_other_objects(results) + album.add_list_of_other_objects(parse_renderer(renderer)) return album diff --git a/music_kraken/utils/shared.py b/music_kraken/utils/shared.py index 8f671f9..b75cf7f 100644 --- a/music_kraken/utils/shared.py +++ b/music_kraken/utils/shared.py @@ -19,7 +19,7 @@ DEBUG_OBJECT_TRACE = DEBUG and False DEBUG_OBJECT_TRACE_CALLSTACK = DEBUG_OBJECT_TRACE and False DEBUG_YOUTUBE_INITIALIZING = DEBUG and False DEBUG_PAGES = DEBUG and False -DEBUG_DUMP = DEBUG and False +DEBUG_DUMP = DEBUG and True DEBUG_PRINT_ID = DEBUG and True if DEBUG: From be7e91cb7be40b41689f0ec90d22edbc993ae335 Mon Sep 17 00:00:00 2001 From: Lars Noack Date: Mon, 6 May 2024 12:44:15 +0200 Subject: [PATCH 25/32] feat: improved the youtube music album fetching --- music_kraken/pages/youtube_music/youtube_music.py | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/music_kraken/pages/youtube_music/youtube_music.py b/music_kraken/pages/youtube_music/youtube_music.py index 9c37ad2..1265eeb 100644 --- 
a/music_kraken/pages/youtube_music/youtube_music.py +++ b/music_kraken/pages/youtube_music/youtube_music.py @@ -430,6 +430,18 @@ class YoutubeMusic(SuperYouTube): if DEBUG: dump_to_file(f"{browse_id}.json", r.text, is_json=True, exit_after_dump=False) + # artist details + header = data.get("header", {}) + musicDetailHeaderRenderer = header.get("musicDetailHeaderRenderer", {}) + + title_runs: List[dict] = musicDetailHeaderRenderer.get("title", {}).get("runs", []) + subtitle_runs: List[dict] = musicDetailHeaderRenderer.get("subtitle", {}).get("runs", []) + + if len(title_runs) > 0: + artist.name = title_runs[0].get("text", artist.name) + + + # fetch discography renderer_list = r.json().get("contents", {}).get("singleColumnBrowseResultsRenderer", {}).get("tabs", [{}])[ 0].get("tabRenderer", {}).get("content", {}).get("sectionListRenderer", {}).get("contents", []) From 90d685da816daa8e207491b03d3628ca94d9be6d Mon Sep 17 00:00:00 2001 From: Lars Noack Date: Mon, 6 May 2024 12:53:06 +0200 Subject: [PATCH 26/32] feat: implemented correct merging of artists --- music_kraken/pages/youtube_music/youtube_music.py | 3 ++- music_kraken/utils/shared.py | 2 +- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/music_kraken/pages/youtube_music/youtube_music.py b/music_kraken/pages/youtube_music/youtube_music.py index 1265eeb..61fe534 100644 --- a/music_kraken/pages/youtube_music/youtube_music.py +++ b/music_kraken/pages/youtube_music/youtube_music.py @@ -410,7 +410,7 @@ class YoutubeMusic(SuperYouTube): return results def fetch_artist(self, source: Source, stop_at_level: int = 1) -> Artist: - artist = Artist() + artist = Artist(source_list=[source]) # construct the request url = urlparse(source.url) @@ -431,6 +431,7 @@ class YoutubeMusic(SuperYouTube): dump_to_file(f"{browse_id}.json", r.text, is_json=True, exit_after_dump=False) # artist details + data: dict = r.json() header = data.get("header", {}) musicDetailHeaderRenderer = header.get("musicDetailHeaderRenderer", 
{}) diff --git a/music_kraken/utils/shared.py b/music_kraken/utils/shared.py index b75cf7f..5a40396 100644 --- a/music_kraken/utils/shared.py +++ b/music_kraken/utils/shared.py @@ -15,7 +15,7 @@ __stage__ = os.getenv("STAGE", "prod") DEBUG = (__stage__ == "dev") and True DEBUG_LOGGING = DEBUG and False DEBUG_TRACE = DEBUG and True -DEBUG_OBJECT_TRACE = DEBUG and False +DEBUG_OBJECT_TRACE = DEBUG and True DEBUG_OBJECT_TRACE_CALLSTACK = DEBUG_OBJECT_TRACE and False DEBUG_YOUTUBE_INITIALIZING = DEBUG and False DEBUG_PAGES = DEBUG and False From 5ed902489fd1df15c5dd7571d98cebb50674689c Mon Sep 17 00:00:00 2001 From: Lars Noack Date: Mon, 6 May 2024 14:33:03 +0200 Subject: [PATCH 27/32] feat: added additional data --- music_kraken/objects/song.py | 2 +- .../pages/youtube_music/youtube_music.py | 46 +++++++++++++++++-- music_kraken/utils/shared.py | 2 +- 3 files changed, 43 insertions(+), 7 deletions(-) diff --git a/music_kraken/objects/song.py b/music_kraken/objects/song.py index 1528887..9f9ba7e 100644 --- a/music_kraken/objects/song.py +++ b/music_kraken/objects/song.py @@ -60,7 +60,7 @@ def get_collection_string( def get_element_str(element) -> str: nonlocal add_id r = element.title_string.strip() - if add_id: + if add_id and False: r += " " + str(element.id) return r diff --git a/music_kraken/pages/youtube_music/youtube_music.py b/music_kraken/pages/youtube_music/youtube_music.py index 61fe534..894b972 100644 --- a/music_kraken/pages/youtube_music/youtube_music.py +++ b/music_kraken/pages/youtube_music/youtube_music.py @@ -538,6 +538,10 @@ class YoutubeMusic(SuperYouTube): for renderer in renderer_list: album.add_list_of_other_objects(parse_renderer(renderer)) + for song in album.song_collection: + for song_source in song.source_collection: + song_source.additional_data["playlist_id"] = browse_id + return album @@ -556,10 +560,7 @@ class YoutubeMusic(SuperYouTube): uploader = ydl_res.get("uploader", "") if uploader.endswith(" - Topic"): artist_names = 
[uploader.rstrip(" - Topic")] - """ - elif "artist" in ydl_res: - artist_names = ydl_res.get("artist").split(", ") - """ + artist_list = [ Artist( name=name, @@ -577,7 +578,7 @@ class YoutubeMusic(SuperYouTube): )) artist_name = artist_names[0] if len(artist_names) > 0 else None - return Song( + song = Song( title=ydl_res.get("track", clean_song_title(ydl_res.get("title"), artist_name=artist_name)), note=ydl_res.get("descriptions"), album_list=album_list, @@ -590,6 +591,41 @@ class YoutubeMusic(SuperYouTube): ), source], ) + # other song details + parsed_url = urlparse(source.url) + browse_id = parse_qs(parsed_url.query)['v'][0] + request_data = { + "captionParams": {}, + "context": {**self.credentials.context, "adSignalsInfo": {"params": []}}, + "videoId": browse_id, + } + if "playlist_id" in source.additional_data: + request_data["playlistId"] = source.additional_data["playlist_id"] + + initial_details = self.yt_music_connection.post( + url=get_youtube_url(path="/youtubei/v1/player", query=f"prettyPrint=false"), + json=request_data, + name=f"fetch_album_{browse_id}.json", + ) + + if initial_details is None: + return song + + dump_to_file(f"fetch_song_{browse_id}.json", initial_details.text, is_json=True, exit_after_dump=False) + + data = initial_details.json() + video_details = data.get("videoDetails", {}) + + browse_id = video_details.get("videoId", browse_id) + song.title = video_details.get("title", song.title) + if video_details.get("isLiveContent", False): + for album in song.album_list: + album.album_type = AlbumType.LIVE_ALBUM + for thumbnail in video_details.get("thumbnails", []): + song.artwork.append(**thumbnail) + + return song + def fetch_media_url(self, source: Source, ydl_res: dict = None) -> dict: def _get_best_format(format_list: List[Dict]) -> dict: diff --git a/music_kraken/utils/shared.py b/music_kraken/utils/shared.py index 5a40396..b75cf7f 100644 --- a/music_kraken/utils/shared.py +++ b/music_kraken/utils/shared.py @@ -15,7 +15,7 @@ __stage__ 
= os.getenv("STAGE", "prod") DEBUG = (__stage__ == "dev") and True DEBUG_LOGGING = DEBUG and False DEBUG_TRACE = DEBUG and True -DEBUG_OBJECT_TRACE = DEBUG and True +DEBUG_OBJECT_TRACE = DEBUG and False DEBUG_OBJECT_TRACE_CALLSTACK = DEBUG_OBJECT_TRACE and False DEBUG_YOUTUBE_INITIALIZING = DEBUG and False DEBUG_PAGES = DEBUG and False From ed8cc914bec7456eb2a9fd8133af25ce20d15fce Mon Sep 17 00:00:00 2001 From: Lars Noack Date: Mon, 6 May 2024 16:27:49 +0200 Subject: [PATCH 28/32] feat: lyrics for youtube music --- .vscode/settings.json | 3 +- .../pages/youtube_music/youtube_music.py | 61 ++++++++++++++++++- music_kraken/utils/__init__.py | 33 +++++++++- 3 files changed, 92 insertions(+), 5 deletions(-) diff --git a/.vscode/settings.json b/.vscode/settings.json index f49d1d9..64b7f98 100644 --- a/.vscode/settings.json +++ b/.vscode/settings.json @@ -33,6 +33,7 @@ "tracksort", "translit", "unmap", - "youtube" + "youtube", + "youtubei" ] } \ No newline at end of file diff --git a/music_kraken/pages/youtube_music/youtube_music.py b/music_kraken/pages/youtube_music/youtube_music.py index 894b972..bbb8d22 100644 --- a/music_kraken/pages/youtube_music/youtube_music.py +++ b/music_kraken/pages/youtube_music/youtube_music.py @@ -18,7 +18,7 @@ from ...utils.exception.config import SettingValueError from ...utils.config import main_settings, youtube_settings, logging_settings from ...utils.shared import DEBUG, DEBUG_YOUTUBE_INITIALIZING from ...utils.string_processing import clean_song_title -from ...utils import get_current_millis +from ...utils import get_current_millis, traverse_json_path from ...utils import dump_to_file @@ -31,7 +31,9 @@ from ...objects import ( Song, Album, Label, - Target + Target, + Lyrics, + FormattedText ) from ...connection import Connection from ...utils.enums.album import AlbumType @@ -544,6 +546,57 @@ class YoutubeMusic(SuperYouTube): return album + def fetch_lyrics(self, video_id: str, playlist_id: str = None) -> str: + request_data = { + 
"context": {**self.credentials.context, "adSignalsInfo": {"params": []}}, + "videoId": video_id, + } + if playlist_id is not None: + request_data["playlistId"] = playlist_id + + tab_request = self.yt_music_connection.post( + url=get_youtube_url(path="/youtubei/v1/next", query=f"prettyPrint=false"), + json=request_data, + name=f"fetch_song_tabs_{video_id}.json", + ) + + if tab_request is None: + return None + + dump_to_file(f"fetch_song_tabs_{video_id}.json", tab_request.text, is_json=True, exit_after_dump=False) + + tab_data: dict = tab_request.json() + + tabs = traverse_json_path(tab_data, "contents.singleColumnMusicWatchNextResultsRenderer.tabbedRenderer.watchNextTabbedResultsRenderer.tabs", default=[]) + browse_id = None + for tab in tabs: + pageType = traverse_json_path(tab, "tabRenderer.endpoint.browseEndpoint.browseEndpointContextSupportedConfigs.browseEndpointContextMusicConfig.pageType", default="") + if pageType in ("MUSIC_TAB_TYPE_LYRICS", "MUSIC_PAGE_TYPE_TRACK_LYRICS") or "lyrics" in pageType.lower(): + browse_id = traverse_json_path(tab, "tabRenderer.endpoint.browseEndpoint.browseId", default=None) + break + + if browse_id is None: + return None + + + r = self.yt_music_connection.post( + url=get_youtube_url(path="/youtubei/v1/browse", query=f"prettyPrint=false"), + json={ + "browseId": browse_id, + "context": {**self.credentials.context, "adSignalsInfo": {"params": []}} + }, + name=f"fetch_song_lyrics_{video_id}.json" + ) + + dump_to_file(f"fetch_song_lyrics_{video_id}.json", r.text, is_json=True, exit_after_dump=False) + + data = r.json() + lyrics_text = traverse_json_path(data, "contents.sectionListRenderer.contents[0].musicDescriptionShelfRenderer.description.runs[0].text", default=None) + if lyrics_text is None: + return None + + return Lyrics(FormattedText(plain=lyrics_text)) + def fetch_song(self, source: Source, stop_at_level: int = 1) -> Song: ydl_res: dict = {} @@ -605,7 +658,7 @@ class YoutubeMusic(SuperYouTube): initial_details = 
self.yt_music_connection.post( url=get_youtube_url(path="/youtubei/v1/player", query=f"prettyPrint=false"), json=request_data, - name=f"fetch_album_{browse_id}.json", + name=f"fetch_song_{browse_id}.json", ) if initial_details is None: @@ -624,6 +677,8 @@ class YoutubeMusic(SuperYouTube): for thumbnail in video_details.get("thumbnails", []): song.artwork.append(**thumbnail) + song.lyrics_collection.append(self.fetch_lyrics(browse_id, playlist_id=request_data.get("playlistId"))) + return song diff --git a/music_kraken/utils/__init__.py b/music_kraken/utils/__init__.py index 981eb7e..e85fa1a 100644 --- a/music_kraken/utils/__init__.py +++ b/music_kraken/utils/__init__.py @@ -3,7 +3,7 @@ from pathlib import Path import json import logging import inspect -from typing import List +from typing import List, Union from .shared import DEBUG, DEBUG_LOGGING, DEBUG_DUMP, DEBUG_TRACE, DEBUG_OBJECT_TRACE, DEBUG_OBJECT_TRACE_CALLSTACK from .config import config, read_config, write_config @@ -77,6 +77,37 @@ def object_trace(obj): misc functions """ +def traverse_json_path(data, path: Union[str, List[str]], default=None): + """ + Path parts are concatenated with . or wrapped with [""] for object keys and wrapped in [] for array indices. 
+ """ + + if isinstance(path, str): + path = path.replace('["', '.').replace('"]', '.').replace("[", ".").replace("]", ".") + path = [p for p in path.split(".") if len(p) > 0] + + if len(path) <= 0: + return data + + current = path[0] + path = path[1:] + + new_data = None + + if isinstance(data, dict): + new_data = data.get(current) + + elif isinstance(data, list): + try: + new_data = data[int(current)] + except (IndexError, ValueError): + pass + + if new_data is None: + return default + + return traverse_json_path(data=new_data, path=path, default=default) + _auto_increment = 0 def generate_id() -> int: global _auto_increment From 131be537c88151505f97595c42e37798c1b07a26 Mon Sep 17 00:00:00 2001 From: Hellow <74311245+HeIIow2@users.noreply.github.com> Date: Mon, 6 May 2024 17:39:53 +0200 Subject: [PATCH 29/32] fix: actually merging --- music_kraken/objects/collection.py | 22 ++++++++++++---------- music_kraken/objects/parents.py | 10 ++++++++-- music_kraken/utils/shared.py | 2 +- 3 files changed, 21 insertions(+), 13 deletions(-) diff --git a/music_kraken/objects/collection.py b/music_kraken/objects/collection.py index e57cb45..b8b2d4a 100644 --- a/music_kraken/objects/collection.py +++ b/music_kraken/objects/collection.py @@ -88,6 +88,9 @@ class Collection(Generic[T]): def _find_object(self, __object: T, **kwargs) -> Optional[T]: self._remap() + if __object.id in self._indexed_from_id: + return self._indexed_values["id"][__object.id] + for name, value in __object.indexing_values: if value in self._indexed_values[name]: return self._indexed_values[name][value] @@ -138,7 +141,6 @@ class Collection(Generic[T]): :return: """ - if other is None: return if other.id in self._indexed_from_id: @@ -146,13 +148,6 @@ class Collection(Generic[T]): object_trace(f"Appending {other.option_string} to {self}") - # switching collection in the case of push to - for c in self.push_to: - r = c._find_object(other) - if r is not None: - output("found push to", r, other, self, 
color=BColors.RED, sep="\t") - return c.append(other, **kwargs) - for c in self.pull_from: r = c._find_object(other) @@ -163,6 +158,13 @@ class Collection(Generic[T]): break existing_object = self._find_object(other) + + # switching collection in the case of push to + for c in self.push_to: + r = c._find_object(other) + if r is not None: + output("found push to", r, other, self, color=BColors.RED, sep="\t") + return c.append(other, **kwargs) if existing_object is None: self._append_new_object(other, **kwargs) @@ -195,8 +197,8 @@ class Collection(Generic[T]): if other_collections is None: return - for __object in other_collections: - self.append(__object, **kwargs) + for other_object in other_collections: + self.append(other_object, **kwargs) @property def data(self) -> List[T]: diff --git a/music_kraken/objects/parents.py b/music_kraken/objects/parents.py index ac04e34..a79887a 100644 --- a/music_kraken/objects/parents.py +++ b/music_kraken/objects/parents.py @@ -60,6 +60,13 @@ class InnerData: self._fetched_from.update(__other._fetched_from) for key, value in __other.__dict__.copy().items(): + if key.startswith("_"): + continue + + if hasattr(value, "__is_collection__") and key in self.__dict__: + self.__getattribute__(key).__merge__(value, **kwargs) + continue + # just set the other value if self doesn't already have it if key not in self.__dict__ or (key in self.__dict__ and self.__dict__[key] == self._default_values.get(key)): self.__setattr__(key, value) @@ -67,9 +74,8 @@ class InnerData: # if the object of value implemented __merge__, it merges existing = self.__getattribute__(key) - if hasattr(type(existing), "__merge__"): + if hasattr(existing, "__merge__"): existing.__merge__(value, **kwargs) - continue class OuterProxy: diff --git a/music_kraken/utils/shared.py b/music_kraken/utils/shared.py index b75cf7f..5a40396 100644 --- a/music_kraken/utils/shared.py +++ b/music_kraken/utils/shared.py @@ -15,7 +15,7 @@ __stage__ = os.getenv("STAGE", "prod") DEBUG = 
(__stage__ == "dev") and True DEBUG_LOGGING = DEBUG and False DEBUG_TRACE = DEBUG and True -DEBUG_OBJECT_TRACE = DEBUG and False +DEBUG_OBJECT_TRACE = DEBUG and True DEBUG_OBJECT_TRACE_CALLSTACK = DEBUG_OBJECT_TRACE and False DEBUG_YOUTUBE_INITIALIZING = DEBUG and False DEBUG_PAGES = DEBUG and False From 542d59562a8c8bcf6c55920b8ecb1d1fc231c805 Mon Sep 17 00:00:00 2001 From: Hellow <74311245+HeIIow2@users.noreply.github.com> Date: Mon, 6 May 2024 18:35:25 +0200 Subject: [PATCH 30/32] fix: removed redundant code --- music_kraken/pages/musify.py | 7 ------- music_kraken/utils/shared.py | 2 +- 2 files changed, 1 insertion(+), 8 deletions(-) diff --git a/music_kraken/pages/musify.py b/music_kraken/pages/musify.py index 4646385..5f1b7aa 100644 --- a/music_kraken/pages/musify.py +++ b/music_kraken/pages/musify.py @@ -690,13 +690,6 @@ class Musify(Page): new_song = self._parse_song_card(card_soup) album.song_collection.append(new_song) - if stop_at_level > 1: - song: Song - for song in album.song_collection: - sources = song.source_collection.get_sources(self.SOURCE_TYPE) - for source in sources: - song.merge(self.fetch_song(source=source)) - album.update_tracksort() return album diff --git a/music_kraken/utils/shared.py b/music_kraken/utils/shared.py index 5a40396..b75cf7f 100644 --- a/music_kraken/utils/shared.py +++ b/music_kraken/utils/shared.py @@ -15,7 +15,7 @@ __stage__ = os.getenv("STAGE", "prod") DEBUG = (__stage__ == "dev") and True DEBUG_LOGGING = DEBUG and False DEBUG_TRACE = DEBUG and True -DEBUG_OBJECT_TRACE = DEBUG and True +DEBUG_OBJECT_TRACE = DEBUG and False DEBUG_OBJECT_TRACE_CALLSTACK = DEBUG_OBJECT_TRACE and False DEBUG_YOUTUBE_INITIALIZING = DEBUG and False DEBUG_PAGES = DEBUG and False From 6805d1cbe62203522930f5d3976e90edf9948e06 Mon Sep 17 00:00:00 2001 From: Hellow <74311245+HeIIow2@users.noreply.github.com> Date: Mon, 6 May 2024 18:40:21 +0200 Subject: [PATCH 31/32] feat: allowed to append none to source collection --- 
development/actual_donwload.py | 4 ++-- music_kraken/objects/source.py | 3 +++ 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/development/actual_donwload.py b/development/actual_donwload.py index 2c9a174..c821734 100644 --- a/development/actual_donwload.py +++ b/development/actual_donwload.py @@ -6,8 +6,8 @@ logging.getLogger().setLevel(logging.DEBUG) if __name__ == "__main__": commands = [ - "s: #a Crystal F", - "d: 20" + "s: #a Psychonaut 4", + "d: 0" ] diff --git a/music_kraken/objects/source.py b/music_kraken/objects/source.py index b7e483a..ff68d6a 100644 --- a/music_kraken/objects/source.py +++ b/music_kraken/objects/source.py @@ -122,6 +122,9 @@ class SourceCollection: yield from self._page_to_source_list[page] def append(self, source: Source): + if source is None: + return + existing_source = None for key in source.indexing_values: if key in self._indexed_sources: From 1b22c80e5c0a225454890400eb23667f60c0ee01 Mon Sep 17 00:00:00 2001 From: Hellow <74311245+HeIIow2@users.noreply.github.com> Date: Mon, 6 May 2024 18:48:13 +0200 Subject: [PATCH 32/32] fix: removing the possibility of file names containing / --- music_kraken/audio/metadata.py | 2 +- music_kraken/utils/shared.py | 2 +- music_kraken/utils/string_processing.py | 7 +++++++ 3 files changed, 9 insertions(+), 2 deletions(-) diff --git a/music_kraken/audio/metadata.py b/music_kraken/audio/metadata.py index 1d37419..1431112 100644 --- a/music_kraken/audio/metadata.py +++ b/music_kraken/audio/metadata.py @@ -79,7 +79,7 @@ def write_metadata_to_target(metadata: Metadata, target: Target, song: Song): with temp_target.open("wb") as f: f.write(r.content) - converted_target: Target = Target.temp(name=f"{song.title}.jpeg") + converted_target: Target = Target.temp(name=f"{song.title.replace('/', '_')}") with Image.open(temp_target.file_path) as img: # crop the image if it isn't square in the middle with minimum data loss width, height = img.size diff --git a/music_kraken/utils/shared.py 
b/music_kraken/utils/shared.py index b75cf7f..8f671f9 100644 --- a/music_kraken/utils/shared.py +++ b/music_kraken/utils/shared.py @@ -19,7 +19,7 @@ DEBUG_OBJECT_TRACE = DEBUG and False DEBUG_OBJECT_TRACE_CALLSTACK = DEBUG_OBJECT_TRACE and False DEBUG_YOUTUBE_INITIALIZING = DEBUG and False DEBUG_PAGES = DEBUG and False -DEBUG_DUMP = DEBUG and True +DEBUG_DUMP = DEBUG and False DEBUG_PRINT_ID = DEBUG and True if DEBUG: diff --git a/music_kraken/utils/string_processing.py b/music_kraken/utils/string_processing.py index 1e46a58..22ae63e 100644 --- a/music_kraken/utils/string_processing.py +++ b/music_kraken/utils/string_processing.py @@ -52,7 +52,14 @@ def fit_to_file_system(string: Union[str, Path], hidden_ok: bool = False) -> Uni string = string[1:] string = string.replace("/", "_").replace("\\", "_") + + try: + string = translit(string, reversed=True) + except LanguageDetectionError: + pass + string = sanitize_filename(string) + return string if isinstance(string, Path):