From 7e5a1f84ae0458c8c36a7c342568e3163e0bd996 Mon Sep 17 00:00:00 2001 From: Lars Noack Date: Mon, 6 May 2024 12:40:06 +0200 Subject: [PATCH] feat: improved the youtube music album fetching --- .vscode/settings.json | 1 + music_kraken/objects/collection.py | 37 ++++-------- music_kraken/objects/song.py | 2 +- .../pages/youtube_music/_list_render.py | 19 ++---- .../youtube_music/_music_object_render.py | 10 +++- .../pages/youtube_music/youtube_music.py | 59 ++++++++++++++++--- music_kraken/utils/shared.py | 2 +- 7 files changed, 76 insertions(+), 54 deletions(-) diff --git a/.vscode/settings.json b/.vscode/settings.json index fa0b6f7..f49d1d9 100644 --- a/.vscode/settings.json +++ b/.vscode/settings.json @@ -29,6 +29,7 @@ "pathvalidate", "Referer", "sponsorblock", + "tracklist", "tracksort", "translit", "unmap", diff --git a/music_kraken/objects/collection.py b/music_kraken/objects/collection.py index aa83e9e..e57cb45 100644 --- a/music_kraken/objects/collection.py +++ b/music_kraken/objects/collection.py @@ -93,18 +93,6 @@ class Collection(Generic[T]): return self._indexed_values[name][value] return None - - def _merge_into_contained_object(self, existing: T, other: T, **kwargs): - """ - This function merges the other object into the existing object, which is contained in the current collection. - This also modifies the correct mapping. - """ - - if existing.id == other.id: - return - - self._map_element(existing) - existing.merge(other, **kwargs) def _append_new_object(self, other: T, **kwargs): """ @@ -113,7 +101,6 @@ class Collection(Generic[T]): """ self._data.append(other) - self._map_element(other) # all of the existing hooks to get the defined datastructure for collection_attribute, generator in self.extend_object_to_attribute.items(): @@ -128,17 +115,10 @@ class Collection(Generic[T]): if a is b: continue - """ - no_sync_collection: Set[Collection] = kwargs.get("no_sync_collection", set()) - if id(b) in no_sync_collection: - continue - """ object_trace(f"Syncing [{a}] = [{b}]") - b_data = b.data.copy() b_collection_for = b._collection_for.copy() - # no_sync_collection.add(id(b)) del b @@ -166,6 +146,7 @@ class Collection(Generic[T]): object_trace(f"Appending {other.option_string} to {self}") + # switching collection in the case of push to for c in self.push_to: r = c._find_object(other) if r is not None: @@ -173,25 +154,24 @@ class Collection(Generic[T]): return c.append(other, **kwargs) - pull_from: Optional[Tuple[Collection, T]] = None for c in self.pull_from: r = c._find_object(other) if r is not None: output("found pull from", r, other, self, color=BColors.RED, sep="\t") other.merge(r, **kwargs) - c.remove(r, **kwargs) + c.remove(r, existing=r, **kwargs) break - existing_object = self._find_object(other, no_push_to=kwargs.get("no_push_to", False)) + existing_object = self._find_object(other) if existing_object is None: self._append_new_object(other, **kwargs) else: existing_object.merge(other, **kwargs) - def remove(self, *other_list: List[T], silent: bool = False, **kwargs): + def remove(self, *other_list: List[T], silent: bool = False, existing: Optional[T] = None, **kwargs): for other in other_list: - existing: Optional[T] = self._indexed_values["id"].get(other.id, None) + existing: Optional[T] = existing or self._indexed_values["id"].get(other.id, None) if existing is None: if not silent: raise ValueError(f"Object {other} not found in {self}") @@ -233,6 +213,7 @@ class Collection(Generic[T]): yield from self._data def __merge__(self, other: Collection, **kwargs): + object_trace(f"merging {str(self)} | {str(other)}") self.extend(other, **kwargs) def __getitem__(self, item: int): @@ -242,3 +223,9 @@ class Collection(Generic[T]): if item >= len(self._data): return default return self._data[item] + + def __eq__(self, other: Collection) -> bool: + if self.empty and other.empty: + return True + + return self._data == other._data diff --git a/music_kraken/objects/song.py b/music_kraken/objects/song.py index 2113fcc..1528887 100644 --- a/music_kraken/objects/song.py +++ b/music_kraken/objects/song.py @@ -156,7 +156,7 @@ class Song(Base): return if isinstance(object_list, Artist): - self.main_artist_collection.extend(object_list) + self.feature_artist_collection.extend(object_list) return if isinstance(object_list, Album): diff --git a/music_kraken/pages/youtube_music/_list_render.py b/music_kraken/pages/youtube_music/_list_render.py index a820014..bb6f40b 100644 --- a/music_kraken/pages/youtube_music/_list_render.py +++ b/music_kraken/pages/youtube_music/_list_render.py @@ -25,7 +25,6 @@ def music_card_shelf_renderer(renderer: dict) -> List[DatabaseObject]: results.extend(parse_renderer(sub_renderer)) return results - def music_responsive_list_item_flex_column_renderer(renderer: dict) -> List[DatabaseObject]: return parse_run_list(renderer.get("text", {}).get("runs", [])) @@ -54,21 +53,11 @@ def music_responsive_list_item_renderer(renderer: dict) -> List[DatabaseObject]: for result in results: _map[type(result)].append(result) - for song in song_list: - song.album_collection.extend(album_list) + if len(song_list) == 1: + song = song_list[0] song.feature_artist_collection.extend(artist_list) - - if len(song_list) > 0: - return song_list - - for album in album_list: - album.artist_collection.extend(artist_list) - - if len(album_list) > 0: - return album_list - - if len(artist_list) > 0: - return artist_list + song.album_collection.extend(album_list) + return [song] return results diff --git a/music_kraken/pages/youtube_music/_music_object_render.py b/music_kraken/pages/youtube_music/_music_object_render.py index f10d11a..831d50d 100644 --- a/music_kraken/pages/youtube_music/_music_object_render.py +++ b/music_kraken/pages/youtube_music/_music_object_render.py @@ -40,7 +40,7 @@ def parse_run_element(run_element: dict) -> Optional[DatabaseObject]: _temp_nav = run_element.get("navigationEndpoint", {}) is_video = "watchEndpoint" in _temp_nav - navigation_endpoint = _temp_nav.get("watchEndpoint" if is_video else "browseEndpoint", {}) + navigation_endpoint = _temp_nav.get("watchEndpoint", _temp_nav.get("browseEndpoint", {})) element_type = PageType.SONG page_type_string = navigation_endpoint.get("watchEndpointMusicSupportedConfigs", {}).get("watchEndpointMusicConfig", {}).get("musicVideoType", "") @@ -51,7 +51,7 @@ def parse_run_element(run_element: dict) -> Optional[DatabaseObject]: except ValueError: return - element_id = navigation_endpoint.get("videoId" if is_video else "browseId") + element_id = navigation_endpoint.get("videoId", navigation_endpoint.get("browseId")) element_text = run_element.get("text") if element_id is None or element_text is None: @@ -60,7 +60,11 @@ def parse_run_element(run_element: dict) -> Optional[DatabaseObject]: if element_type == PageType.SONG or (element_type == PageType.VIDEO and not youtube_settings["youtube_music_clean_data"]) or (element_type == PageType.OFFICIAL_MUSIC_VIDEO and not youtube_settings["youtube_music_clean_data"]): source = Source(SOURCE_PAGE, f"https://music.youtube.com/watch?v={element_id}") - return Song(title=clean_song_title(element_text), source_list=[source]) + + return Song( + title=clean_song_title(element_text), + source_list=[source] + ) if element_type == PageType.ARTIST or (element_type == PageType.CHANNEL and not youtube_settings["youtube_music_clean_data"]): source = Source(SOURCE_PAGE, f"https://music.youtube.com/channel/{element_id}") diff --git a/music_kraken/pages/youtube_music/youtube_music.py b/music_kraken/pages/youtube_music/youtube_music.py index 46581f3..9c37ad2 100644 --- a/music_kraken/pages/youtube_music/youtube_music.py +++ b/music_kraken/pages/youtube_music/youtube_music.py @@ -8,6 +8,7 @@ import json from dataclasses import dataclass import re from functools import lru_cache +from collections import defaultdict import youtube_dl from youtube_dl.extractor.youtube import YoutubeIE @@ -33,9 +34,11 @@ from ...objects import ( Target ) from ...connection import Connection +from ...utils.enums.album import AlbumType from ...utils.support_classes.download_result import DownloadResult from ._list_render import parse_renderer +from ._music_object_render import parse_run_element from .super_youtube import SuperYouTube @@ -162,6 +165,12 @@ class MusicKrakenYoutubeIE(YoutubeIE): +ALBUM_TYPE_MAP = { + "Single": AlbumType.SINGLE, + "Album": AlbumType.STUDIO_ALBUM, + "EP": AlbumType.EP, +} + class YoutubeMusic(SuperYouTube): # CHANGE @@ -465,6 +474,46 @@ class YoutubeMusic(SuperYouTube): if DEBUG: dump_to_file(f"{browse_id}.json", r.text, is_json=True, exit_after_dump=False) + data = r.json() + + # album details + header = data.get("header", {}) + musicDetailHeaderRenderer = header.get("musicDetailHeaderRenderer", {}) + + title_runs: List[dict] = musicDetailHeaderRenderer.get("title", {}).get("runs", []) + subtitle_runs: List[dict] = musicDetailHeaderRenderer.get("subtitle", {}).get("runs", []) + + if len(title_runs) > 0: + album.title = title_runs[0].get("text", album.title) + + def other_parse_run(run: dict) -> str: + nonlocal album + + if "text" not in run: + return + text = run["text"] + + is_text_field = len(run.keys()) == 1 + + # regex that text is a year + if is_text_field and re.match(r"\d{4}", text): + album.date = ID3Timestamp.strptime(text, "%Y") + return + + if text in ALBUM_TYPE_MAP: + album.album_type = ALBUM_TYPE_MAP[text] + return + + if not is_text_field: + r = parse_run_element(run) + if r is not None: + album.add_list_of_other_objects([r]) + return + + for _run in subtitle_runs: + other_parse_run(_run) + + # tracklist renderer_list = r.json().get("contents", {}).get("singleColumnBrowseResultsRenderer", {}).get("tabs", [{}])[ 0].get("tabRenderer", {}).get("content", {}).get("sectionListRenderer", {}).get("contents", []) @@ -472,17 +521,9 @@ class YoutubeMusic(SuperYouTube): for i, content in enumerate(renderer_list): dump_to_file(f"{i}-album-renderer.json", json.dumps(content), is_json=True, exit_after_dump=False) - results = [] - - """ - cant use fixed indices, because if something has no entries, the list dissappears - instead I have to try parse everything, and just reject community playlists and profiles. - """ for renderer in renderer_list: - results.extend(parse_renderer(renderer)) - - album.add_list_of_other_objects(results) + album.add_list_of_other_objects(parse_renderer(renderer)) return album diff --git a/music_kraken/utils/shared.py b/music_kraken/utils/shared.py index 8f671f9..b75cf7f 100644 --- a/music_kraken/utils/shared.py +++ b/music_kraken/utils/shared.py @@ -19,7 +19,7 @@ DEBUG_OBJECT_TRACE = DEBUG and False DEBUG_OBJECT_TRACE_CALLSTACK = DEBUG_OBJECT_TRACE and False DEBUG_YOUTUBE_INITIALIZING = DEBUG and False DEBUG_PAGES = DEBUG and False -DEBUG_DUMP = DEBUG and False +DEBUG_DUMP = DEBUG and True DEBUG_PRINT_ID = DEBUG and True if DEBUG: