feat: improved the youtube music album fetching
This commit is contained in:
parent
d9105fb55a
commit
7e5a1f84ae
1
.vscode/settings.json
vendored
1
.vscode/settings.json
vendored
@ -29,6 +29,7 @@
|
||||
"pathvalidate",
|
||||
"Referer",
|
||||
"sponsorblock",
|
||||
"tracklist",
|
||||
"tracksort",
|
||||
"translit",
|
||||
"unmap",
|
||||
|
@ -94,18 +94,6 @@ class Collection(Generic[T]):
|
||||
|
||||
return None
|
||||
|
||||
def _merge_into_contained_object(self, existing: T, other: T, **kwargs):
    """
    Merge ``other`` into ``existing``, an object already held by this collection.

    Nothing happens when both objects carry the same ``id``. Otherwise
    ``existing`` is handed to ``self._map_element`` and ``other`` is then
    merged into it, with ``kwargs`` forwarded to ``existing.merge``.
    """
    is_same_object = existing.id == other.id

    if not is_same_object:
        self._map_element(existing)
        existing.merge(other, **kwargs)
|
||||
|
||||
def _append_new_object(self, other: T, **kwargs):
|
||||
"""
|
||||
This function appends the other object to the current collection.
|
||||
@ -113,7 +101,6 @@ class Collection(Generic[T]):
|
||||
"""
|
||||
|
||||
self._data.append(other)
|
||||
self._map_element(other)
|
||||
|
||||
# all of the existing hooks to get the defined datastructure
|
||||
for collection_attribute, generator in self.extend_object_to_attribute.items():
|
||||
@ -128,17 +115,10 @@ class Collection(Generic[T]):
|
||||
if a is b:
|
||||
continue
|
||||
|
||||
"""
|
||||
no_sync_collection: Set[Collection] = kwargs.get("no_sync_collection", set())
|
||||
if id(b) in no_sync_collection:
|
||||
continue
|
||||
"""
|
||||
object_trace(f"Syncing [{a}] = [{b}]")
|
||||
|
||||
|
||||
b_data = b.data.copy()
|
||||
b_collection_for = b._collection_for.copy()
|
||||
# no_sync_collection.add(id(b))
|
||||
|
||||
del b
|
||||
|
||||
@ -166,6 +146,7 @@ class Collection(Generic[T]):
|
||||
|
||||
object_trace(f"Appending {other.option_string} to {self}")
|
||||
|
||||
# switching collection in the case of push to
|
||||
for c in self.push_to:
|
||||
r = c._find_object(other)
|
||||
if r is not None:
|
||||
@ -173,25 +154,24 @@ class Collection(Generic[T]):
|
||||
return c.append(other, **kwargs)
|
||||
|
||||
|
||||
pull_from: Optional[Tuple[Collection, T]] = None
|
||||
for c in self.pull_from:
|
||||
r = c._find_object(other)
|
||||
if r is not None:
|
||||
output("found pull from", r, other, self, color=BColors.RED, sep="\t")
|
||||
other.merge(r, **kwargs)
|
||||
c.remove(r, **kwargs)
|
||||
c.remove(r, existing=r, **kwargs)
|
||||
break
|
||||
|
||||
existing_object = self._find_object(other, no_push_to=kwargs.get("no_push_to", False))
|
||||
existing_object = self._find_object(other)
|
||||
|
||||
if existing_object is None:
|
||||
self._append_new_object(other, **kwargs)
|
||||
else:
|
||||
existing_object.merge(other, **kwargs)
|
||||
|
||||
def remove(self, *other_list: List[T], silent: bool = False, **kwargs):
|
||||
def remove(self, *other_list: List[T], silent: bool = False, existing: Optional[T] = None, **kwargs):
|
||||
for other in other_list:
|
||||
existing: Optional[T] = self._indexed_values["id"].get(other.id, None)
|
||||
existing: Optional[T] = existing or self._indexed_values["id"].get(other.id, None)
|
||||
if existing is None:
|
||||
if not silent:
|
||||
raise ValueError(f"Object {other} not found in {self}")
|
||||
@ -233,6 +213,7 @@ class Collection(Generic[T]):
|
||||
yield from self._data
|
||||
|
||||
def __merge__(self, other: Collection, **kwargs):
    """
    Merge ``other`` into this collection.

    Emits an object trace naming both collections, then passes ``other``
    and ``kwargs`` on to ``self.extend``.
    """
    trace_message = f"merging {str(self)} | {str(other)}"
    object_trace(trace_message)

    self.extend(other, **kwargs)
|
||||
|
||||
def __getitem__(self, item: int):
|
||||
@ -242,3 +223,9 @@ class Collection(Generic[T]):
|
||||
if item >= len(self._data):
|
||||
return default
|
||||
return self._data[item]
|
||||
|
||||
def __eq__(self, other: Collection) -> bool:
    """
    Compare this collection with ``other``.

    Two collections are equal when both report ``empty``, or when their
    ``_data`` lists compare equal.

    For anything that is not a ``Collection`` the method returns
    ``NotImplemented`` so Python can fall back to its default comparison,
    instead of raising ``AttributeError`` on the missing ``empty`` /
    ``_data`` attributes (e.g. ``collection == None``).
    """
    if not isinstance(other, Collection):
        return NotImplemented

    if self.empty and other.empty:
        return True

    return self._data == other._data
|
||||
|
@ -156,7 +156,7 @@ class Song(Base):
|
||||
return
|
||||
|
||||
if isinstance(object_list, Artist):
|
||||
self.main_artist_collection.extend(object_list)
|
||||
self.feature_artist_collection.extend(object_list)
|
||||
return
|
||||
|
||||
if isinstance(object_list, Album):
|
||||
|
@ -25,7 +25,6 @@ def music_card_shelf_renderer(renderer: dict) -> List[DatabaseObject]:
|
||||
results.extend(parse_renderer(sub_renderer))
|
||||
return results
|
||||
|
||||
|
||||
def music_responsive_list_item_flex_column_renderer(renderer: dict) -> List[DatabaseObject]:
    """
    Parse the text runs of a flex column renderer.

    Reads ``renderer["text"]["runs"]`` (falling back to an empty list when
    either key is missing) and returns whatever ``parse_run_list`` produces
    for those runs.
    """
    text_section = renderer.get("text", {})
    run_list = text_section.get("runs", [])

    return parse_run_list(run_list)
|
||||
|
||||
@ -54,21 +53,11 @@ def music_responsive_list_item_renderer(renderer: dict) -> List[DatabaseObject]:
|
||||
for result in results:
|
||||
_map[type(result)].append(result)
|
||||
|
||||
for song in song_list:
|
||||
song.album_collection.extend(album_list)
|
||||
if len(song_list) == 1:
|
||||
song = song_list[0]
|
||||
song.feature_artist_collection.extend(artist_list)
|
||||
|
||||
if len(song_list) > 0:
|
||||
return song_list
|
||||
|
||||
for album in album_list:
|
||||
album.artist_collection.extend(artist_list)
|
||||
|
||||
if len(album_list) > 0:
|
||||
return album_list
|
||||
|
||||
if len(artist_list) > 0:
|
||||
return artist_list
|
||||
song.album_collection.extend(album_list)
|
||||
return [song]
|
||||
|
||||
return results
|
||||
|
||||
|
@ -40,7 +40,7 @@ def parse_run_element(run_element: dict) -> Optional[DatabaseObject]:
|
||||
_temp_nav = run_element.get("navigationEndpoint", {})
|
||||
is_video = "watchEndpoint" in _temp_nav
|
||||
|
||||
navigation_endpoint = _temp_nav.get("watchEndpoint" if is_video else "browseEndpoint", {})
|
||||
navigation_endpoint = _temp_nav.get("watchEndpoint", _temp_nav.get("browseEndpoint", {}))
|
||||
|
||||
element_type = PageType.SONG
|
||||
page_type_string = navigation_endpoint.get("watchEndpointMusicSupportedConfigs", {}).get("watchEndpointMusicConfig", {}).get("musicVideoType", "")
|
||||
@ -51,7 +51,7 @@ def parse_run_element(run_element: dict) -> Optional[DatabaseObject]:
|
||||
except ValueError:
|
||||
return
|
||||
|
||||
element_id = navigation_endpoint.get("videoId" if is_video else "browseId")
|
||||
element_id = navigation_endpoint.get("videoId", navigation_endpoint.get("browseId"))
|
||||
element_text = run_element.get("text")
|
||||
|
||||
if element_id is None or element_text is None:
|
||||
@ -60,7 +60,11 @@ def parse_run_element(run_element: dict) -> Optional[DatabaseObject]:
|
||||
|
||||
if element_type == PageType.SONG or (element_type == PageType.VIDEO and not youtube_settings["youtube_music_clean_data"]) or (element_type == PageType.OFFICIAL_MUSIC_VIDEO and not youtube_settings["youtube_music_clean_data"]):
|
||||
source = Source(SOURCE_PAGE, f"https://music.youtube.com/watch?v={element_id}")
|
||||
return Song(title=clean_song_title(element_text), source_list=[source])
|
||||
|
||||
return Song(
|
||||
title=clean_song_title(element_text),
|
||||
source_list=[source]
|
||||
)
|
||||
|
||||
if element_type == PageType.ARTIST or (element_type == PageType.CHANNEL and not youtube_settings["youtube_music_clean_data"]):
|
||||
source = Source(SOURCE_PAGE, f"https://music.youtube.com/channel/{element_id}")
|
||||
|
@ -8,6 +8,7 @@ import json
|
||||
from dataclasses import dataclass
|
||||
import re
|
||||
from functools import lru_cache
|
||||
from collections import defaultdict
|
||||
|
||||
import youtube_dl
|
||||
from youtube_dl.extractor.youtube import YoutubeIE
|
||||
@ -33,9 +34,11 @@ from ...objects import (
|
||||
Target
|
||||
)
|
||||
from ...connection import Connection
|
||||
from ...utils.enums.album import AlbumType
|
||||
from ...utils.support_classes.download_result import DownloadResult
|
||||
|
||||
from ._list_render import parse_renderer
|
||||
from ._music_object_render import parse_run_element
|
||||
from .super_youtube import SuperYouTube
|
||||
|
||||
|
||||
@ -162,6 +165,12 @@ class MusicKrakenYoutubeIE(YoutubeIE):
|
||||
|
||||
|
||||
|
||||
# Maps the album type text found in a subtitle run to the matching AlbumType.
ALBUM_TYPE_MAP = {
    "Single": AlbumType.SINGLE,
    "Album": AlbumType.STUDIO_ALBUM,
    "EP": AlbumType.EP,
}
|
||||
|
||||
|
||||
class YoutubeMusic(SuperYouTube):
|
||||
# CHANGE
|
||||
@ -465,6 +474,46 @@ class YoutubeMusic(SuperYouTube):
|
||||
if DEBUG:
|
||||
dump_to_file(f"{browse_id}.json", r.text, is_json=True, exit_after_dump=False)
|
||||
|
||||
data = r.json()
|
||||
|
||||
# album details
|
||||
header = data.get("header", {})
|
||||
musicDetailHeaderRenderer = header.get("musicDetailHeaderRenderer", {})
|
||||
|
||||
title_runs: List[dict] = musicDetailHeaderRenderer.get("title", {}).get("runs", [])
|
||||
subtitle_runs: List[dict] = musicDetailHeaderRenderer.get("subtitle", {}).get("runs", [])
|
||||
|
||||
if len(title_runs) > 0:
|
||||
album.title = title_runs[0].get("text", album.title)
|
||||
|
||||
def other_parse_run(run: dict) -> None:
    """
    Apply a single subtitle run to the enclosing ``album``.

    - A run without a "text" key is ignored.
    - A run whose only key is "text" and whose text is exactly four digits
      sets ``album.date``.
    - A text found in ``ALBUM_TYPE_MAP`` sets ``album.album_type``.
    - Any other run carrying more keys than "text" is handed to
      ``parse_run_element``; a non-``None`` result is added to the album.

    ``album`` is only mutated here, never rebound, so the closure needs no
    ``nonlocal`` declaration.
    """
    if "text" not in run:
        return
    text = run["text"]

    is_text_field = len(run) == 1

    # The whole text has to be a year. A prefix-only ``re.match`` would also
    # accept text such as "20231 tracks" and pass it on to ``strptime`` with
    # the "%Y" format.
    if is_text_field and re.fullmatch(r"\d{4}", text):
        album.date = ID3Timestamp.strptime(text, "%Y")
        return

    if text in ALBUM_TYPE_MAP:
        album.album_type = ALBUM_TYPE_MAP[text]
        return

    if not is_text_field:
        parsed = parse_run_element(run)
        if parsed is not None:
            album.add_list_of_other_objects([parsed])
|
||||
|
||||
for _run in subtitle_runs:
|
||||
other_parse_run(_run)
|
||||
|
||||
# tracklist
|
||||
renderer_list = r.json().get("contents", {}).get("singleColumnBrowseResultsRenderer", {}).get("tabs", [{}])[
|
||||
0].get("tabRenderer", {}).get("content", {}).get("sectionListRenderer", {}).get("contents", [])
|
||||
|
||||
@ -472,17 +521,9 @@ class YoutubeMusic(SuperYouTube):
|
||||
for i, content in enumerate(renderer_list):
|
||||
dump_to_file(f"{i}-album-renderer.json", json.dumps(content), is_json=True, exit_after_dump=False)
|
||||
|
||||
results = []
|
||||
|
||||
"""
|
||||
cant use fixed indices, because if something has no entries, the list dissappears
|
||||
instead I have to try parse everything, and just reject community playlists and profiles.
|
||||
"""
|
||||
|
||||
for renderer in renderer_list:
|
||||
results.extend(parse_renderer(renderer))
|
||||
|
||||
album.add_list_of_other_objects(results)
|
||||
album.add_list_of_other_objects(parse_renderer(renderer))
|
||||
|
||||
return album
|
||||
|
||||
|
@ -19,7 +19,7 @@ DEBUG_OBJECT_TRACE = DEBUG and False
|
||||
DEBUG_OBJECT_TRACE_CALLSTACK = DEBUG_OBJECT_TRACE and False
|
||||
DEBUG_YOUTUBE_INITIALIZING = DEBUG and False
|
||||
DEBUG_PAGES = DEBUG and False
|
||||
DEBUG_DUMP = DEBUG and False
|
||||
DEBUG_DUMP = DEBUG and True
|
||||
DEBUG_PRINT_ID = DEBUG and True
|
||||
|
||||
if DEBUG:
|
||||
|
Loading…
Reference in New Issue
Block a user