From ed8cc914bec7456eb2a9fd8133af25ce20d15fce Mon Sep 17 00:00:00 2001
From: Lars Noack
Date: Mon, 6 May 2024 16:27:49 +0200
Subject: [PATCH] feat: lyrics for youtube music

---
Review notes (this section is ignored when the patch is applied):
 * fetch_lyrics: return annotation now matches what is returned, the second
   request is checked for None like the first one, and it got a docstring.
 * fetch_song: lyrics are only appended if fetch_lyrics found some.
 * traverse_json_path: walks the path in a loop instead of recursing.
 * The post-image blob ids on the "index" lines are those of the original
   commit and do not match the amended content.

 .vscode/settings.json                         |  3 +-
 .../pages/youtube_music/youtube_music.py      | 76 ++++++++++++++++++-
 music_kraken/utils/__init__.py                | 34 ++++++++-
 3 files changed, 108 insertions(+), 5 deletions(-)

diff --git a/.vscode/settings.json b/.vscode/settings.json
index f49d1d9..64b7f98 100644
--- a/.vscode/settings.json
+++ b/.vscode/settings.json
@@ -33,6 +33,7 @@
         "tracksort",
         "translit",
         "unmap",
-        "youtube"
+        "youtube",
+        "youtubei"
     ]
 }
\ No newline at end of file
diff --git a/music_kraken/pages/youtube_music/youtube_music.py b/music_kraken/pages/youtube_music/youtube_music.py
index 894b972..bbb8d22 100644
--- a/music_kraken/pages/youtube_music/youtube_music.py
+++ b/music_kraken/pages/youtube_music/youtube_music.py
@@ -18,7 +18,7 @@ from ...utils.exception.config import SettingValueError
 from ...utils.config import main_settings, youtube_settings, logging_settings
 from ...utils.shared import DEBUG, DEBUG_YOUTUBE_INITIALIZING
 from ...utils.string_processing import clean_song_title
-from ...utils import get_current_millis
+from ...utils import get_current_millis, traverse_json_path
 
 from ...utils import dump_to_file
 
@@ -31,7 +31,9 @@ from ...objects import (
     Song,
     Album,
     Label,
-    Target
+    Target,
+    Lyrics,
+    FormattedText
 )
 from ...connection import Connection
 from ...utils.enums.album import AlbumType
@@ -544,6 +546,70 @@ class YoutubeMusic(SuperYouTube):
 
         return album
 
+    def fetch_lyrics(self, video_id: str, playlist_id: str = None) -> "Lyrics | None":
+        """
+        Fetch the plain-text lyrics of a song from YouTube Music.
+
+        First requests the "/youtubei/v1/next" endpoint to find the browse id of the lyrics tab,
+        then requests the "/youtubei/v1/browse" endpoint with that id to get the lyrics text.
+
+        :param video_id: id of the video the lyrics belong to
+        :param playlist_id: optional playlist id that is sent along with the first request
+        :return: the lyrics, or None if a request failed or no lyrics were found
+        """
+        request_data = {
+            "context": {**self.credentials.context, "adSignalsInfo": {"params": []}},
+            "videoId": video_id,
+        }
+        if playlist_id is not None:
+            request_data["playlistId"] = playlist_id
+
+        tab_request = self.yt_music_connection.post(
+            url=get_youtube_url(path="/youtubei/v1/next", query="prettyPrint=false"),
+            json=request_data,
+            name=f"fetch_song_tabs_{video_id}.json",
+        )
+
+        if tab_request is None:
+            return None
+
+        dump_to_file(f"fetch_song_tabs_{video_id}.json", tab_request.text, is_json=True, exit_after_dump=False)
+
+        tab_data: dict = tab_request.json()
+
+        tabs = traverse_json_path(tab_data, "contents.singleColumnMusicWatchNextResultsRenderer.tabbedRenderer.watchNextTabbedResultsRenderer.tabs", default=[])
+        browse_id = None
+        for tab in tabs:
+            page_type = traverse_json_path(tab, "tabRenderer.endpoint.browseEndpoint.browseEndpointContextSupportedConfigs.browseEndpointContextMusicConfig.pageType", default="")
+            # this also matches MUSIC_TAB_TYPE_LYRICS and MUSIC_PAGE_TYPE_TRACK_LYRICS
+            if "lyrics" in page_type.lower():
+                browse_id = traverse_json_path(tab, "tabRenderer.endpoint.browseEndpoint.browseId", default=None)
+                break
+
+        if browse_id is None:
+            return None
+
+        lyrics_request = self.yt_music_connection.post(
+            url=get_youtube_url(path="/youtubei/v1/browse", query="prettyPrint=false"),
+            json={
+                "browseId": browse_id,
+                "context": {**self.credentials.context, "adSignalsInfo": {"params": []}}
+            },
+            name=f"fetch_song_lyrics_{video_id}.json"
+        )
+
+        # post may return None, which is handled the same way as for the tab request above
+        if lyrics_request is None:
+            return None
+
+        dump_to_file(f"fetch_song_lyrics_{video_id}.json", lyrics_request.text, is_json=True, exit_after_dump=False)
+
+        lyrics_text = traverse_json_path(lyrics_request.json(), "contents.sectionListRenderer.contents[0].musicDescriptionShelfRenderer.description.runs[0].text", default=None)
+        if lyrics_text is None:
+            return None
+
+        return Lyrics(FormattedText(plain=lyrics_text))
+
     def fetch_song(self, source: Source, stop_at_level: int = 1) -> Song:
         ydl_res: dict = {}
 
@@ -605,7 +671,7 @@ class YoutubeMusic(SuperYouTube):
         initial_details = self.yt_music_connection.post(
             url=get_youtube_url(path="/youtubei/v1/player", query=f"prettyPrint=false"),
             json=request_data,
-            name=f"fetch_album_{browse_id}.json",
+            name=f"fetch_song_{browse_id}.json",
         )
 
         if initial_details is None:
@@ -624,6 +690,10 @@ class YoutubeMusic(SuperYouTube):
         for thumbnail in video_details.get("thumbnails", []):
             song.artwork.append(**thumbnail)
 
+        lyrics = self.fetch_lyrics(browse_id, playlist_id=request_data.get("playlistId"))
+        if lyrics is not None:
+            song.lyrics_collection.append(lyrics)
+
         return song
 
 
diff --git a/music_kraken/utils/__init__.py b/music_kraken/utils/__init__.py
index 981eb7e..e85fa1a 100644
--- a/music_kraken/utils/__init__.py
+++ b/music_kraken/utils/__init__.py
@@ -3,7 +3,7 @@ from pathlib import Path
 import json
 import logging
 import inspect
-from typing import List
+from typing import List, Union
 
 from .shared import DEBUG, DEBUG_LOGGING, DEBUG_DUMP, DEBUG_TRACE, DEBUG_OBJECT_TRACE, DEBUG_OBJECT_TRACE_CALLSTACK
 from .config import config, read_config, write_config
@@ -77,6 +77,38 @@ def object_trace(obj):
 misc functions
 """
 
+def traverse_json_path(data, path: Union[str, List[str]], default=None):
+    """
+    Follow a path through nested dicts and lists and return the value it points to.
+
+    Path parts are concatenated with . or wrapped with [""] for object keys and wrapped in [] for array indices,
+    for example "contents[0].runs[0].text". A list of already split path parts is accepted as well.
+
+    default is returned if a part of the path doesn't exist, is no valid list index, or resolves to None.
+    """
+
+    if isinstance(path, str):
+        path = path.replace('["', '.').replace('"]', '.').replace("[", ".").replace("]", ".")
+        path = [p for p in path.split(".") if len(p) > 0]
+
+    current = data
+    for part in path:
+        if isinstance(current, dict):
+            current = current.get(part)
+        elif isinstance(current, list):
+            try:
+                current = current[int(part)]
+            except (IndexError, ValueError):
+                return default
+        else:
+            # neither a dict nor a list, so there is nothing left to descend into
+            return default
+
+        if current is None:
+            return default
+
+    return current
+
 _auto_increment = 0
 def generate_id() -> int:
     global _auto_increment