feat: lyrics for youtube music
All checks were successful
ci/woodpecker/push/woodpecker Pipeline was successful

This commit is contained in:
Hazel 2024-05-06 16:27:49 +02:00
parent 5ed902489f
commit ed8cc914be
3 changed files with 92 additions and 5 deletions

View File

@ -33,6 +33,7 @@
"tracksort",
"translit",
"unmap",
"youtube"
"youtube",
"youtubei"
]
}

View File

@ -18,7 +18,7 @@ from ...utils.exception.config import SettingValueError
from ...utils.config import main_settings, youtube_settings, logging_settings
from ...utils.shared import DEBUG, DEBUG_YOUTUBE_INITIALIZING
from ...utils.string_processing import clean_song_title
from ...utils import get_current_millis
from ...utils import get_current_millis, traverse_json_path
from ...utils import dump_to_file
@ -31,7 +31,9 @@ from ...objects import (
Song,
Album,
Label,
Target
Target,
Lyrics,
FormattedText
)
from ...connection import Connection
from ...utils.enums.album import AlbumType
@ -544,6 +546,57 @@ class YoutubeMusic(SuperYouTube):
return album
def fetch_lyrics(self, video_id: str, playlist_id: str = None) -> "Lyrics | None":
    """
    Fetch the lyrics of a YouTube Music video.

    Two requests are made through ``self.yt_music_connection``:

    1. ``/youtubei/v1/next`` returns the watch-page tabs; the tab whose
       page type mentions "lyrics" carries the browse id of the lyrics page.
    2. ``/youtubei/v1/browse`` with that browse id returns the lyrics text.

    :param video_id: id of the video to look up.
    :param playlist_id: optional playlist id, sent along as ``playlistId``
        when it is not ``None``.
    :return: a ``Lyrics`` object wrapping the plain lyrics text, or ``None``
        when either request fails, no lyrics tab exists, or the response
        does not contain any lyrics text.
    """
    request_data = {
        "context": {**self.credentials.context, "adSignalsInfo": {"params": []}},
        "videoId": video_id,
    }
    if playlist_id is not None:
        request_data["playlistId"] = playlist_id

    tab_request = self.yt_music_connection.post(
        url=get_youtube_url(path="/youtubei/v1/next", query="prettyPrint=false"),
        json=request_data,
        name=f"fetch_song_tabs_{video_id}.json",
    )
    if tab_request is None:
        return None

    # NOTE(review): presumably a debug dump of the raw response - confirm in dump_to_file.
    dump_to_file(f"fetch_song_tabs_{video_id}.json", tab_request.text, is_json=True, exit_after_dump=False)

    tab_data: dict = tab_request.json()
    tabs = traverse_json_path(
        tab_data,
        "contents.singleColumnMusicWatchNextResultsRenderer.tabbedRenderer.watchNextTabbedResultsRenderer.tabs",
        default=[],
    )

    browse_id = None
    for tab in tabs:
        page_type = traverse_json_path(
            tab,
            "tabRenderer.endpoint.browseEndpoint.browseEndpointContextSupportedConfigs.browseEndpointContextMusicConfig.pageType",
            default="",
        )
        # Matches "MUSIC_TAB_TYPE_LYRICS", "MUSIC_PAGE_TYPE_TRACK_LYRICS" and any
        # other page type containing "lyrics"; non-string values are skipped
        # instead of crashing on .lower().
        if isinstance(page_type, str) and "lyrics" in page_type.lower():
            browse_id = traverse_json_path(tab, "tabRenderer.endpoint.browseEndpoint.browseId", default=None)
            break

    if browse_id is None:
        return None

    lyrics_request = self.yt_music_connection.post(
        url=get_youtube_url(path="/youtubei/v1/browse", query="prettyPrint=false"),
        json={
            "browseId": browse_id,
            "context": {**self.credentials.context, "adSignalsInfo": {"params": []}},
        },
        name=f"fetch_song_lyrics_{video_id}.json",
    )
    # post() can return None (the first request above is guarded for the same
    # reason); without this check the .text access below would raise.
    if lyrics_request is None:
        return None

    dump_to_file(f"fetch_song_lyrics_{video_id}.json", lyrics_request.text, is_json=True, exit_after_dump=False)

    data = lyrics_request.json()
    lyrics_text = traverse_json_path(
        data,
        "contents.sectionListRenderer.contents[0].musicDescriptionShelfRenderer.description.runs[0].text",
        default=None,
    )
    if lyrics_text is None:
        return None

    return Lyrics(FormattedText(plain=lyrics_text))
def fetch_song(self, source: Source, stop_at_level: int = 1) -> Song:
ydl_res: dict = {}
@ -605,7 +658,7 @@ class YoutubeMusic(SuperYouTube):
initial_details = self.yt_music_connection.post(
url=get_youtube_url(path="/youtubei/v1/player", query=f"prettyPrint=false"),
json=request_data,
name=f"fetch_album_{browse_id}.json",
name=f"fetch_song_{browse_id}.json",
)
if initial_details is None:
@ -624,6 +677,8 @@ class YoutubeMusic(SuperYouTube):
for thumbnail in video_details.get("thumbnails", []):
song.artwork.append(**thumbnail)
song.lyrics_collection.append(self.fetch_lyrics(browse_id, playlist_id=request_data.get("playlistId")))
return song

View File

@ -3,7 +3,7 @@ from pathlib import Path
import json
import logging
import inspect
from typing import List
from typing import List, Union
from .shared import DEBUG, DEBUG_LOGGING, DEBUG_DUMP, DEBUG_TRACE, DEBUG_OBJECT_TRACE, DEBUG_OBJECT_TRACE_CALLSTACK
from .config import config, read_config, write_config
@ -77,6 +77,37 @@ def object_trace(obj):
misc functions
"""
def traverse_json_path(data, path: Union[str, List[str]], default=None):
    """
    Walk nested dicts and lists along ``path`` and return the value found.

    A string path is a sequence of parts joined with ``.``; a part may also be
    written as ``["key"]`` for object keys or ``[0]`` for array indices, e.g.
    ``a.b[0]["c"]``. A quoted part is taken verbatim, so ``["a.b"]`` addresses
    the single key ``a.b``. A list path is used as is, one element per step.

    :param data: the decoded JSON structure to walk.
    :param path: string path or list of path parts; an empty path returns
        ``data`` itself.
    :param default: returned when a step cannot be resolved: missing key,
        index out of range, non-numeric index into a list, a value that is
        neither dict nor list, or a value that is ``None``.
    :return: the value at ``path`` or ``default``.
    """
    import re  # local import so the function does not rely on module-level imports

    if isinstance(path, str):
        # Quoted segments are captured whole so keys may contain "." or
        # brackets; everything else is split on ".", "[" and "]".
        tokens = (
            match.group(1) if match.group(1) is not None else match.group(2)
            for match in re.finditer(r'\["(.*?)"\]|([^.\[\]]+)', path)
        )
        parts = [token for token in tokens if token]
    else:
        parts = list(path)

    current = data
    for part in parts:
        if isinstance(current, dict):
            current = current.get(part)
        elif isinstance(current, list):
            try:
                current = current[int(part)]
            except (IndexError, ValueError, TypeError):
                return default
        else:
            # Scalars cannot be descended into.
            return default

        # A None value is treated the same as a missing one.
        if current is None:
            return default

    return current
_auto_increment = 0
def generate_id() -> int:
global _auto_increment