From 7b4eee858ab264e264f697bebebf68e295f28a7c Mon Sep 17 00:00:00 2001
From: Lars Noack
Date: Tue, 21 May 2024 17:14:58 +0200
Subject: [PATCH] feat: parsed script json

---
Review notes (text between "---" and the first diff is not part of the commit):
 * fetch_song resolves the backslash escapes of the embedded state in a single
   pass instead of chained str.replace calls, and keeps scraping the lyrics
   when the embedded JSON does not parse.
 * Docstrings were added to the new and changed methods.
 * The blob hashes on the "index" lines are those of the original commit.

 development/actual_donwload.py |  1 +
 music_kraken/pages/genius.py   | 68 ++++++++++++++++++++++++++++++++-
 2 files changed, 68 insertions(+), 1 deletion(-)

diff --git a/development/actual_donwload.py b/development/actual_donwload.py
index 1ee7563..ad8f1d0 100644
--- a/development/actual_donwload.py
+++ b/development/actual_donwload.py
@@ -9,6 +9,7 @@ if __name__ == "__main__":
         "s: #a Crystal F",
         "10",
         "1",
+        "3",
     ]
 
 
diff --git a/music_kraken/pages/genius.py b/music_kraken/pages/genius.py
index afead40..885aed8 100644
--- a/music_kraken/pages/genius.py
+++ b/music_kraken/pages/genius.py
@@ -256,8 +256,74 @@ class Genius(Page):
 
         return album
 
+    def get_json_content_from_response(self, response, start: str, end: str) -> Optional[str]:
+        """Return the part of ``response.text`` enclosed by ``start`` and ``end``.
+
+        ``end`` is searched for only after the first occurrence of ``start``;
+        neither marker is part of the returned text.
+
+        :param response: object whose ``text`` attribute holds the page body
+        :param start: marker directly in front of the wanted content
+        :param end: marker directly behind the wanted content
+        :return: the enclosed text, or ``None`` if either marker is missing
+        """
+        content = response.text
+        start_index = content.find(start)
+        if start_index < 0:
+            return None
+        start_index += len(start)
+        end_index = content.find(end, start_index)
+        if end_index < 0:
+            return None
+        return content[start_index:end_index]
+
+    @staticmethod
+    def _unescape_js_string(text: str) -> str:
+        """Drop the escaping backslash in front of backslashes and quotes.
+
+        All escapes are resolved in one left-to-right pass, so a backslash that
+        results from one escape is never read as the start of another one.
+
+        :param text: string in which backslashes and quotes are backslash-escaped
+        :return: ``text`` with those escapes resolved
+        """
+        import re  # imported locally so the module-level imports stay untouched
+
+        return re.sub(r"\\([\\\"'])", r"\1", text)
+
     def fetch_song(self, source: Source, stop_at_level: int = 1) -> Song:
-        song = Song()
+        """Request the page behind ``source`` and collect the lyrics found on it.
+
+        :param source: source whose ``url`` is requested
+        :param stop_at_level: not read by this method
+        :return: the song; left empty when the request yields no response
+        """
+        song: Song = Song()
+
+        r = self.connection.get(source.url, name=source.url)
+        if r is None:
+            return song
+
+        # get the contents that are between `JSON.parse('` and `');`
+        content = self.get_json_content_from_response(r, start="window.__PRELOADED_STATE__ = JSON.parse('", end="');\n window.__APP_CONFIG__ = ")
+        if content is not None:
+            content = self._unescape_js_string(content)
+            try:
+                # NOTE: `data` is parsed but not consumed yet
+                data = json.loads(content)
+            except json.JSONDecodeError:
+                # the embedded state is only a bonus: an unparsable blob must not
+                # keep the lyrics below from being scraped
+                data = None
+            else:
+                dump_to_file("genius_song_script_json.json", content, is_json=True, exit_after_dump=False)
+
+        soup = self.get_soup_from_response(r)
+        for lyrics in soup.find_all("div", {"data-lyrics-container": "true"}):
+            lyrics_object = Lyrics(FormattedText(html=lyrics.prettify()))
+            song.lyrics_collection.append(lyrics_object)
+
+        song.source_collection.append(source)
         return song
 
     def download_song_to_target(self, source: Source, target: Target, desc: str = None) -> DownloadResult: