From 6a37351da1c0a778c23bf87abef44734f6c723da Mon Sep 17 00:00:00 2001 From: Hellow <74311245+HeIIow2@users.noreply.github.com> Date: Tue, 12 Sep 2023 20:18:51 +0200 Subject: [PATCH] feat: implemented parsing of tracklist of album --- src/actual_donwload.py | 4 +-- src/music_kraken/objects/song.py | 6 +++- src/music_kraken/pages/bandcamp.py | 51 ++++++++++++++++++++++++++---- 3 files changed, 51 insertions(+), 10 deletions(-) diff --git a/src/actual_donwload.py b/src/actual_donwload.py index 59e7667..bb8d1b1 100644 --- a/src/actual_donwload.py +++ b/src/actual_donwload.py @@ -29,8 +29,8 @@ if __name__ == "__main__": ] youtube_music_test = [ - "s: #a Only Smile", - "0" + "s: #a Only Smile #r Your best friend", + "8" ] music_kraken.cli.download(genre="test", command_list=youtube_music_test, process_metadata_anyway=True) diff --git a/src/music_kraken/objects/song.py b/src/music_kraken/objects/song.py index b7d021e..75fa351 100644 --- a/src/music_kraken/objects/song.py +++ b/src/music_kraken/objects/song.py @@ -467,7 +467,8 @@ class Artist(MainObject): "formed_in": ID3Timestamp(), "notes": FormattedText(), "lyrical_themes": [], - "general_genre": "" + "general_genre": "", + "unformated_location": None, } DOWNWARDS_COLLECTION_ATTRIBUTES = ("feature_song_collection", "main_album_collection") @@ -489,6 +490,7 @@ class Artist(MainObject): country: CountryTyping = None, formed_in: ID3Timestamp = None, label_list: List['Label'] = None, + unformated_location: str = None, **kwargs ): MainObject.__init__(self, _id=_id, dynamic=dynamic, **kwargs) @@ -520,6 +522,8 @@ class Artist(MainObject): self.contact_collection: Collection[Label] = Collection(data=contact_list, element_type=Contact) + self.unformated_location: Optional[str] = unformated_location + def _add_other_db_objects(self, object_type: Type["DatabaseObject"], object_list: List["DatabaseObject"]): if object_type is Song: # this doesn't really make sense diff --git a/src/music_kraken/pages/bandcamp.py b/src/music_kraken/pages/bandcamp.py index f32e9a2..925bf0b 100644 --- a/src/music_kraken/pages/bandcamp.py +++ b/src/music_kraken/pages/bandcamp.py @@ -1,6 +1,6 @@ from typing import List, Optional, Type from urllib.parse import urlparse -import logging +import json from enum import Enum from bs4 import BeautifulSoup @@ -145,6 +145,10 @@ class Bandcamp(Page): def song_search(self, song: Song) -> List[Song]: return self.general_search(song.title, filter_string="t") + + + def fetch_label(self, source: Source, stop_at_level: int = 1) -> Label: + return Label() def _parse_artist_details(self, soup: BeautifulSoup) -> Artist: name: str = None @@ -203,15 +207,48 @@ class Bandcamp(Page): return artist + def _parse_track_element(self, track: dict) -> Optional[Song]: + return Song( + title=track["item"]["name"], + source_list=[Source(self.SOURCE_TYPE, track["item"]["mainEntityOfPage"])], + tracksort=track["position"] + ) + + def fetch_album(self, source: Source, stop_at_level: int = 1) -> Album: + print(source) + album = Album() + + r = self.connection.get(source.url) + if r is None: + return album + + soup = self.get_soup_from_response(r) + + if DEBUG: + # dump_to_file("album_page.html", r.text, exit_after_dump=False) + pass + + data_container = soup.find("script", {"type": "application/ld+json"}) + + if DEBUG: + dump_to_file("album_data.json", data_container.text, is_json=True, exit_after_dump=False) + + data = json.loads(data_container.text) + + for i, track_json in enumerate(data.get("track", {}).get("itemListElement", [])): + if DEBUG: + dump_to_file(f"album_track_{i}.json", json.dumps(track_json), is_json=True, exit_after_dump=False) + + try: + album.song_collection.append(self._parse_track_element(track_json)) + except KeyError: + continue + + return album + def fetch_song(self, source: Source, stop_at_level: int = 1) -> Song: print(source) return Song() - def fetch_album(self, source: Source, stop_at_level: int = 1) -> Album: - return Album() - - def fetch_label(self, source: Source, stop_at_level: int = 1) -> Label: - return Label() - def download_song_to_target(self, source: Source, target: Target, desc: str = None) -> DownloadResult: return DownloadResult()