diff --git a/src/actual_donwload.py b/src/actual_donwload.py
index 60b8c8a..4475034 100644
--- a/src/actual_donwload.py
+++ b/src/actual_donwload.py
@@ -35,4 +35,10 @@ if __name__ == "__main__":
         "d: 2"
     ]
 
-    music_kraken.cli.download(genre="test", command_list=youtube_music_test, process_metadata_anyway=True)
+    cross_download = [
+        "s: #a Psychonaut 4",
+        "2",
+        "d: 0"
+    ]
+
+    music_kraken.cli.download(genre="test", command_list=cross_download, process_metadata_anyway=True)
diff --git a/src/music_kraken/download/page_attributes.py b/src/music_kraken/download/page_attributes.py
index 17c8a91..014a984 100644
--- a/src/music_kraken/download/page_attributes.py
+++ b/src/music_kraken/download/page_attributes.py
@@ -15,7 +15,7 @@ from ..pages import Page, EncyclopaediaMetallum, Musify, YouTube, YoutubeMusic,
 ALL_PAGES: Set[Type[Page]] = {
     EncyclopaediaMetallum,
     Musify,
-    YoutubeMusic,
+    # YoutubeMusic,
     Bandcamp
 }
 
@@ -97,7 +97,11 @@ class Pages:
         if not isinstance(music_object, INDEPENDENT_DB_OBJECTS):
             return DownloadResult(error_message=f"{type(music_object).__name__} can't be downloaded.")
 
-        _page_types = set(self._source_to_page[src] for src in music_object.source_collection.source_pages)
+        _page_types = set()
+        for src in music_object.source_collection.source_pages:
+            if src in self._source_to_page:
+                _page_types.add(self._source_to_page[src])
+
         audio_pages = self._audio_pages_set.intersection(_page_types)
 
         for download_page in audio_pages:
diff --git a/src/music_kraken/download/results.py b/src/music_kraken/download/results.py
index 631ad48..46911b1 100644
--- a/src/music_kraken/download/results.py
+++ b/src/music_kraken/download/results.py
@@ -27,7 +27,7 @@ class Results:
         self._page_by_index = dict()
 
     def get_music_object_by_index(self, index: int) -> Tuple[Type[Page], DatabaseObject]:
-        # if this throws a key error, either the formated generator needs to be iterated, or the option doesn't exist.
+        # if this throws a key error, either the formatted generator needs to be iterated, or the option doesn't exist.
         return self._page_by_index[index], self._by_index[index]
 
     def delete_details(self, exclude_index: int):
diff --git a/src/music_kraken/objects/collection.py b/src/music_kraken/objects/collection.py
index 2b9ce72..5310211 100644
--- a/src/music_kraken/objects/collection.py
+++ b/src/music_kraken/objects/collection.py
@@ -81,6 +81,8 @@ class Collection(Generic[T]):
         :param merge_into_existing:
         :return did_not_exist:
         """
+        if element is None:
+            return AppendResult(False, None, False)
 
         # if the element type has been defined in the initializer it checks if the type matches
         if self.element_type is not None and not isinstance(element, self.element_type):
diff --git a/src/music_kraken/objects/parents.py b/src/music_kraken/objects/parents.py
index a553700..7297b17 100644
--- a/src/music_kraken/objects/parents.py
+++ b/src/music_kraken/objects/parents.py
@@ -31,7 +31,7 @@ class DatabaseObject:
             """
             _id = random.randint(0, HIGHEST_ID)
             self.automatic_id = True
-            LOGGER.debug(f"Id for {type(self).__name__} isn't set. Setting to {_id}")
+            # LOGGER.debug(f"Id for {type(self).__name__} isn't set. Setting to {_id}")
 
         # The id can only be None, if the object is dynamic (self.dynamic = True)
         self.id: Optional[int] = _id
diff --git a/src/music_kraken/objects/song.py b/src/music_kraken/objects/song.py
index 75fa351..36cd77a 100644
--- a/src/music_kraken/objects/song.py
+++ b/src/music_kraken/objects/song.py
@@ -649,11 +649,12 @@ class Artist(MainObject):
 
         return metadata
 
-    def __str__(self):
+    def __str__(self, include_notes: bool = False):
         string = self.name or ""
-        plaintext_notes = self.notes.get_plaintext()
-        if plaintext_notes is not None:
-            string += "\n" + plaintext_notes
+        if include_notes:
+            plaintext_notes = self.notes.get_plaintext()
+            if plaintext_notes is not None:
+                string += "\n" + plaintext_notes
         return string
 
     def __repr__(self):
diff --git a/src/music_kraken/pages/abstract.py b/src/music_kraken/pages/abstract.py
index 25a589c..f068482 100644
--- a/src/music_kraken/pages/abstract.py
+++ b/src/music_kraken/pages/abstract.py
@@ -153,11 +153,12 @@ def build_new_object(new_object: DatabaseObject) -> DatabaseObject:
     return new_object
 
 
-def merge_together(old_object: DatabaseObject, new_object: DatabaseObject) -> DatabaseObject:
+def merge_together(old_object: DatabaseObject, new_object: DatabaseObject, do_compile: bool = True) -> DatabaseObject:
     new_object = clean_object(new_object)
 
     old_object.merge(new_object)
-    old_object.compile(merge_into=False)
+    if do_compile:
+        old_object.compile(merge_into=False)
 
     return old_object
 
@@ -246,7 +247,7 @@ class Page:
 
         return []
 
-    def fetch_details(self, music_object: DatabaseObject, stop_at_level: int = 1) -> DatabaseObject:
+    def fetch_details(self, music_object: DatabaseObject, stop_at_level: int = 1, post_process: bool = True) -> DatabaseObject:
         """
         when a music object with lacking data is passed in, it returns the SAME object **(no copy)** with more detailed data.
 
@@ -270,22 +271,22 @@ class Page:
         if isinstance(music_object, INDEPENDENT_DB_OBJECTS):
             source: Source
             for source in music_object.source_collection.get_sources_from_page(self.SOURCE_TYPE):
-                new_music_object.merge(
-                    self.fetch_object_from_source(
-                        source=source,
-                        enforce_type=type(music_object),
-                        stop_at_level=stop_at_level,
-                        post_process=False
-                    )
-                )
+                new_music_object.merge(self.fetch_object_from_source(
+                    source=source,
+                    enforce_type=type(music_object),
+                    stop_at_level=stop_at_level,
+                    post_process=False
+                ))
 
-        return merge_together(music_object, new_music_object)
+        return merge_together(music_object, new_music_object, do_compile=post_process)
 
     def fetch_object_from_source(self, source: Source, stop_at_level: int = 2, enforce_type: Type[DatabaseObject] = None, post_process: bool = True) -> Optional[DatabaseObject]:
-        obj_type = self.get_source_type(source)
-
+        obj_type = self.get_source_type(
+            source)
+        print("obj type", obj_type, self)
         if obj_type is None:
             return None
+
         if enforce_type != obj_type and enforce_type is not None:
             self.LOGGER.warning(f"Object type isn't type to enforce: {enforce_type}, {obj_type}")
             return None
@@ -298,13 +299,21 @@ class Page:
             Artist: self.fetch_artist,
             Label: self.fetch_label
         }
-        
+
         if obj_type in fetch_map:
             music_object = fetch_map[obj_type](source, stop_at_level)
         else:
             self.LOGGER.warning(f"Can't fetch details of type: {obj_type}")
             return None
 
+        if stop_at_level > 1:
+            collection: Collection
+            for collection_str in music_object.DOWNWARDS_COLLECTION_ATTRIBUTES:
+                collection = music_object.__getattribute__(collection_str)
+
+                for sub_element in collection:
+                    sub_element.merge(self.fetch_details(sub_element, stop_at_level=stop_at_level-1, post_process=False))
+
         if post_process and music_object:
             return build_new_object(music_object)
 
@@ -323,6 +332,10 @@ class Page:
         return Label()
 
     def download(self, music_object: DatabaseObject, genre: str, download_all: bool = False, process_metadata_anyway: bool = False) -> DownloadResult:
+        # print("downloading")
+
+        self.fetch_details(music_object, stop_at_level=2)
+
         naming_dict: NamingDict = NamingDict({"genre": genre})
 
         def fill_naming_objects(naming_music_object: DatabaseObject):
diff --git a/src/music_kraken/pages/bandcamp.py b/src/music_kraken/pages/bandcamp.py
index 00e33c3..ea3a09f 100644
--- a/src/music_kraken/pages/bandcamp.py
+++ b/src/music_kraken/pages/bandcamp.py
@@ -1,10 +1,9 @@
 from typing import List, Optional, Type
-from urllib.parse import urlparse
+from urllib.parse import urlparse, urlunparse
 import json
 from enum import Enum
 from bs4 import BeautifulSoup
 import pycountry
-import demjson3
 
 from ..objects import Source, DatabaseObject
 from .abstract import Page
@@ -51,12 +50,13 @@ class Bandcamp(Page):
 
     def get_source_type(self, source: Source) -> Optional[Type[DatabaseObject]]:
         parsed_url = urlparse(source.url)
+        path = parsed_url.path.replace("/", "")
 
-        if parsed_url.path == "":
+        if path == "":
             return Artist
-        if parsed_url.path.startswith("/album/"):
+        if path.startswith("album"):
             return Album
-        if parsed_url.path.startswith("/track/"):
+        if path.startswith("track"):
             return Song
 
         return super().get_source_type(source)
@@ -65,7 +65,6 @@ class Bandcamp(Page):
         try:
             object_type = BandcampTypes(data["type"])
         except ValueError:
-            print(data["type"])
             return
 
         url = data["item_url_root"]
@@ -180,7 +179,7 @@ class Bandcamp(Page):
             source_list=source_list
         )
 
-    def _parse_song_list(self, soup: BeautifulSoup) -> List[Album]:
+    def _parse_album(self, soup: BeautifulSoup) -> List[Album]:
         title = None
         source_list: List[Source] = []
 
@@ -194,6 +193,25 @@ class Bandcamp(Page):
 
         return Album(title=title, source_list=source_list)
 
+    def _parse_artist_data_blob(self, data_blob: dict, artist_url: str):
+        if DEBUG:
+            dump_to_file("bandcamp_data_blob.json", json.dumps(data_blob), is_json=True, exit_after_dump=False)
+
+        parsed_artist_url = urlparse(artist_url)
+        album_list: List[Album] = []
+
+        for album_json in data_blob.get("buyfulldisco", {}).get("tralbums", []):
+            album_list.append(Album(
+                title=album_json["title"],
+                source_list=[Source(
+                    self.SOURCE_TYPE,
+                    urlunparse((parsed_artist_url.scheme, parsed_artist_url.netloc, album_json["page_url"], "", "", ""))
+                )]
+            ))
+
+        return album_list
+
+
     def fetch_artist(self, source: Source, stop_at_level: int = 1) -> Artist:
         artist = Artist()
 
@@ -207,9 +225,22 @@ class Bandcamp(Page):
             dump_to_file("artist_page.html", r.text, exit_after_dump=False)
 
         artist = self._parse_artist_details(soup=soup.find("div", {"id": "bio-container"}))
-        for subsoup in soup.find("ol", {"id": "music-grid"}).find_all("li"):
-            artist.main_album_collection.append(self._parse_song_list(soup=subsoup))
+        html_music_grid = soup.find("ol", {"id": "music-grid"})
+        if html_music_grid is not None:
+            for subsoup in html_music_grid.find_all("li"):
+                artist.main_album_collection.append(self._parse_album(soup=subsoup))
+
+        data_blob_soup = soup.find("div", {"id": "pagedata"})
+        if data_blob_soup is not None:
+            data_blob = data_blob_soup["data-blob"]
+            if data_blob is not None:
+                artist.main_album_collection.extend(
+
+                    self._parse_artist_data_blob(json.loads(data_blob), source.url)
+                )
+
+        artist.source_collection.append(source)
 
         return artist
 
     def _parse_track_element(self, track: dict) -> Optional[Song]:
@@ -220,7 +251,6 @@ class Bandcamp(Page):
         )
 
     def fetch_album(self, source: Source, stop_at_level: int = 1) -> Album:
-        print(source)
         album = Album()
 
         r = self.connection.get(source.url)
@@ -237,13 +267,16 @@ class Bandcamp(Page):
 
         data = json.loads(data_container.text)
         artist_data = data["byArtist"]
 
+        artist_source_list = []
+        if "@id" in artist_data:
+            artist_source_list = [Source(self.SOURCE_TYPE, artist_data["@id"])]
+
         album = Album(
             title=data["name"],
             source_list=[Source(self.SOURCE_TYPE, data.get("mainEntityOfPage", data["@id"]))],
             date=ID3Timestamp.strptime(data["datePublished"], "%d %b %Y %H:%M:%S %Z"),
             artist_list=[Artist(
                 name=artist_data["name"],
-                source_list=[Source(self.SOURCE_TYPE, artist_data["@id"])]
+                source_list=artist_source_list
             )]
         )
@@ -256,6 +289,7 @@ class Bandcamp(Page):
             except KeyError:
                 continue
 
+        album.source_collection.append(source)
         return album
 
     def _fetch_lyrics(self, soup: BeautifulSoup) -> List[Lyrics]:
@@ -270,8 +304,6 @@ class Bandcamp(Page):
 
 
     def fetch_song(self, source: Source, stop_at_level: int = 1) -> Song:
-        print(source)
-
         r = self.connection.get(source.url)
         if r is None:
             return Song()
@@ -313,6 +345,7 @@ class Bandcamp(Page):
             lyrics_list=self._fetch_lyrics(soup=soup)
         )
 
+        song.source_collection.append(source)
         return song
 
 
diff --git a/src/music_kraken/pages/encyclopaedia_metallum.py b/src/music_kraken/pages/encyclopaedia_metallum.py
index 87c0f69..2025af6 100644
--- a/src/music_kraken/pages/encyclopaedia_metallum.py
+++ b/src/music_kraken/pages/encyclopaedia_metallum.py
@@ -22,6 +22,10 @@ from ..objects import (
     Options,
     DatabaseObject
 )
+from ..utils.shared import DEBUG
+if DEBUG:
+    from ..utils.debug_utils import dump_to_file
+
 
 
 ALBUM_TYPE_MAP: Dict[str, AlbumType] = defaultdict(lambda: AlbumType.OTHER, {
@@ -264,20 +268,33 @@ class EncyclopaediaMetallum(Page):
 
         soup = self.get_soup_from_response(r)
 
+        if DEBUG:
+            dump_to_file(f"ma_artist_sources_{ma_artist_id}.html", soup.prettify(), exit_after_dump=False)
+
         if soup.find("span", {"id": "noLinks"}) is not None:
             return []
 
-        artist_source = soup.find("div", {"id": "band_links_Official"})
-        """
-        TODO
-        add a Label object to add the label sources from
-        TODO
-        maybe do merchandice stuff
-        """
+        source_list = []
+
+        link_table: BeautifulSoup = soup.find("table", {"id": "linksTablemain"})
+        if link_table is not None:
+            for tr in link_table.find_all("tr"):
+                anchor: BeautifulSoup = tr.find("a")
+                if anchor is None:
+                    continue
+
+                href = anchor["href"]
+                if href is not None:
+                    source_list.append(Source.match_url(href, referer_page=self.SOURCE_TYPE))
+
+        # The following code is only legacy code, which I just kept because it doesn't harm.
+        # The way ma returns sources changed.
+        artist_source = soup.find("div", {"id": "band_links"})
+
 
         merchandice_source = soup.find("div", {"id": "band_links_Official_merchandise"})
         label_source = soup.find("div", {"id": "band_links_Labels"})
-        source_list = []
+
 
         if artist_source is not None:
             for tr in artist_source.find_all("td"):
@@ -288,6 +305,8 @@ class EncyclopaediaMetallum(Page):
 
                     source_list.append(Source.match_url(url, referer_page=self.SOURCE_TYPE))
 
+        print(source_list)
+
         return source_list
 
     def _parse_artist_attributes(self, artist_soup: BeautifulSoup) -> Artist:
diff --git a/src/music_kraken/pages/youtube_music/youtube_music.py b/src/music_kraken/pages/youtube_music/youtube_music.py
index ef6f14d..851cfc3 100644
--- a/src/music_kraken/pages/youtube_music/youtube_music.py
+++ b/src/music_kraken/pages/youtube_music/youtube_music.py
@@ -71,8 +71,9 @@ class YoutubeMusicConnection(Connection):
         r = self.get("https://music.youtube.com/verify_session", is_heartbeat=True)
         if r is None:
             self.heartbeat_failed()
+            return
 
-        string = r.content.decode("utf-8")
+        string = r.text
 
         data = json.loads(string[string.index("{"):])
         success: bool = data["success"]
@@ -248,6 +249,9 @@ class YoutubeMusic(SuperYouTube):
             }
         )
 
+        if r is None:
+            return []
+
         renderer_list = r.json().get("contents", {}).get("tabbedSearchResultsRenderer", {}).get("tabs", [{}])[0].get("tabRenderer").get("content", {}).get("sectionListRenderer", {}).get("contents", [])
 
         if DEBUG:
diff --git a/src/music_kraken/utils/shared.py b/src/music_kraken/utils/shared.py
index 33702b6..80efee8 100644
--- a/src/music_kraken/utils/shared.py
+++ b/src/music_kraken/utils/shared.py
@@ -4,7 +4,7 @@ from .config import main_settings
 
 DEBUG = True
 DEBUG_YOUTUBE_INITIALIZING = DEBUG and False
-DEBUG_PAGES = DEBUG and True
+DEBUG_PAGES = DEBUG and False
 
 if DEBUG:
     print("DEBUG ACTIVE")