From 960d3b74ac7478ca790673c7306568ca62091da5 Mon Sep 17 00:00:00 2001 From: Lars Noack Date: Tue, 7 May 2024 14:59:28 +0200 Subject: [PATCH 1/6] feat: prevent collection albums from being fetched from musify --- development/actual_donwload.py | 2 +- music_kraken/pages/musify.py | 11 +++++++---- 2 files changed, 8 insertions(+), 5 deletions(-) diff --git a/development/actual_donwload.py b/development/actual_donwload.py index fb09b97..c5c3890 100644 --- a/development/actual_donwload.py +++ b/development/actual_donwload.py @@ -7,7 +7,7 @@ logging.getLogger().setLevel(logging.DEBUG) if __name__ == "__main__": commands = [ "s: #a Crystal F", - "dm: 10, 20" + "10" ] diff --git a/music_kraken/pages/musify.py b/music_kraken/pages/musify.py index 5f1b7aa..84a4453 100644 --- a/music_kraken/pages/musify.py +++ b/music_kraken/pages/musify.py @@ -961,7 +961,7 @@ class Musify(Page): source_list=source_list, date=timestamp, album_type=album_type, - album_status=album_status + album_status=album_status, ) def _parse_album(self, soup: BeautifulSoup) -> Album: @@ -1054,7 +1054,7 @@ class Musify(Page): date=date ) - def _get_discography(self, url: MusifyUrl, artist_name: str = None, stop_at_level: int = 1) -> Generator[Album, None, None]: + def _get_discography(self, artist: Artist, url: MusifyUrl, artist_name: str = None, stop_at_level: int = 1) -> Generator[Album, None, None]: """ POST https://musify.club/artist/filteralbums ArtistID: 280348 @@ -1076,7 +1076,10 @@ class Musify(Page): soup: BeautifulSoup = BeautifulSoup(r.content, features="html.parser") for card_soup in soup.find_all("div", {"class": "card"}): - yield self._parse_album_card(card_soup, artist_name) + album = self._parse_album_card(card_soup, artist_name) + if album.album_type is AlbumType.COMPILATION_ALBUM or album.album_type is AlbumType.MIXTAPE: + continue + artist.main_album_collection.append(album) def fetch_artist(self, source: Source, stop_at_level: int = 1) -> Artist: """ @@ -1098,7 +1101,7 @@ class Musify(Page): artist = self._get_artist_attributes(url) - artist.main_album_collection.extend(self._get_discography(url, artist.name)) + self._get_discography(artist, url, artist.name) return artist From e3e547c2328a4edabc1f0fea82010f7a6da2321e Mon Sep 17 00:00:00 2001 From: Lars Noack Date: Wed, 8 May 2024 09:15:41 +0200 Subject: [PATCH 2/6] feat: improved musify --- music_kraken/cli/main_downloader.py | 4 +- music_kraken/objects/collection.py | 8 +- music_kraken/objects/song.py | 14 +- music_kraken/pages/musify.py | 261 +++++++++++++--------------- 4 files changed, 130 insertions(+), 157 deletions(-) diff --git a/music_kraken/cli/main_downloader.py b/music_kraken/cli/main_downloader.py index d66da6f..73812cd 100644 --- a/music_kraken/cli/main_downloader.py +++ b/music_kraken/cli/main_downloader.py @@ -304,10 +304,8 @@ class Downloader: def goto(self, data_object: DatabaseObject): page: Type[Page] - self.pages.fetch_details(data_object) + self.pages.fetch_details(data_object, stop_at_level=1) - print(data_object) - print(data_object.options) self.set_current_options(GoToResults(data_object.options, max_items_per_page=self.max_displayed_options)) self.print_current_options() diff --git a/music_kraken/objects/collection.py b/music_kraken/objects/collection.py index b8b2d4a..255caaa 100644 --- a/music_kraken/objects/collection.py +++ b/music_kraken/objects/collection.py @@ -164,6 +164,8 @@ class Collection(Generic[T]): r = c._find_object(other) if r is not None: output("found push to", r, other, self, color=BColors.RED, sep="\t") + if existing_object is not None: + self.remove(existing_object) return c.append(other, **kwargs) if existing_object is None: @@ -179,13 +181,11 @@ class Collection(Generic[T]): raise ValueError(f"Object {other} not found in {self}") return other - """ for collection_attribute, generator in self.extend_object_to_attribute.items(): - other.__getattribute__(collection_attribute).remove(*generator, silent=silent, **kwargs) + other.__getattribute__(collection_attribute).remove(*generator, silent=True, **kwargs) for attribute, new_object in self.append_object_to_attribute.items(): - other.__getattribute__(attribute).remove(new_object, silent=silent, **kwargs) - """ + other.__getattribute__(attribute).remove(new_object, silent=True, **kwargs) self._data.remove(existing) self._unmap_element(existing) diff --git a/music_kraken/objects/song.py b/music_kraken/objects/song.py index 9f9ba7e..d7924bd 100644 --- a/music_kraken/objects/song.py +++ b/music_kraken/objects/song.py @@ -207,7 +207,7 @@ class Song(Base): r = OPTION_FOREGROUND.value + self.title_string + BColors.ENDC.value + OPTION_BACKGROUND.value r += get_collection_string(self.album_collection, " from {}", ignore_titles={self.title}) r += get_collection_string(self.main_artist_collection, " by {}") - r += get_collection_string(self.feature_artist_collection, " feat. {}") + r += get_collection_string(self.feature_artist_collection, " feat. {}" if not self.main_artist_collection.empty or True else " by {}") return r @property @@ -413,11 +413,6 @@ class Album(Base): return self.album_type.value -""" -All objects dependent on Artist -""" - - class Artist(Base): name: str unified_name: str @@ -462,7 +457,7 @@ class Artist(Base): general_genre: str = None, unformatted_location: str = None, source_list: List[Source] = None, contact_list: List[Contact] = None, feature_song_list: List[Song] = None, main_album_list: List[Album] = None, label_list: List[Label] = None, **kwargs) -> None: - + super().__init__(name=name, unified_name=unified_name, country=country, formed_in=formed_in, notes=notes, lyrical_themes=lyrical_themes, general_genre=general_genre, unformatted_location=unformatted_location, source_list=source_list, contact_list=contact_list, @@ -593,11 +588,6 @@ class Artist(Base): return r -""" -Label -""" - - class Label(Base): COLLECTION_STRING_ATTRIBUTES = ("album_collection", "current_artist_collection") diff --git a/music_kraken/pages/musify.py b/music_kraken/pages/musify.py index 84a4453..242d0fa 100644 --- a/music_kraken/pages/musify.py +++ b/music_kraken/pages/musify.py @@ -1,7 +1,7 @@ from collections import defaultdict from dataclasses import dataclass from enum import Enum -from typing import List, Optional, Type, Union, Generator +from typing import List, Optional, Type, Union, Generator, Dict, Any from urllib.parse import urlparse import pycountry @@ -24,7 +24,7 @@ from ..objects import ( Lyrics, Artwork ) -from ..utils.config import logging_settings +from ..utils.config import logging_settings, main_settings from ..utils import string_processing, shared from ..utils.string_processing import clean_song_title from ..utils.support_classes.query import Query @@ -361,7 +361,7 @@ class Musify(Page): return Song( title=clean_song_title(song_title, artist_name=artist_list[0].name if len(artist_list) > 0 else None), - main_artist_list=artist_list, + feature_artist_list=artist_list, source_list=source_list ) @@ -510,7 +510,7 @@ class Musify(Page): title=clean_song_title(track_name, artist_name=artist_list[0].name if len(artist_list) > 0 else None), source_list=source_list, lyrics_list=lyrics_list, - main_artist_list=artist_list, + feature_artist_list=artist_list, album_list=album_list, artwork=artwork, ) @@ -652,10 +652,101 @@ class Musify(Page): return Song( title=clean_song_title(song_name, artist_name=artist_list[0].name if len(artist_list) > 0 else None), tracksort=tracksort, - main_artist_list=artist_list, + feature_artist_list=artist_list, source_list=source_list ) + + def _parse_album(self, soup: BeautifulSoup) -> Album: + name: str = None + source_list: List[Source] = [] + artist_list: List[Artist] = [] + date: ID3Timestamp = None + + """ + if breadcrumb list has 4 elements, then + the -2 is the artist link, + the -1 is the album + """ + # breadcrumb + breadcrumb_soup: BeautifulSoup = soup.find("ol", {"class", "breadcrumb"}) + breadcrumb_elements: List[BeautifulSoup] = breadcrumb_soup.find_all("li", {"class": "breadcrumb-item"}) + if len(breadcrumb_elements) == 4: + # album + album_crumb: BeautifulSoup = breadcrumb_elements[-1] + name = album_crumb.text.strip() + + # artist + artist_crumb: BeautifulSoup = breadcrumb_elements[-2] + anchor: BeautifulSoup = artist_crumb.find("a") + if anchor is not None: + href = anchor.get("href") + artist_source_list: List[Source] = [] + + if href is not None: + artist_source_list.append(Source(self.SOURCE_TYPE, self.HOST + href.strip())) + + span: BeautifulSoup = anchor.find("span") + if span is not None: + artist_list.append(Artist( + name=span.get_text(strip=True), + source_list=artist_source_list + )) + else: + self.LOGGER.debug("there are not 4 breadcrumb items, which shouldn't be the case") + + # meta + meta_url: BeautifulSoup = soup.find("meta", {"itemprop": "url"}) + if meta_url is not None: + url = meta_url.get("content") + if url is not None: + source_list.append(Source(self.SOURCE_TYPE, self.HOST + url)) + + meta_name: BeautifulSoup = soup.find("meta", {"itemprop": "name"}) + if meta_name is not None: + _name = meta_name.get("content") + if _name is not None: + name = _name + + # album info + album_info_ul: BeautifulSoup = soup.find("ul", {"class": "album-info"}) + if album_info_ul is not None: + artist_anchor: BeautifulSoup + for artist_anchor in album_info_ul.find_all("a", {"itemprop": "byArtist"}): + # line 98 + artist_source_list: List[Source] = [] + + artist_url_meta = artist_anchor.find("meta", {"itemprop": "url"}) + if artist_url_meta is not None: + artist_href = artist_url_meta.get("content") + if artist_href is not None: + artist_source_list.append(Source(self.SOURCE_TYPE, url=self.HOST + artist_href)) + + artist_meta_name = artist_anchor.find("meta", {"itemprop": "name"}) + if artist_meta_name is not None: + artist_name = artist_meta_name.get("content") + if artist_name is not None: + artist_list.append(Artist( + name=artist_name, + source_list=artist_source_list + )) + + time_soup: BeautifulSoup = album_info_ul.find("time", {"itemprop": "datePublished"}) + if time_soup is not None: + raw_datetime = time_soup.get("datetime") + if raw_datetime is not None: + try: + date = ID3Timestamp.strptime(raw_datetime, "%Y-%m-%d") + except ValueError: + self.LOGGER.debug(f"Raw datetime doesn't match time format %Y-%m-%d: {raw_datetime}") + + return Album( + title=name, + source_list=source_list, + artist_list=artist_list, + date=date + ) + def fetch_album(self, source: Source, stop_at_level: int = 1) -> Album: """ fetches album from source: @@ -694,19 +785,14 @@ class Musify(Page): return album - def _get_artist_attributes(self, url: MusifyUrl) -> Artist: + def _fetch_initial_artist(self, url: MusifyUrl, source: Source) -> Artist: """ - fetches the main Artist attributes from this endpoint https://musify.club/artist/ghost-bath-280348?_pjax=#bodyContent - it needs to parse html - - :param url: - :return: """ r = self.connection.get(f"https://musify.club/{url.source_type.value}/{url.name_with_id}?_pjax=#bodyContent", name="artist_attributes_" + url.name_with_id) if r is None: - return Artist() + return Artist(source_list=[source]) soup = self.get_soup_from_response(r) @@ -821,7 +907,7 @@ class Musify(Page): notes=notes ) - def _parse_album_card(self, album_card: BeautifulSoup, artist_name: str = None) -> Album: + def _parse_album_card(self, album_card: BeautifulSoup, source: Source, artist_name: str = None, **kwargs) -> Album: """ """ - _id: Optional[str] = None + album_kwargs: Dict[str, Any] = { + "source_list": [source], + } + name: str = None source_list: List[Source] = [] timestamp: Optional[ID3Timestamp] = None album_status = None def set_name(new_name: str): + nonlocal album_kwargs nonlocal name nonlocal artist_name @@ -882,7 +972,7 @@ class Musify(Page): album_status = AlbumStatus.BOOTLEG def parse_release_anchor(_anchor: BeautifulSoup, text_is_name=False): - nonlocal _id + nonlocal album_kwargs nonlocal name nonlocal source_list @@ -892,21 +982,11 @@ class Musify(Page): href = _anchor.get("href") if href is not None: # add url to sources - source_list.append(Source( + album_kwargs["source_list"].append(Source( self.SOURCE_TYPE, self.HOST + href )) - # split id from url - split_href = href.split("-") - if len(split_href) > 1: - _id = split_href[-1] - - if not text_is_name: - return - - set_name(_anchor.text) - anchor_list = album_card.find_all("a", recursive=False) if len(anchor_list) > 0: anchor = anchor_list[0] @@ -964,104 +1044,16 @@ class Musify(Page): album_status=album_status, ) - def _parse_album(self, soup: BeautifulSoup) -> Album: - name: str = None - source_list: List[Source] = [] - artist_list: List[Artist] = [] - date: ID3Timestamp = None - - """ - if breadcrumb list has 4 elements, then - the -2 is the artist link, - the -1 is the album - """ - # breadcrumb - breadcrumb_soup: BeautifulSoup = soup.find("ol", {"class", "breadcrumb"}) - breadcrumb_elements: List[BeautifulSoup] = breadcrumb_soup.find_all("li", {"class": "breadcrumb-item"}) - if len(breadcrumb_elements) == 4: - # album - album_crumb: BeautifulSoup = breadcrumb_elements[-1] - name = album_crumb.text.strip() - - # artist - artist_crumb: BeautifulSoup = breadcrumb_elements[-2] - anchor: BeautifulSoup = artist_crumb.find("a") - if anchor is not None: - href = anchor.get("href") - artist_source_list: List[Source] = [] - - if href is not None: - artist_source_list.append(Source(self.SOURCE_TYPE, self.HOST + href.strip())) - - span: BeautifulSoup = anchor.find("span") - if span is not None: - artist_list.append(Artist( - name=span.get_text(strip=True), - source_list=artist_source_list - )) - else: - self.LOGGER.debug("there are not 4 breadcrumb items, which shouldn't be the case") - - # meta - meta_url: BeautifulSoup = soup.find("meta", {"itemprop": "url"}) - if meta_url is not None: - url = meta_url.get("content") - if url is not None: - source_list.append(Source(self.SOURCE_TYPE, self.HOST + url)) - - meta_name: BeautifulSoup = soup.find("meta", {"itemprop": "name"}) - if meta_name is not None: - _name = meta_name.get("content") - if _name is not None: - name = _name - - # album info - album_info_ul: BeautifulSoup = soup.find("ul", {"class": "album-info"}) - if album_info_ul is not None: - artist_anchor: BeautifulSoup - for artist_anchor in album_info_ul.find_all("a", {"itemprop": "byArtist"}): - # line 98 - artist_source_list: List[Source] = [] - - artist_url_meta = artist_anchor.find("meta", {"itemprop": "url"}) - if artist_url_meta is not None: - artist_href = artist_url_meta.get("content") - if artist_href is not None: - artist_source_list.append(Source(self.SOURCE_TYPE, url=self.HOST + artist_href)) - - artist_meta_name = artist_anchor.find("meta", {"itemprop": "name"}) - if artist_meta_name is not None: - artist_name = artist_meta_name.get("content") - if artist_name is not None: - artist_list.append(Artist( - name=artist_name, - source_list=artist_source_list - )) - - time_soup: BeautifulSoup = album_info_ul.find("time", {"itemprop": "datePublished"}) - if time_soup is not None: - raw_datetime = time_soup.get("datetime") - if raw_datetime is not None: - try: - date = ID3Timestamp.strptime(raw_datetime, "%Y-%m-%d") - except ValueError: - self.LOGGER.debug(f"Raw datetime doesn't match time format %Y-%m-%d: {raw_datetime}") - - return Album( - title=name, - source_list=source_list, - artist_list=artist_list, - date=date - ) - - def _get_discography(self, artist: Artist, url: MusifyUrl, artist_name: str = None, stop_at_level: int = 1) -> Generator[Album, None, None]: + def _fetch_artist_discography(self, artist: Artist, url: MusifyUrl, artist_name: str = None, **kwargs): """ POST https://musify.club/artist/filteralbums - ArtistID: 280348 - SortOrder.Property: dateCreated - SortOrder.IsAscending: false - X-Requested-With: XMLHttpRequest + ArtistID: 280348 + SortOrder.Property: dateCreated + SortOrder.IsAscending: false + X-Requested-With: XMLHttpRequest """ + _download_all = kwargs.get("download_all", False) + _album_type_blacklist = kwargs.get("album_type_blacklist", main_settings["album_type_blacklist"]) endpoint = self.HOST + "/" + url.source_type.value + "/filteralbums" @@ -1072,36 +1064,29 @@ class Musify(Page): "X-Requested-With": "XMLHttpRequest" }, name="discography_" + url.name_with_id) if r is None: - return [] - soup: BeautifulSoup = BeautifulSoup(r.content, features="html.parser") + return + + soup: BeautifulSoup = self.get_soup_from_response(r) for card_soup in soup.find_all("div", {"class": "card"}): - album = self._parse_album_card(card_soup, artist_name) - if album.album_type is AlbumType.COMPILATION_ALBUM or album.album_type is AlbumType.MIXTAPE: + album = self._parse_album_card(card_soup, source, artist_name, **kwargs) + if album.album_type in _album_type_blacklist: continue + artist.main_album_collection.append(album) - def fetch_artist(self, source: Source, stop_at_level: int = 1) -> Artist: + def fetch_artist(self, source: Source, **kwargs) -> Artist: """ - fetches artist from source - + TODO [x] discography [x] attributes [] picture gallery - - Args: - source (Source): the source to fetch - stop_at_level: int = 1: if it is false, every album from discograohy will be fetched. Defaults to False. - - Returns: - Artist: the artist fetched """ url = parse_url(source.url) - artist = self._get_artist_attributes(url) - - self._get_discography(artist, url, artist.name) + artist = self._fetch_initial_artist(url, source=source, **kwargs) + self._fetch_artist_discography(artist, url, artist.name, **kwargs) return artist From a5f8057b823e188de0ccefdabbb683d550787aef Mon Sep 17 00:00:00 2001 From: Lars Noack Date: Wed, 8 May 2024 09:44:18 +0200 Subject: [PATCH 3/6] feat: improved initialization of data objects --- music_kraken/objects/song.py | 115 +++++++++++++++++++++++---------- music_kraken/pages/abstract.py | 2 +- music_kraken/pages/musify.py | 8 +-- 3 files changed, 86 insertions(+), 39 deletions(-) diff --git a/music_kraken/objects/song.py b/music_kraken/objects/song.py index d7924bd..fb4efc3 100644 --- a/music_kraken/objects/song.py +++ b/music_kraken/objects/song.py @@ -3,6 +3,7 @@ from __future__ import annotations import random from collections import defaultdict from typing import List, Optional, Dict, Tuple, Type, Union +import copy import pycountry @@ -118,13 +119,27 @@ class Song(Base): "tracksort": lambda: 0, } - def __init__(self, title: str = "", unified_title: str = None, isrc: str = None, length: int = None, - genre: str = None, note: FormattedText = None, source_list: List[Source] = None, - target_list: List[Target] = None, lyrics_list: List[Lyrics] = None, - main_artist_list: List[Artist] = None, feature_artist_list: List[Artist] = None, - album_list: List[Album] = None, tracksort: int = 0, artwork: Optional[Artwork] = None, **kwargs) -> None: + def __init__( + self, + title: str = None, + isrc: str = None, + length: int = None, + genre: str = None, + note: FormattedText = None, + source_list: List[Source] = None, + target_list: List[Target] = None, + lyrics_list: List[Lyrics] = None, + main_artist_list: List[Artist] = None, + feature_artist_list: List[Artist] = None, + album_list: List[Album] = None, + tracksort: int = 0, + artwork: Optional[Artwork] = None, + **kwargs + ) -> None: + real_kwargs = copy.copy(locals()) + real_kwargs.update(real_kwargs.pop("kwargs", {})) - Base.__init__(**locals()) + Base.__init__(**real_kwargs) UPWARDS_COLLECTION_STRING_ATTRIBUTES = ("main_artist_collection", "feature_artist_collection", "album_collection") TITEL = "title" @@ -245,6 +260,7 @@ class Album(Base): barcode: str albumsort: int notes: FormattedText + artwork: Artwork source_collection: SourceCollection @@ -263,6 +279,7 @@ class Album(Base): "language": lambda: Language.by_alpha_2("en"), "date": ID3Timestamp, "notes": FormattedText, + "artwork": Artwork, "source_collection": SourceCollection, "artist_collection": Collection, @@ -273,15 +290,27 @@ class Album(Base): TITEL = "title" # This is automatically generated - def __init__(self, title: str = None, unified_title: str = None, album_status: AlbumStatus = None, - album_type: AlbumType = None, language: Language = None, date: ID3Timestamp = None, - barcode: str = None, albumsort: int = None, notes: FormattedText = None, - source_list: List[Source] = None, artist_list: List[Artist] = None, song_list: List[Song] = None, - label_list: List[Label] = None, **kwargs) -> None: - super().__init__(title=title, unified_title=unified_title, album_status=album_status, album_type=album_type, - language=language, date=date, barcode=barcode, albumsort=albumsort, notes=notes, - source_list=source_list, artist_list=artist_list, song_list=song_list, label_list=label_list, - **kwargs) + def __init__( + self, + title: str = None, + unified_title: str = None, + album_status: AlbumStatus = None, + album_type: AlbumType = None, + language: Language = None, + date: ID3Timestamp = None, + barcode: str = None, + albumsort: int = None, + notes: FormattedText = None, + source_list: List[Source] = None, + artist_list: List[Artist] = None, + song_list: List[Song] = None, + label_list: List[Label] = None, + **kwargs + ) -> None: + real_kwargs = copy.copy(locals()) + real_kwargs.update(real_kwargs.pop("kwargs", {})) + + Base.__init__(**real_kwargs) DOWNWARDS_COLLECTION_STRING_ATTRIBUTES = ("song_collection",) UPWARDS_COLLECTION_STRING_ATTRIBUTES = ("label_collection", "artist_collection") @@ -415,7 +444,6 @@ class Album(Base): class Artist(Base): name: str - unified_name: str country: Country formed_in: ID3Timestamp notes: FormattedText @@ -432,8 +460,7 @@ class Artist(Base): label_collection: Collection[Label] _default_factories = { - "name": str, - "unified_name": lambda: None, + "name": lambda: None, "country": lambda: None, "unformatted_location": lambda: None, @@ -452,17 +479,28 @@ class Artist(Base): TITEL = "name" # This is automatically generated - def __init__(self, name: str = "", unified_name: str = None, country: Country = None, - formed_in: ID3Timestamp = None, notes: FormattedText = None, lyrical_themes: List[str] = None, - general_genre: str = None, unformatted_location: str = None, source_list: List[Source] = None, - contact_list: List[Contact] = None, feature_song_list: List[Song] = None, - main_album_list: List[Album] = None, label_list: List[Label] = None, **kwargs) -> None: + def __init__( + self, + name: str = None, + unified_name: str = None, + country: Country = None, + formed_in: ID3Timestamp = None, + notes: FormattedText = None, + lyrical_themes: List[str] = None, + general_genre: str = None, + unformatted_location: str = None, + source_list: List[Source] = None, + contact_list: List[Contact] = None, + feature_song_list: List[Song] = None, + main_album_list: List[Album] = None, + label_list: List[Label] = None, + **kwargs + ) -> None: + real_kwargs = copy.copy(locals()) + real_kwargs.update(real_kwargs.pop("kwargs", {})) + + Base.__init__(**real_kwargs) - super().__init__(name=name, unified_name=unified_name, country=country, formed_in=formed_in, notes=notes, - lyrical_themes=lyrical_themes, general_genre=general_genre, - unformatted_location=unformatted_location, source_list=source_list, contact_list=contact_list, - feature_song_list=feature_song_list, main_album_list=main_album_list, label_list=label_list, - **kwargs) DOWNWARDS_COLLECTION_STRING_ATTRIBUTES = ("main_album_collection", "feature_song_collection") UPWARDS_COLLECTION_STRING_ATTRIBUTES = ("label_collection",) @@ -615,12 +653,21 @@ class Label(Base): TITEL = "name" - def __init__(self, name: str = None, unified_name: str = None, notes: FormattedText = None, - source_list: List[Source] = None, contact_list: List[Contact] = None, - album_list: List[Album] = None, current_artist_list: List[Artist] = None, **kwargs) -> None: - super().__init__(name=name, unified_name=unified_name, notes=notes, source_list=source_list, - contact_list=contact_list, album_list=album_list, current_artist_list=current_artist_list, - **kwargs) + def __init__( + self, + name: str = None, + unified_name: str = None, + notes: FormattedText = None, + source_list: List[Source] = None, + contact_list: List[Contact] = None, + album_list: List[Album] = None, + current_artist_list: List[Artist] = None, + **kwargs + ) -> None: + real_kwargs = copy.copy(locals()) + real_kwargs.update(real_kwargs.pop("kwargs", {})) + + Base.__init__(**real_kwargs) def __init_collections__(self): self.album_collection.append_object_to_attribute = { diff --git a/music_kraken/pages/abstract.py b/music_kraken/pages/abstract.py index 080f310..e322048 100644 --- a/music_kraken/pages/abstract.py +++ b/music_kraken/pages/abstract.py @@ -254,7 +254,7 @@ class Page: } if obj_type in fetch_map: - music_object = fetch_map[obj_type](source, stop_at_level) + music_object = fetch_map[obj_type](source, stop_at_level=stop_at_level) else: self.LOGGER.warning(f"Can't fetch details of type: {obj_type}") return None diff --git a/music_kraken/pages/musify.py b/music_kraken/pages/musify.py index 242d0fa..a5c3f10 100644 --- a/music_kraken/pages/musify.py +++ b/music_kraken/pages/musify.py @@ -785,7 +785,7 @@ class Musify(Page): return album - def _fetch_initial_artist(self, url: MusifyUrl, source: Source) -> Artist: + def _fetch_initial_artist(self, url: MusifyUrl, source: Source, **kwargs) -> Artist: """ https://musify.club/artist/ghost-bath-280348?_pjax=#bodyContent """ @@ -907,7 +907,7 @@ class Musify(Page): notes=notes ) - def _parse_album_card(self, album_card: BeautifulSoup, source: Source, artist_name: str = None, **kwargs) -> Album: + def _parse_album_card(self, album_card: BeautifulSoup, artist_name: str = None, **kwargs) -> Album: """
@@ -932,7 +932,7 @@ class Musify(Page): """ album_kwargs: Dict[str, Any] = { - "source_list": [source], + "source_list": [], } name: str = None @@ -1069,7 +1069,7 @@ class Musify(Page): soup: BeautifulSoup = self.get_soup_from_response(r) for card_soup in soup.find_all("div", {"class": "card"}): - album = self._parse_album_card(card_soup, source, artist_name, **kwargs) + album = self._parse_album_card(card_soup, artist_name, **kwargs) if album.album_type in _album_type_blacklist: continue From a97f8872c8eed63016a7f61f9ab9f715b8e7bc96 Mon Sep 17 00:00:00 2001 From: Lars Noack Date: Wed, 8 May 2024 09:57:11 +0200 Subject: [PATCH 4/6] fix: refetching release title from album card --- music_kraken/pages/musify.py | 47 ++++++------------------------------ 1 file changed, 7 insertions(+), 40 deletions(-) diff --git a/music_kraken/pages/musify.py b/music_kraken/pages/musify.py index a5c3f10..ebcb8e6 100644 --- a/music_kraken/pages/musify.py +++ b/music_kraken/pages/musify.py @@ -935,34 +935,6 @@ class Musify(Page): "source_list": [], } - name: str = None - source_list: List[Source] = [] - timestamp: Optional[ID3Timestamp] = None - album_status = None - - def set_name(new_name: str): - nonlocal album_kwargs - nonlocal name - nonlocal artist_name - - # example of just setting not working: - # https://musify.club/release/unjoy-eurythmie-psychonaut-4-tired-numb-still-alive-2012-324067 - if new_name.count(" - ") != 1: - name = new_name - return - - potential_artist_list, potential_name = new_name.split(" - ") - unified_artist_list = string_processing.unify(potential_artist_list) - if artist_name is not None: - if string_processing.unify(artist_name) not in unified_artist_list: - name = new_name - return - - name = potential_name - return - - name = new_name - album_status_id = album_card.get("data-type") if album_status_id.isdigit(): album_status_id = int(album_status_id) @@ -973,8 +945,6 @@ class Musify(Page): def parse_release_anchor(_anchor: BeautifulSoup, text_is_name=False): nonlocal album_kwargs - nonlocal name - nonlocal source_list if _anchor is None: return @@ -987,6 +957,9 @@ class Musify(Page): self.HOST + href )) + if text_is_name: + album_kwargs["title"] = clean_song_title(_anchor.text, artist_name) + anchor_list = album_card.find_all("a", recursive=False) if len(anchor_list) > 0: anchor = anchor_list[0] @@ -996,7 +969,7 @@ class Musify(Page): if thumbnail is not None: alt = thumbnail.get("alt") if alt is not None: - set_name(alt) + album_kwargs["title"] = clean_song_title(alt, artist_name) image_url = thumbnail.get("src") else: @@ -1013,7 +986,7 @@ class Musify(Page): 13.11.2021 """ - nonlocal timestamp + nonlocal album_kwargs italic_tagging_soup: BeautifulSoup = small_soup.find("i") if italic_tagging_soup is None: @@ -1023,7 +996,7 @@ class Musify(Page): return raw_time = small_soup.text.strip() - timestamp = ID3Timestamp.strptime(raw_time, "%d.%m.%Y") + album_kwargs["date"] = ID3Timestamp.strptime(raw_time, "%d.%m.%Y") # parse small date card_footer_list = album_card.find_all("div", {"class": "card-footer"}) @@ -1036,13 +1009,7 @@ class Musify(Page): else: self.LOGGER.debug("there is not even 1 footer in the album card") - return Album( - title=name, - source_list=source_list, - date=timestamp, - album_type=album_type, - album_status=album_status, - ) + return Album(**album_kwargs) def _fetch_artist_discography(self, artist: Artist, url: MusifyUrl, artist_name: str = None, **kwargs): """ From 9c63e8e55ae516042ae15fe04bde1995b00faaf3 Mon Sep 17 00:00:00 2001 From: Lars Noack Date: Wed, 8 May 2024 12:09:41 +0200 Subject: [PATCH 5/6] fix: correct collections --- music_kraken/objects/collection.py | 59 ++++++++++++++++-------------- music_kraken/objects/parents.py | 4 ++ music_kraken/objects/song.py | 12 +----- 3 files changed, 37 insertions(+), 38 deletions(-) diff --git a/music_kraken/objects/collection.py b/music_kraken/objects/collection.py index 255caaa..9fd9f90 100644 --- a/music_kraken/objects/collection.py +++ b/music_kraken/objects/collection.py @@ -2,6 +2,8 @@ from __future__ import annotations from collections import defaultdict from typing import TypeVar, Generic, Dict, Optional, Iterable, List, Iterator, Tuple, Generator, Union, Any, Set +import copy + from .parents import OuterProxy from ..utils import object_trace from ..utils import output, BColors @@ -47,8 +49,15 @@ class Collection(Generic[T]): self.extend(data) + def __hash__(self) -> int: + return id(self) + + @property + def collection_names(self) -> List[str]: + return list(set(self._collection_for.values())) + def __repr__(self) -> str: - return f"Collection({' | '.join(self._collection_for.values())} {id(self)})" + return f"Collection({' | '.join(self.collection_names)} {id(self)})" def _map_element(self, __object: T, no_unmap: bool = False, **kwargs): if not no_unmap: @@ -104,8 +113,9 @@ class Collection(Generic[T]): """ self._data.append(other) + other._inner._is_in_collection.add(self) - # all of the existing hooks to get the defined datastructure + # all of the existing hooks to get the defined datastructures for collection_attribute, generator in self.extend_object_to_attribute.items(): other.__getattribute__(collection_attribute).extend(generator, **kwargs) @@ -148,32 +158,28 @@ class Collection(Generic[T]): object_trace(f"Appending {other.option_string} to {self}") - - for c in self.pull_from: - r = c._find_object(other) - if r is not None: - output("found pull from", r, other, self, color=BColors.RED, sep="\t") - other.merge(r, **kwargs) - c.remove(r, existing=r, **kwargs) - break - - existing_object = self._find_object(other) - # switching collection in the case of push to for c in self.push_to: r = c._find_object(other) if r is not None: - output("found push to", r, other, self, color=BColors.RED, sep="\t") - if existing_object is not None: - self.remove(existing_object) + # output("found push to", r, other, c, self, color=BColors.RED, sep="\t") return c.append(other, **kwargs) + + for c in self.pull_from: + r = c._find_object(other) + if r is not None: + # output("found pull from", r, other, c, self, color=BColors.RED, sep="\t") + c.remove(r, existing=r, **kwargs) - if existing_object is None: + existing = self._find_object(other) + + if existing is None: self._append_new_object(other, **kwargs) else: - existing_object.merge(other, **kwargs) + existing.merge(other, **kwargs) - def remove(self, *other_list: List[T], silent: bool = False, existing: Optional[T] = None, **kwargs): + def remove(self, *other_list: List[T], silent: bool = False, existing: Optional[T] = None, remove_from_other_collection=True, **kwargs): + other: T for other in other_list: existing: Optional[T] = existing or self._indexed_values["id"].get(other.id, None) if existing is None: @@ -181,14 +187,13 @@ class Collection(Generic[T]): raise ValueError(f"Object {other} not found in {self}") return other - for collection_attribute, generator in self.extend_object_to_attribute.items(): - other.__getattribute__(collection_attribute).remove(*generator, silent=True, **kwargs) - - for attribute, new_object in self.append_object_to_attribute.items(): - other.__getattribute__(attribute).remove(new_object, silent=True, **kwargs) - - self._data.remove(existing) - self._unmap_element(existing) + if remove_from_other_collection: + for c in copy.copy(other._inner._is_in_collection): + c.remove(other, silent=True, remove_from_other_collection=False, **kwargs) + other._inner._is_in_collection = set() + else: + self._data.remove(existing) + self._unmap_element(existing) def contains(self, __object: T) -> bool: return self._find_object(__object) is not None diff --git a/music_kraken/objects/parents.py b/music_kraken/objects/parents.py index a79887a..b4f867a 100644 --- a/music_kraken/objects/parents.py +++ b/music_kraken/objects/parents.py @@ -29,12 +29,15 @@ class InnerData: """ _refers_to_instances: set = None + _is_in_collection: set = None """ Attribute versions keep track, of if the attribute has been changed. """ def __init__(self, object_type, **kwargs): self._refers_to_instances = set() + self._is_in_collection = set() + self._fetched_from: dict = {} # initialize the default values @@ -58,6 +61,7 @@ class InnerData: """ self._fetched_from.update(__other._fetched_from) + self._is_in_collection.update(__other._is_in_collection) for key, value in __other.__dict__.copy().items(): if key.startswith("_"): diff --git a/music_kraken/objects/song.py b/music_kraken/objects/song.py index fb4efc3..33f68a0 100644 --- a/music_kraken/objects/song.py +++ b/music_kraken/objects/song.py @@ -222,17 +222,9 @@ class Song(Base): r = OPTION_FOREGROUND.value + self.title_string + BColors.ENDC.value + OPTION_BACKGROUND.value r += get_collection_string(self.album_collection, " from {}", ignore_titles={self.title}) r += get_collection_string(self.main_artist_collection, " by {}") - r += get_collection_string(self.feature_artist_collection, " feat. {}" if not self.main_artist_collection.empty or True else " by {}") + r += get_collection_string(self.feature_artist_collection, " feat. {}") return r - @property - def options(self) -> List[P]: - options = self.main_artist_collection.shallow_list - options.extend(self.feature_artist_collection) - options.extend(self.album_collection) - options.append(self) - return options - @property def tracksort_str(self) -> str: """ @@ -260,7 +252,6 @@ class Album(Base): barcode: str albumsort: int notes: FormattedText - artwork: Artwork source_collection: SourceCollection @@ -279,7 +270,6 @@ class Album(Base): "language": lambda: Language.by_alpha_2("en"), "date": ID3Timestamp, "notes": FormattedText, - "artwork": Artwork, "source_collection": SourceCollection, "artist_collection": Collection, From 9d4e3e8545d245bd5966cc91d33625a7cb36356d Mon Sep 17 00:00:00 2001 From: Lars Noack Date: Wed, 8 May 2024 12:23:16 +0200 Subject: [PATCH 6/6] fix: bounds get respected --- development/actual_donwload.py | 3 ++- music_kraken/cli/main_downloader.py | 8 ++++---- music_kraken/download/results.py | 3 +++ 3 files changed, 9 insertions(+), 5 deletions(-) diff --git a/development/actual_donwload.py b/development/actual_donwload.py index c5c3890..548e228 100644 --- a/development/actual_donwload.py +++ b/development/actual_donwload.py @@ -7,7 +7,8 @@ logging.getLogger().setLevel(logging.DEBUG) if __name__ == "__main__": commands = [ "s: #a Crystal F", - "10" + "10", + "2", ] diff --git a/music_kraken/cli/main_downloader.py b/music_kraken/cli/main_downloader.py index 73812cd..f9321b4 100644 --- a/music_kraken/cli/main_downloader.py +++ b/music_kraken/cli/main_downloader.py @@ -378,13 +378,13 @@ class Downloader: continue i = 0 - if possible_index.isdigit(): + try: i = int(possible_index) - else: + except ValueError: raise MKInvalidInputException(message=f"The index \"{possible_index}\" is not a number.") - if i < 0 and i >= len(self.current_results): - raise MKInvalidInputException(message=f"The index \"{i}\" is not within the bounds of 0-{len(self.current_results)}.") + if i < 0 or i >= len(self.current_results): + raise MKInvalidInputException(message=f"The index \"{i}\" is not within the bounds of 0-{len(self.current_results) - 1}.") indices.append(i) diff --git a/music_kraken/download/results.py b/music_kraken/download/results.py index a96d152..a8fead7 100644 --- a/music_kraken/download/results.py +++ b/music_kraken/download/results.py @@ -28,6 +28,9 @@ class Results: self._by_index = dict() self._page_by_index = dict() + def __len__(self) -> int: + return max(self._by_index.keys()) + def __getitem__(self, index: int): return self._by_index[index]