From 2e6c58255cb0318f443d2893825e0ffed8c59dbf Mon Sep 17 00:00:00 2001 From: Hellow2 Date: Tue, 31 Jan 2023 13:18:52 +0100 Subject: [PATCH] fixed mutable argument bug in the source dict of SourceAttribute --- src/goof.py | 14 ++- src/music_kraken/database/database.py | 1 + src/music_kraken/database/objects/song.py | 8 ++ src/music_kraken/database/objects/source.py | 7 +- .../pages/encyclopaedia_metallum.py | 89 +++++++++++++++++- src/music_kraken/utils/string_processing.py | 9 ++ test.db | Bin 65536 -> 65536 bytes 7 files changed, 125 insertions(+), 3 deletions(-) create mode 100644 src/music_kraken/utils/string_processing.py diff --git a/src/goof.py b/src/goof.py index a69807d..689b016 100644 --- a/src/goof.py +++ b/src/goof.py @@ -1,6 +1,7 @@ from music_kraken import ( Song, - Database + Database, + Artist ) from music_kraken.pages import ( @@ -20,8 +21,17 @@ def print_song(song_: Song): print(song_.source_list) print("album:") print(song_.album.source_list) + print("artist:") + print([a.source_list for a in song_.main_artist_list]) + print([a.source_list for a in song_.feature_artist_list]) print("\n") +def print_artist(artist: Artist): + print(artist) + print("---discography---") + for album in artist.discography: + print(album) + # only_smile = EncyclopaediaMetallum.search_by_query("only smile") # print(EncyclopaediaMetallum.search_by_query("#a Ghost Bath")) @@ -43,4 +53,6 @@ print_song(song) artist = song.main_artist_list[0] artist = EncyclopaediaMetallum.fetch_artist_details(artist) +print_artist(artist) + # print(only_smile) diff --git a/src/music_kraken/database/database.py b/src/music_kraken/database/database.py index 8cbe196..08e1e5b 100644 --- a/src/music_kraken/database/database.py +++ b/src/music_kraken/database/database.py @@ -329,6 +329,7 @@ class Database: for source in artist.source_list: source.type_enum = SourceTypes.ARTIST + source.add_song(artist) self.push_source(source) def pull_lyrics(self, song_ref: Reference = None, lyrics_ref: Reference = None) -> List[Lyrics]: diff --git a/src/music_kraken/database/objects/song.py b/src/music_kraken/database/objects/song.py index 1feb08e..6da7416 100644 --- a/src/music_kraken/database/objects/song.py +++ b/src/music_kraken/database/objects/song.py @@ -362,10 +362,15 @@ class Artist(DatabaseObject, SourceAttribute, MetadataAttribute): main_songs: List[Song] = None, feature_songs: List[Song] = None, main_albums: List[Album] = None, + album_type: str = None, notes: str = None ): DatabaseObject.__init__(self, id_=id_) + """ + TODO implement album type and notes + """ + self.album_type = album_type self.notes = notes if main_albums is None: @@ -391,6 +396,9 @@ class Artist(DatabaseObject, SourceAttribute, MetadataAttribute): def __repr__(self): return self.__str__() + def __eq__(self, __o: object) -> bool: + return self.id_ == __o.id_ + def get_features(self) -> Album: feature_release = Album( title="features", diff --git a/src/music_kraken/database/objects/source.py b/src/music_kraken/database/objects/source.py index 0b5e3c5..444862c 100644 --- a/src/music_kraken/database/objects/source.py +++ b/src/music_kraken/database/objects/source.py @@ -88,7 +88,12 @@ class SourceAttribute: This is a class that is meant to be inherited from. it adds the source_list attribute to a class """ - _source_dict: Dict[object, List[Source]] = {page_enum: list() for page_enum in SourcePages} + _source_dict: Dict[object, List[Source]] + + def __new__(cls, **_): + new = object.__new__(cls) + new._source_dict = {page_enum: list() for page_enum in SourcePages} + return new def add_source(self, source: Source): """ diff --git a/src/music_kraken/pages/encyclopaedia_metallum.py b/src/music_kraken/pages/encyclopaedia_metallum.py index 094e4f5..7d6e37e 100644 --- a/src/music_kraken/pages/encyclopaedia_metallum.py +++ b/src/music_kraken/pages/encyclopaedia_metallum.py @@ -13,7 +13,11 @@ from ..database import ( Source, SourcePages, Song, - Album + Album, + ID3Timestamp +) +from ..utils import ( + string_processing ) @@ -195,6 +199,71 @@ class EncyclopaediaMetallum(Page): ] ) + @classmethod + def add_dicography(cls, artist: Artist, ma_artist_id: str) -> Artist: + discography_url = "https://www.metal-archives.com/band/discography/id/{}/tab/all" + + # prepare tracklist + album_by_url = dict() + album_by_name = dict() + for album in artist.main_albums: + album_by_name[string_processing.unify(album.title)] = album + for source in album.get_sources_from_page(cls.SOURCE_TYPE): + album_by_url[source.url] = Album + old_discography = artist.main_albums.copy() + # save the ids of the albums, that are added to this set, so I can + # efficiently add all leftover albums from the discograpy to the new one + used_ids = set() + + new_discography: List[Album] = [] + + r = cls.API_SESSION.get(discography_url.format(ma_artist_id)) + if r.status_code != 200: + LOGGER.warning(f"code {r.status_code} at {discography_url.format(ma_artist_id)}") + return artist + + soup = BeautifulSoup(r.text, 'html.parser') + + tbody_soup = soup.find('tbody') + for tr_soup in tbody_soup.find_all('tr'): + td_list = tr_soup.findChildren(recursive=False) + + album_soup = td_list[0] + album_name = album_soup.text + album_url = album_soup.find('a').get('href') + album_id = album_url.split('/')[-1] + album_type = td_list[1].text + album_year = td_list[2].text + + unified_name = string_processing.unify(album_name) + + album_obj: Album = Album(id_=album_id) + if album_url in album_by_url: + album_obj = album_by_url[album_url] + used_ids.add(album_obj.id) + elif unified_name in album_by_name: + album_obj = album_by_name[unified_name] + album_obj.add_source(Source(SourcePages.ENCYCLOPAEDIA_METALLUM, album_url)) + used_ids.add(album_obj.id) + else: + album_obj.add_source(Source(SourcePages.ENCYCLOPAEDIA_METALLUM, album_url)) + + album_obj.title = album_name + album_obj.album_type = album_type + try: + album_obj.date = ID3Timestamp(year=int(album_year)) + except ValueError(): + pass + + new_discography.append(album_obj) + + for old_object in old_discography: + if old_object.id not in used_ids: + new_discography.append(old_object) + + artist.main_albums = new_discography + return artist + @classmethod def fetch_artist_details(cls, artist: Artist) -> Artist: source_list = artist.get_sources_from_page(cls.SOURCE_TYPE) @@ -203,6 +272,24 @@ class EncyclopaediaMetallum(Page): # taking the fist source, cuz I only need one and multiple sources don't make that much sense source = source_list[0] + artist_id = source.url.split("/")[-1] print(source) + print("id", artist_id) + + """ + https://www.metal-archives.com/bands/Ghost_Bath/3540372489 + https://www.metal-archives.com/band/discography/id/3540372489/tab/all + ---review--- + https://www.metal-archives.com/review/ajax-list-band/id/3540372489/json/1?sEcho=1&iColumns=4&sColumns=&iDisplayStart=0&iDisplayLength=200&mDataProp_0=0&mDataProp_1=1&mDataProp_2=2&mDataProp_3=3&iSortCol_0=3&sSortDir_0=desc&iSortingCols=1&bSortable_0=true&bSortable_1=true&bSortable_2=true&bSortable_3=true&_=1675155257133 + ---simmilar-bands--- + https://www.metal-archives.com/band/ajax-recommendations/id/3540372489 + ---external-sources--- + https://www.metal-archives.com/link/ajax-list/type/band/id/3540372489 + """ + + # SIMPLE METADATA + + # DISCOGRAPHY + artist = cls.add_dicography(artist, artist_id) return artist diff --git a/src/music_kraken/utils/string_processing.py b/src/music_kraken/utils/string_processing.py new file mode 100644 index 0000000..46edb8c --- /dev/null +++ b/src/music_kraken/utils/string_processing.py @@ -0,0 +1,9 @@ +def unify(string: str) -> str: + """ + returns an unified str, to make comparosons easy. + an unified string has following attributes: + - is lowercase + """ + + return string.lower() + \ No newline at end of file diff --git a/test.db b/test.db index f30bb9618315b5b409b7a1741b70dc4cc62117ad..b941c19337cdb85a6d0599e214c03cb7e2d6d967 100644 GIT binary patch delta 522 zcma)&J4ypl7)BFkq8ZWLSSW~{1ho0_z8@lq8xYb=X>sR1z)m!MCXKZ)-~wELirI#p zje?*nu(B1LAP9ET9}eI7$}%m>^kEsVrY&@bc6&FybiUF5TCe)UwLu{xmC*&D%tYV} zYcN)8P)1v&15uXo>UiixmSp+1nYFuFM@H>kkPI}J9Jmo23LFhYsu(5;2XC#tn4Dcs zs*llD^*KB|YPFs_PuVP6`jdDjyogMKjs*cusQ?p902i%_UP)!5{x9Lbl5xA6Z|xgf5n-bS!;g-+2uda0{V&~!h^+fAy|1=u7tS%6J) wmiCf78#Gz{IDS1CQ0gs}!GTxx!<>0re`P3GZY|a_NTu-nd-{$Vk+;|9KLA6DyZ`_I delta 534 zcma)&u}T9$5QdZ7p*f4}rHCRaYyzek-M!tt+i6T|Bc{31Vs~#B1O?Gp=dQ66guFrQ zRm{sM2>K#|7Z4kpYlbPl`DeZ<%djlNmkqiVx5yJ2_m;i*u^;zjMZzfG!D`Trf`SWA zePTc{$3Z08gW{BdERs}vL@EoTFU;-b;%l|Xv-6wT@ATf{elkp}pz{~OolWW(37PW>gm(3Q%hw%G4MWKo&HhIm zOxxXjfWunAb1G%QJ=hEl2*y~gq8y;s8m&^4NxJx$z~(c#BWL7o)sIied>rKMChFlD zY@!*g!6q`D{UDD*3BCWrRv@Swe&$1#DjLOyb{F5{ymk(Dz DOc{)L