From e48285c3b04631cda3debb6c7c9d046e25953f5e Mon Sep 17 00:00:00 2001 From: Hellow Date: Wed, 1 Feb 2023 00:07:13 +0100 Subject: [PATCH] started implementation of fetching from ma artis sources --- src/music_kraken/database/objects/source.py | 33 +++++++++++++++++- .../pages/encyclopaedia_metallum.py | 31 +++++++++++++--- src/test.db | Bin 65536 -> 65536 bytes 3 files changed, 58 insertions(+), 6 deletions(-) diff --git a/src/music_kraken/database/objects/source.py b/src/music_kraken/database/objects/source.py index 269f41f..665ec76 100644 --- a/src/music_kraken/database/objects/source.py +++ b/src/music_kraken/database/objects/source.py @@ -21,6 +21,14 @@ class SourcePages(Enum): GENIUS = "genius" MUSICBRAINZ = "musicbrainz" ENCYCLOPAEDIA_METALLUM = "encyclopaedia metallum" + BANDCAMP = "bandcamp" + DEEZER = "deezer" + SPOTIFY = "spotify" + + # This has nothing to do with audio, but bands can be here + INSTAGRAM = "instagram" + FACEBOOK = "facebook" + TWITTER = "twitter" # I will use nitter though lol @classmethod def get_homepage(cls, attribute) -> str: @@ -29,7 +37,13 @@ class SourcePages(Enum): cls.MUSIFY: "https://musify.club/", cls.MUSICBRAINZ: "https://musicbrainz.org/", cls.ENCYCLOPAEDIA_METALLUM: "https://www.metal-archives.com/", - cls.GENIUS: "https://genius.com/" + cls.GENIUS: "https://genius.com/", + cls.BANDCAMP: "https://bandcamp.com/", + cls.DEEZER: "https://www.deezer.com/", + cls.INSTAGRAM: "https://www.instagram.com/", + cls.FACEBOOK: "https://www.facebook.com/", + cls.SPOTIFY: "https://open.spotify.com/", + cls.TWITTER: "https://twitter.com/" } return homepage_map[attribute] @@ -89,16 +103,33 @@ class SourceAttribute: it adds the source_list attribute to a class """ _source_dict: Dict[object, List[Source]] + source_url_map: Dict[str, Source] def __new__(cls, **kwargs): new = object.__new__(cls) new._source_dict = {page_enum: list() for page_enum in SourcePages} + new.source_url_map = dict() return new + def match_source_with_url(self, url: str) -> bool: + """ + this function returns true, if a source with this url exists, + else it returns false + :param url: + :return source_with_url_exists: + """ + return url in self.source_url_map + + def match_source(self, source: Source) -> bool: + return self.match_source_with_url(source.url) + def add_source(self, source: Source): """ adds a new Source to the sources """ + if self.match_source(source): + return + self.source_url_map[source.url] = source self._source_dict[source.page_enum].append(source) def get_sources_from_page(self, page_enum) -> List[Source]: diff --git a/src/music_kraken/pages/encyclopaedia_metallum.py b/src/music_kraken/pages/encyclopaedia_metallum.py index 94804f7..2b8bdfc 100644 --- a/src/music_kraken/pages/encyclopaedia_metallum.py +++ b/src/music_kraken/pages/encyclopaedia_metallum.py @@ -200,10 +200,10 @@ class EncyclopaediaMetallum(Page): ) @classmethod - def add_dicography(cls, artist: Artist, ma_artist_id: str) -> Artist: + def fetch_artist_discography(cls, artist: Artist, ma_artist_id: str) -> Artist: """ TODO - I'd guess this funktion has quite some posibility for otimizations + I'd guess this funktion has quite some possibility for optimizations in form of performance and clean code """ discography_url = "https://www.metal-archives.com/band/discography/id/{}/tab/all" @@ -217,16 +217,18 @@ class EncyclopaediaMetallum(Page): album_by_url[source.url] = album old_discography = artist.main_albums.copy() # save the ids of the albums, that are added to this set, so I can - # efficiently add all leftover albums from the discograpy to the new one + # efficiently add all leftover albums from the discography to the new one used_ids = set() new_discography: List[Album] = [] - + + # make the request r = cls.API_SESSION.get(discography_url.format(ma_artist_id)) if r.status_code != 200: LOGGER.warning(f"code {r.status_code} at {discography_url.format(ma_artist_id)}") return artist + # parse the html soup = BeautifulSoup(r.text, 'html.parser') tbody_soup = soup.find('tbody') @@ -264,11 +266,27 @@ class EncyclopaediaMetallum(Page): new_discography.append(album_obj) + # add the albums back, which weren't on this page for old_object in old_discography: if old_object.id not in used_ids: new_discography.append(old_object) artist.main_albums = new_discography + + return artist + + @classmethod + def fetch_artist_sources(cls, artist: Artist, ma_artist_id: str) -> Artist: + sources_url = "https://www.metal-archives.com/link/ajax-list/type/band/id/{}" + + # make the request + r = cls.API_SESSION.get(sources_url.format(ma_artist_id)) + if r.status_code != 200: + LOGGER.warning(f"code {r.status_code} at {sources_url.format(ma_artist_id)}") + return artist + + print(r.text) + return artist @classmethod @@ -297,6 +315,9 @@ class EncyclopaediaMetallum(Page): # SIMPLE METADATA # DISCOGRAPHY - artist = cls.add_dicography(artist, artist_id) + artist = cls.fetch_artist_discography(artist, artist_id) + + # External Sources + artist = cls.fetch_artist_sources(artist, artist_id) return artist diff --git a/src/test.db b/src/test.db index 500c5cdf47a7bfab1a2908c9c116b6acfcb49ee8..42f100548b3c19afe1579226081daf3873953a7d 100644 GIT binary patch delta 788 zcma)3O=}ZD7@o~;(%odU(@<(CsTj0kL8h7gnEenXpa(@1iV}*4Cd}^6P@yG~tl&Xt za?p#HmVkf2yJGf72r5Rtxy}2&56?UwosQJ$NLwXgW_$kH z#Tl@)ax)JP0nF5a_6!cx4_ZC9puSN4WPfMh%7@B8z9+w%x{|5C-u|49A7$Q1%H#A@ zu~-zItn_Z=hPjYNtihYD4I0Js>fSk3(j)+1!n4|QO^OTCzw)FDC8bvMy0fw(x-KCu zb__&J-A5KNY~+SGKvu|z<1_Bqn9+6r;YM$_da4&y&(!};?aKAFW;<$JrO{gDE?eh~ zTg~LjS{2X1u4!o>?{pPWsm*y^K~~b#;Ci4NA;QFQk>xQHQ3Er?bQ)NmPaV^sv=!ZN zM+hoZ8){m-?>irzdNe2KP#D(SW hFqak6!nluAe^;8$2yh?3{Y2;<)Kozf(}|Yx{TME@*(?A6 delta 442 zcma)$KT85(0Egf6{&=t2y&9y5LP0@-ymJ4~TfCvIr5X-5;eGWE+9Dm?I)|nRjQawD zdxG~NS|SIRf*_$90&A3pg064i_8T6a=NANA5OBj1Bi)n_suHL*4>!;^Ktdl#S7?@b zkXq?RW}M2WbE!AD2|vWH_?~PE{V13H;-i*u$3e;`3xxu6xf|BfuTeGgv@0f11)y70 ziK#L&2W$emMSH-A5SSWKiAOBl_9(%+tm(LJQ4cE}suI1^)(zhx=e^TzFI+hJ@TjpJ zUOT1QF(bNLc1eV+t`Lpds)dbzi*8wVEHG58%9N0JDZXKZ&rT)&FahWhP2vw@bW&km zW}S9opLO9njp<0(iG;H`+9RfBDz+zM&sSxvn-;}(ofufQNynp}rl`Id{a(DRM^1-J hhhf6cS&>@-w2E@0B}ze-hXBm^dpKQi*@zqcpg$Mzd7=OS