From f1e92a8464faa3eb4a96d0c3ca7adf8ce2385906 Mon Sep 17 00:00:00 2001
From: Hellow2
Date: Wed, 1 Feb 2023 09:10:05 +0100
Subject: [PATCH] added the fetching of the sources from ma regarding artists

---
Review note: the line breaks and indentation of this patch were
reconstructed (4-space indentation assumed); verify the context lines
against the target files before applying. The blob ids in the index
lines refer to the originally submitted version of the change.

 src/goof.py                                   |  6 ++++
 src/music_kraken/database/objects/source.py   | 45 +++++++++++++++++++++++++++++-
 .../pages/encyclopaedia_metallum.py           | 22 ++++++++++++++-
 3 files changed, 71 insertions(+), 2 deletions(-)

diff --git a/src/goof.py b/src/goof.py
index 31f24d5..90bf0f3 100644
--- a/src/goof.py
+++ b/src/goof.py
@@ -12,6 +12,11 @@ from music_kraken.pages import (
 test_db = Database("test.db")
 # test_db.reset()
 
+def print_source(source_obj):
+    print("---source---")
+    for source in source_obj.source_list:
+        print(source)
+
 def print_song(song_: Song):
     print(str(song_.metadata))
     print("----album--")
@@ -31,6 +36,7 @@ def print_artist(artist: Artist):
     print("---discography---")
     for album in artist.discography:
         print(album)
+    print_source(artist)
 
 
 # only_smile = EncyclopaediaMetallum.search_by_query("only smile")
diff --git a/src/music_kraken/database/objects/source.py b/src/music_kraken/database/objects/source.py
index 665ec76..b80ef89 100644
--- a/src/music_kraken/database/objects/source.py
+++ b/src/music_kraken/database/objects/source.py
@@ -66,6 +66,49 @@ class Source(DatabaseObject, SongAttribute, MetadataAttribute):
 
         self.url = url
 
+    @classmethod
+    def match_url(cls, url: str):
+        """
+        Guess which page the url belongs to and build the matching Source.
+
+        This shouldn't be used unless you are not certain what the source is,
+        because testing the url against every known page is less efficient
+        than passing the page directly.
+
+        Returns None if the url is empty or doesn't match any known page.
+        """
+        # a scraped link might come without a href
+        if not url:
+            return None
+
+        if url.startswith("https://www.youtube"):
+            return cls(SourcePages.YOUTUBE, url)
+
+        if url.startswith("https://www.deezer"):
+            return cls(SourcePages.DEEZER, url)
+
+        if url.startswith("https://open.spotify.com"):
+            return cls(SourcePages.SPOTIFY, url)
+
+        if "bandcamp" in url:
+            return cls(SourcePages.BANDCAMP, url)
+
+        if url.startswith("https://www.metal-archives.com/"):
+            return cls(SourcePages.ENCYCLOPAEDIA_METALLUM, url)
+
+        # the less important ones
+        if url.startswith("https://www.facebook"):
+            return cls(SourcePages.FACEBOOK, url)
+
+        if url.startswith("https://www.instagram"):
+            return cls(SourcePages.INSTAGRAM, url)
+
+        if url.startswith("https://twitter"):
+            return cls(SourcePages.TWITTER, url)
+
+        # no known page matched
+        return None
+
     def get_song_metadata(self) -> MetadataAttribute.Metadata:
         return MetadataAttribute.Metadata({
             Mapping.FILE_WEBPAGE_URL: [self.url],
@@ -87,7 +130,7 @@ class Source(DatabaseObject, SongAttribute, MetadataAttribute):
         return super().get_metadata()
 
     def __str__(self):
-        return f"{self.page_enum}: {self.url}"
+        return self.__repr__()
 
     def __repr__(self) -> str:
         return f"Src({self.page_enum.value}: {self.url})"
diff --git a/src/music_kraken/pages/encyclopaedia_metallum.py b/src/music_kraken/pages/encyclopaedia_metallum.py
index 2b8bdfc..31523b5 100644
--- a/src/music_kraken/pages/encyclopaedia_metallum.py
+++ b/src/music_kraken/pages/encyclopaedia_metallum.py
@@ -285,7 +285,27 @@ class EncyclopaediaMetallum(Page):
             LOGGER.warning(f"code {r.status_code} at {sources_url.format(ma_artist_id)}")
             return artist
 
-        print(r.text)
+        soup = BeautifulSoup(r.text, 'html.parser')
+
+        artist_source = soup.find("div", {"id": "band_links_Official"})
+        # TODO: the merchandise and label links are located but not parsed yet
+        merchandise_source = soup.find("div", {"id": "band_links_Official_merchandise"})
+        label_source = soup.find("div", {"id": "band_links_Labels"})
+
+        # soup.find returns None if the response has no such div
+        link_cells = [] if artist_source is None else artist_source.find_all("td")
+        for td in link_cells:
+            a = td.find("a")
+            if a is None:
+                continue
+
+            # a missing href yields None, which match_url answers with None
+            url = a.get("href")
+            source = Source.match_url(url)
+            if source is None:
+                continue
+
+            artist.add_source(source)
 
         return artist
 