Added fetching of artist sources from Metal Archives (MA)

2023-02-01 09:10:05 +01:00
parent e48285c3b0
commit f1e92a8464
3 changed files with 53 additions and 2 deletions
--- a/src/goof.py
+++ b/src/goof.py
@@ -12,6 +12,11 @@ from music_kraken.pages import (
 test_db = Database("test.db")
 # test_db.reset()

+def print_source(source_obj):
+    """Print a "---source---" header, then every entry of source_obj.source_list on its own line."""
+    print("---source---")
+    for entry in source_obj.source_list:
+        print(entry)
+
 def print_song(song_: Song):
    print(str(song_.metadata))
    print("----album--")
@@ -31,6 +36,7 @@ def print_artist(artist: Artist):
    print("---discography---")
    for album in artist.discography:
        print(album)
+    print_source(artist)


 # only_smile = EncyclopaediaMetallum.search_by_query("only smile")
--- a/src/music_kraken/database/objects/source.py
+++ b/src/music_kraken/database/objects/source.py
@@ -66,6 +66,37 @@ class Source(DatabaseObject, SongAttribute, MetadataAttribute):

        self.url = url

+    @classmethod
+    def match_url(cls, url: str):
+        """
+        Create a Source by guessing the page from the host name of the url.
+
+        Only use this when the page the url belongs to is not already known;
+        passing the page to the constructor directly is cheaper.
+
+        :param url: absolute url, e.g. "https://www.youtube.com/watch?v=..."
+        :return: a new instance of cls for the matched page, or None if the
+            url is empty, malformed or its host belongs to no known page
+        """
+        # imported locally so the import block of the file stays untouched
+        from urllib.parse import urlparse
+
+        if not url:
+            return None
+
+        # Compare against the parsed host instead of the raw string, so that
+        # "http://", a missing "www." or upper case letters don't prevent a
+        # match, and a page name that merely occurs in the path
+        # (e.g. ".../bandcamp") can't cause a wrong one.
+        try:
+            host = (urlparse(url.strip()).hostname or "").lower()
+        except ValueError:
+            # urlparse rejects e.g. unbalanced brackets in the host part
+            return None
+
+        if host.startswith("www."):
+            host = host[len("www."):]
+
+        if host in ("youtube.com", "youtu.be"):
+            return cls(SourcePages.YOUTUBE, url)
+
+        if host == "deezer.com":
+            return cls(SourcePages.DEEZER, url)
+
+        if host == "open.spotify.com":
+            return cls(SourcePages.SPOTIFY, url)
+
+        # bandcamp pages live on their own subdomain, e.g. "artist.bandcamp.com"
+        if host == "bandcamp.com" or host.endswith(".bandcamp.com"):
+            return cls(SourcePages.BANDCAMP, url)
+
+        if host == "metal-archives.com":
+            return cls(SourcePages.ENCYCLOPAEDIA_METALLUM, url)
+
+        # the less important ones
+        if host == "facebook.com":
+            return cls(SourcePages.FACEBOOK, url)
+
+        if host == "instagram.com":
+            return cls(SourcePages.INSTAGRAM, url)
+
+        if host == "twitter.com":
+            return cls(SourcePages.TWITTER, url)
+
+        return None
+
    def get_song_metadata(self) -> MetadataAttribute.Metadata:
        return MetadataAttribute.Metadata({
            Mapping.FILE_WEBPAGE_URL: [self.url],
@@ -87,7 +118,7 @@ class Source(DatabaseObject, SongAttribute, MetadataAttribute):
        return super().get_metadata()

    def __str__(self):
-        return f"{self.page_enum}: {self.url}"
+        # delegate to __repr__ so str() and repr() yield the same text
+        return self.__repr__()

    def __repr__(self) -> str:
        """Return the source rendered as ``Src(<page value>: <url>)``."""
        page_name = self.page_enum.value
        return f"Src({page_name}: {self.url})"
--- a/src/music_kraken/pages/encyclopaedia_metallum.py
+++ b/src/music_kraken/pages/encyclopaedia_metallum.py
@@ -285,7 +285,21 @@ class EncyclopaediaMetallum(Page):
            LOGGER.warning(f"code {r.status_code} at {sources_url.format(ma_artist_id)}")
            return artist

-        print(r.text)
+        soup = BeautifulSoup(r.text, 'html.parser')
+
+        artist_source = soup.find("div", {"id": "band_links_Official"})
+        merchandice_source = soup.find("div", {"id": "band_links_Official_merchandise"})
+        label_source = soup.find("div", {"id": "band_links_Labels"})
+
+        for tr in artist_source.find_all("td"):
+            a = tr.find("a")
+            url = a.get("href")
+
+            source = Source.match_url(url)
+            if source is None:
+                continue
+
+            artist.add_source(source)

        return artist