From f1e92a8464faa3eb4a96d0c3ca7adf8ce2385906 Mon Sep 17 00:00:00 2001
From: Hellow2 <lars.noack@outlook.de>
Date: Wed, 1 Feb 2023 09:10:05 +0100
Subject: [PATCH] added fetching of artist sources from Encyclopaedia Metallum (MA)

---
 src/goof.py                                   |  6 ++++
 src/music_kraken/database/objects/source.py   | 33 ++++++++++++++++++-
 .../pages/encyclopaedia_metallum.py           | 16 ++++++++-
 3 files changed, 53 insertions(+), 2 deletions(-)

diff --git a/src/goof.py b/src/goof.py
index 31f24d5..90bf0f3 100644
--- a/src/goof.py
+++ b/src/goof.py
@@ -12,6 +12,11 @@ from music_kraken.pages import (
 test_db = Database("test.db")
 # test_db.reset()
 
+def print_source(source_obj):
+    print("---source---")  # section header, like the other print helpers
+    for entry in source_obj.source_list:
+        print(entry)  # one source per line, using its own str() form
+
 def print_song(song_: Song):
     print(str(song_.metadata))
     print("----album--")
@@ -31,6 +36,7 @@ def print_artist(artist: Artist):
     print("---discography---")
     for album in artist.discography:
         print(album)
+    print_source(artist)
 
 
 # only_smile = EncyclopaediaMetallum.search_by_query("only smile")
diff --git a/src/music_kraken/database/objects/source.py b/src/music_kraken/database/objects/source.py
index 665ec76..b80ef89 100644
--- a/src/music_kraken/database/objects/source.py
+++ b/src/music_kraken/database/objects/source.py
@@ -66,6 +66,37 @@ class Source(DatabaseObject, SongAttribute, MetadataAttribute):
 
         self.url = url
 
+    @classmethod
+    def match_url(cls, url: str) -> "Source | None":
+        """
+        Guess the page of a bare url and return a matching Source, or None
+        if url is empty or fits no known page. The scheme and a leading
+        "www." are ignored. Slower than building the Source directly, so
+        only use it when the page isn't known beforehand.
+        """
+        if not url:
+            return None
+        bare = url.split("://", 1)[-1]  # drop the scheme (http, https, ...)
+        bare = bare[4:] if bare.startswith("www.") else bare
+        if bare.startswith("youtube"):
+            return cls(SourcePages.YOUTUBE, url)
+        if bare.startswith("deezer"):
+            return cls(SourcePages.DEEZER, url)
+        if bare.startswith("open.spotify.com"):
+            return cls(SourcePages.SPOTIFY, url)
+        if "bandcamp" in url:
+            return cls(SourcePages.BANDCAMP, url)
+        if bare.startswith("metal-archives.com/"):
+            return cls(SourcePages.ENCYCLOPAEDIA_METALLUM, url)
+        # the less important ones
+        if bare.startswith("facebook"):
+            return cls(SourcePages.FACEBOOK, url)
+        if bare.startswith("instagram"):
+            return cls(SourcePages.INSTAGRAM, url)
+        if bare.startswith("twitter"):
+            return cls(SourcePages.TWITTER, url)
+        return None
+
     def get_song_metadata(self) -> MetadataAttribute.Metadata:
         return MetadataAttribute.Metadata({
             Mapping.FILE_WEBPAGE_URL: [self.url],
@@ -87,7 +118,7 @@ class Source(DatabaseObject, SongAttribute, MetadataAttribute):
         return super().get_metadata()
 
     def __str__(self):
-        return f"{self.page_enum}: {self.url}"
+        return repr(self)
 
     def __repr__(self) -> str:
         return f"Src({self.page_enum.value}: {self.url})"
diff --git a/src/music_kraken/pages/encyclopaedia_metallum.py b/src/music_kraken/pages/encyclopaedia_metallum.py
index 2b8bdfc..31523b5 100644
--- a/src/music_kraken/pages/encyclopaedia_metallum.py
+++ b/src/music_kraken/pages/encyclopaedia_metallum.py
@@ -285,7 +285,21 @@ class EncyclopaediaMetallum(Page):
             LOGGER.warning(f"code {r.status_code} at {sources_url.format(ma_artist_id)}")
             return artist
 
-        print(r.text)
+        soup = BeautifulSoup(r.text, 'html.parser')
+
+        artist_source = soup.find("div", {"id": "band_links_Official"})
+        merchandice_source = soup.find("div", {"id": "band_links_Official_merchandise"})
+        label_source = soup.find("div", {"id": "band_links_Labels"})
+
+        for tr in artist_source.find_all("td"):
+            a = tr.find("a")
+            url = a.get("href")
+
+            source = Source.match_url(url)
+            if source is None:
+                continue
+
+            artist.add_source(source)
 
         return artist