From 2724eb3e365483f9586b4ab96eeeb02c689ac545 Mon Sep 17 00:00:00 2001
From: Hellow <Lars.Noack@outlook.de>
Date: Sat, 18 Mar 2023 13:01:27 +0100
Subject: [PATCH] added fetching of linked sources to musify

---
 src/music_kraken/objects/song.py   |  2 +-
 src/music_kraken/objects/source.py | 19 ++++++++++++++-----
 src/music_kraken/pages/musify.py   | 26 ++++++++++++++++++++------
 src/musify_search.py               |  1 -
 4 files changed, 35 insertions(+), 13 deletions(-)

diff --git a/src/music_kraken/objects/song.py b/src/music_kraken/objects/song.py
index 4169994..82a2cf0 100644
--- a/src/music_kraken/objects/song.py
+++ b/src/music_kraken/objects/song.py
@@ -377,7 +377,7 @@ All objects dependent on Artist
 
 
 class Artist(MainObject):
-    COLLECTION_ATTRIBUTES = ("feature_song_collection", "main_album_collection", "label_collection")
+    COLLECTION_ATTRIBUTES = ("feature_song_collection", "main_album_collection", "label_collection", "source_collection")
     SIMPLE_ATTRIBUTES = {
         "name": None,
         "unified_name": None,
diff --git a/src/music_kraken/objects/source.py b/src/music_kraken/objects/source.py
index 7213575..3214513 100644
--- a/src/music_kraken/objects/source.py
+++ b/src/music_kraken/objects/source.py
@@ -1,6 +1,7 @@
 from collections import defaultdict
 from enum import Enum
-from typing import List, Dict, Tuple
+from typing import List, Dict, Tuple, Optional
+from urllib.parse import urlparse
 
 from .metadata import Mapping, Metadata
 from .parents import DatabaseObject
@@ -27,7 +28,8 @@ class SourcePages(Enum):
     # This has nothing to do with audio, but bands can be here
     INSTAGRAM = "instagram"
     FACEBOOK = "facebook"
-    TWITTER = "twitter" # I will use nitter though lol
+    TWITTER = "twitter"     # I will use nitter though lol
+    MYSPACE = "myspace"     # Yes somehow this ancient site is linked EVERYWHERE
 
     @classmethod
     def get_homepage(cls, attribute) -> str:
@@ -42,7 +44,8 @@ class SourcePages(Enum):
             cls.INSTAGRAM: "https://www.instagram.com/",
             cls.FACEBOOK: "https://www.facebook.com/",
             cls.SPOTIFY: "https://open.spotify.com/",
-            cls.TWITTER: "https://twitter.com/"
+            cls.TWITTER: "https://twitter.com/",
+            cls.MYSPACE: "https://myspace.com/"
         }
         return homepage_map[attribute]
 
@@ -71,11 +74,14 @@ class Source(DatabaseObject):
         self.url = url
 
     @classmethod
-    def match_url(cls, url: str):
+    def match_url(cls, url: str) -> Optional["Source"]:
         """
         this shouldn't be used, unlesse you are not certain what the source is for
         the reason is that it is more inefficient
         """
+        parsed = urlparse(url)
+        url = parsed.geturl()
+
         if url.startswith("https://www.youtube"):
             return cls(SourcePages.YOUTUBE, url)
 
@@ -101,6 +107,9 @@ class Source(DatabaseObject):
         if url.startswith("https://twitter"):
             return cls(SourcePages.TWITTER, url)
 
+        if url.startswith("https://myspace.com"):
+            return cls(SourcePages.MYSPACE, url)
+
     def get_song_metadata(self) -> Metadata:
         return Metadata({
             Mapping.FILE_WEBPAGE_URL: [self.url],
@@ -157,4 +166,4 @@ class SourceCollection(Collection):
         getting the sources for a specific page like
         YouTube or musify
         """
-        return self._page_to_source_list[source_page]
+        return self._page_to_source_list[source_page].copy()
diff --git a/src/music_kraken/pages/musify.py b/src/music_kraken/pages/musify.py
index 1dfe02f..6059495 100644
--- a/src/music_kraken/pages/musify.py
+++ b/src/music_kraken/pages/musify.py
@@ -102,7 +102,7 @@ class Musify(Page):
     @classmethod
     def parse_artist_contact(cls, contact: BeautifulSoup) -> Artist:
         source_list: List[Source] = []
-        name = ""
+        name = None
         _id = None
 
         # source
@@ -153,12 +153,15 @@ class Musify(Page):
         """
 
         source_list: List[Source] = []
-        title = ""
+        title = None
         _id = None
         year = None
         artist_list: List[Artist] = []
 
         def parse_title_date(title_date: Optional[str], delimiter: str = " - "):
+            nonlocal year
+            nonlocal title
+
             if title_date is None:
                 return
 
@@ -262,7 +265,7 @@ class Musify(Page):
     @classmethod
     def parse_playlist_item(cls, playlist_item_soup: BeautifulSoup) -> Song:
         _id = None
-        song_title = playlist_item_soup.get("data-name") or ""
+        song_title = playlist_item_soup.get("data-name")
         artist_list: List[Artist] = []
         source_list: List[Source] = []
 
@@ -415,7 +418,7 @@ class Musify(Page):
         })
 
         _id: Optional[str] = None
-        name: str = ""
+        name: Optional[str] = None  # None (rather than "") marks a name that has not been parsed
         source_list: List[Source] = []
         timestamp: Optional[ID3Timestamp] = None
         album_status = None
@@ -586,7 +589,7 @@ class Musify(Page):
             </li>
         </ul>
         """
-        name = ""
+        name = None
         source_list: List[Source] = []
         country = None
         notes: FormattedText = None
@@ -625,6 +628,7 @@ class Musify(Page):
             if h1_name is not None:
                 name = h1_name.get_text(strip=True)
 
+        # country and sources
         icon_list: BeautifulSoup = soup.find("ul", {"class": "icon-list"})
         if icon_list is not None:
             country_italic: BeautifulSoup = icon_list.find("i", {"class", "flag-icon"})
@@ -646,10 +650,20 @@ class Musify(Page):
 
                     country = pycountry.countries.get(alpha_2=list(country_set)[0])
 
+            # collect the additional sources linked in the icon list (anchors with class "link")
+            additional_source: BeautifulSoup
+            for additional_source in icon_list.find_all("a", {"class": "link"}):
+                href = additional_source.get("href")
+                if href is None:
+                    continue
+                # match_url yields None when the url belongs to no known source page
+                new_src = Source.match_url(href)
+                if new_src is not None:
+                    source_list.append(new_src)
+
         note_soup: BeautifulSoup = soup.find(id="text-main")
         if note_soup is not None:
             notes = FormattedText(html=note_soup.decode_contents())
-            print(notes.plaintext)
 
         return Artist(
             _id=url.musify_id,
diff --git a/src/musify_search.py b/src/musify_search.py
index a95e172..bb80746 100644
--- a/src/musify_search.py
+++ b/src/musify_search.py
@@ -9,7 +9,6 @@ def search():
 
 def fetch_artist():
     artist = objects.Artist(
-        name="Ghost Bath",
         source_list=[objects.Source(objects.SourcePages.MUSIFY, "https://musify.club/artist/psychonaut-4-83193")]
     )