added fetching of linked sources to musify
This commit is contained in:
parent
862c25dd35
commit
2724eb3e36
@ -377,7 +377,7 @@ All objects dependent on Artist
|
||||
|
||||
|
||||
class Artist(MainObject):
|
||||
COLLECTION_ATTRIBUTES = ("feature_song_collection", "main_album_collection", "label_collection")
|
||||
COLLECTION_ATTRIBUTES = ("feature_song_collection", "main_album_collection", "label_collection", "source_collection")
|
||||
SIMPLE_ATTRIBUTES = {
|
||||
"name": None,
|
||||
"unified_name": None,
|
||||
|
@ -1,6 +1,7 @@
|
||||
from collections import defaultdict
|
||||
from enum import Enum
|
||||
from typing import List, Dict, Tuple
|
||||
from typing import List, Dict, Tuple, Optional
|
||||
from urllib.parse import urlparse
|
||||
|
||||
from .metadata import Mapping, Metadata
|
||||
from .parents import DatabaseObject
|
||||
@ -28,6 +29,7 @@ class SourcePages(Enum):
|
||||
INSTAGRAM = "instagram"
|
||||
FACEBOOK = "facebook"
|
||||
TWITTER = "twitter" # I will use nitter though lol
|
||||
MYSPACE = "myspace" # Yes somehow this ancient site is linked EVERYWHERE
|
||||
|
||||
@classmethod
|
||||
def get_homepage(cls, attribute) -> str:
|
||||
@ -42,7 +44,8 @@ class SourcePages(Enum):
|
||||
cls.INSTAGRAM: "https://www.instagram.com/",
|
||||
cls.FACEBOOK: "https://www.facebook.com/",
|
||||
cls.SPOTIFY: "https://open.spotify.com/",
|
||||
cls.TWITTER: "https://twitter.com/"
|
||||
cls.TWITTER: "https://twitter.com/",
|
||||
cls.MYSPACE: "https://myspace.com/"
|
||||
}
|
||||
return homepage_map[attribute]
|
||||
|
||||
@ -71,11 +74,14 @@ class Source(DatabaseObject):
|
||||
self.url = url
|
||||
|
||||
@classmethod
|
||||
def match_url(cls, url: str):
|
||||
def match_url(cls, url: str) -> Optional["Source"]:
|
||||
"""
|
||||
this shouldn't be used, unless you are not certain what the source is for
|
||||
the reason is that it is less efficient
|
||||
"""
|
||||
parsed = urlparse(url)
|
||||
url = parsed.geturl()
|
||||
|
||||
if url.startswith("https://www.youtube"):
|
||||
return cls(SourcePages.YOUTUBE, url)
|
||||
|
||||
@ -101,6 +107,9 @@ class Source(DatabaseObject):
|
||||
if url.startswith("https://twitter"):
|
||||
return cls(SourcePages.TWITTER, url)
|
||||
|
||||
if url.startswith("https://myspace.com"):
|
||||
return cls(SourcePages.MYSPACE, url)
|
||||
|
||||
def get_song_metadata(self) -> Metadata:
|
||||
return Metadata({
|
||||
Mapping.FILE_WEBPAGE_URL: [self.url],
|
||||
@ -157,4 +166,4 @@ class SourceCollection(Collection):
|
||||
getting the sources for a specific page like
|
||||
YouTube or musify
|
||||
"""
|
||||
return self._page_to_source_list[source_page]
|
||||
return self._page_to_source_list[source_page].copy()
|
||||
|
@ -102,7 +102,7 @@ class Musify(Page):
|
||||
@classmethod
|
||||
def parse_artist_contact(cls, contact: BeautifulSoup) -> Artist:
|
||||
source_list: List[Source] = []
|
||||
name = ""
|
||||
name = None
|
||||
_id = None
|
||||
|
||||
# source
|
||||
@ -153,12 +153,15 @@ class Musify(Page):
|
||||
"""
|
||||
|
||||
source_list: List[Source] = []
|
||||
title = ""
|
||||
title = None
|
||||
_id = None
|
||||
year = None
|
||||
artist_list: List[Artist] = []
|
||||
|
||||
def parse_title_date(title_date: Optional[str], delimiter: str = " - "):
|
||||
nonlocal year
|
||||
nonlocal title
|
||||
|
||||
if title_date is None:
|
||||
return
|
||||
|
||||
@ -262,7 +265,7 @@ class Musify(Page):
|
||||
@classmethod
|
||||
def parse_playlist_item(cls, playlist_item_soup: BeautifulSoup) -> Song:
|
||||
_id = None
|
||||
song_title = playlist_item_soup.get("data-name") or ""
|
||||
song_title = playlist_item_soup.get("data-name")
|
||||
artist_list: List[Artist] = []
|
||||
source_list: List[Source] = []
|
||||
|
||||
@ -415,7 +418,7 @@ class Musify(Page):
|
||||
})
|
||||
|
||||
_id: Optional[str] = None
|
||||
name: str = ""
|
||||
name: str = None
|
||||
source_list: List[Source] = []
|
||||
timestamp: Optional[ID3Timestamp] = None
|
||||
album_status = None
|
||||
@ -586,7 +589,7 @@ class Musify(Page):
|
||||
</li>
|
||||
</ul>
|
||||
"""
|
||||
name = ""
|
||||
name = None
|
||||
source_list: List[Source] = []
|
||||
country = None
|
||||
notes: FormattedText = None
|
||||
@ -625,6 +628,7 @@ class Musify(Page):
|
||||
if h1_name is not None:
|
||||
name = h1_name.get_text(strip=True)
|
||||
|
||||
# country and sources
|
||||
icon_list: BeautifulSoup = soup.find("ul", {"class": "icon-list"})
|
||||
if icon_list is not None:
|
||||
country_italic: BeautifulSoup = icon_list.find("i", {"class", "flag-icon"})
|
||||
@ -646,10 +650,20 @@ class Musify(Page):
|
||||
|
||||
country = pycountry.countries.get(alpha_2=list(country_set)[0])
|
||||
|
||||
# get all additional sources
|
||||
additional_source: BeautifulSoup
|
||||
for additional_source in icon_list.find_all("a", {"class", "link"}):
|
||||
href = additional_source.get("href")
|
||||
if href is None:
|
||||
continue
|
||||
new_src = Source.match_url(href)
|
||||
if new_src is None:
|
||||
continue
|
||||
source_list.append(new_src)
|
||||
|
||||
note_soup: BeautifulSoup = soup.find(id="text-main")
|
||||
if note_soup is not None:
|
||||
notes = FormattedText(html=note_soup.decode_contents())
|
||||
print(notes.plaintext)
|
||||
|
||||
return Artist(
|
||||
_id=url.musify_id,
|
||||
|
@ -9,7 +9,6 @@ def search():
|
||||
|
||||
def fetch_artist():
|
||||
artist = objects.Artist(
|
||||
name="Ghost Bath",
|
||||
source_list=[objects.Source(objects.SourcePages.MUSIFY, "https://musify.club/artist/psychonaut-4-83193")]
|
||||
)
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user