added fetching of linked sources to musify
This commit is contained in:
parent
862c25dd35
commit
2724eb3e36
@ -377,7 +377,7 @@ All objects dependent on Artist
|
|||||||
|
|
||||||
|
|
||||||
class Artist(MainObject):
|
class Artist(MainObject):
|
||||||
COLLECTION_ATTRIBUTES = ("feature_song_collection", "main_album_collection", "label_collection")
|
COLLECTION_ATTRIBUTES = ("feature_song_collection", "main_album_collection", "label_collection", "source_collection")
|
||||||
SIMPLE_ATTRIBUTES = {
|
SIMPLE_ATTRIBUTES = {
|
||||||
"name": None,
|
"name": None,
|
||||||
"unified_name": None,
|
"unified_name": None,
|
||||||
|
@ -1,6 +1,7 @@
|
|||||||
from collections import defaultdict
|
from collections import defaultdict
|
||||||
from enum import Enum
|
from enum import Enum
|
||||||
from typing import List, Dict, Tuple
|
from typing import List, Dict, Tuple, Optional
|
||||||
|
from urllib.parse import urlparse
|
||||||
|
|
||||||
from .metadata import Mapping, Metadata
|
from .metadata import Mapping, Metadata
|
||||||
from .parents import DatabaseObject
|
from .parents import DatabaseObject
|
||||||
@ -28,6 +29,7 @@ class SourcePages(Enum):
|
|||||||
INSTAGRAM = "instagram"
|
INSTAGRAM = "instagram"
|
||||||
FACEBOOK = "facebook"
|
FACEBOOK = "facebook"
|
||||||
TWITTER = "twitter" # I will use nitter though lol
|
TWITTER = "twitter" # I will use nitter though lol
|
||||||
|
MYSPACE = "myspace" # Yes somehow this ancient site is linked EVERYWHERE
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
def get_homepage(cls, attribute) -> str:
|
def get_homepage(cls, attribute) -> str:
|
||||||
@ -42,7 +44,8 @@ class SourcePages(Enum):
|
|||||||
cls.INSTAGRAM: "https://www.instagram.com/",
|
cls.INSTAGRAM: "https://www.instagram.com/",
|
||||||
cls.FACEBOOK: "https://www.facebook.com/",
|
cls.FACEBOOK: "https://www.facebook.com/",
|
||||||
cls.SPOTIFY: "https://open.spotify.com/",
|
cls.SPOTIFY: "https://open.spotify.com/",
|
||||||
cls.TWITTER: "https://twitter.com/"
|
cls.TWITTER: "https://twitter.com/",
|
||||||
|
cls.MYSPACE: "https://myspace.com/"
|
||||||
}
|
}
|
||||||
return homepage_map[attribute]
|
return homepage_map[attribute]
|
||||||
|
|
||||||
@ -71,11 +74,14 @@ class Source(DatabaseObject):
|
|||||||
self.url = url
|
self.url = url
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
def match_url(cls, url: str):
|
def match_url(cls, url: str) -> Optional["Source"]:
|
||||||
"""
|
"""
|
||||||
this shouldn't be used, unless you are not certain what the source is for
|
this shouldn't be used, unless you are not certain what the source is for
|
||||||
the reason is that it is less efficient
|
the reason is that it is less efficient
|
||||||
"""
|
"""
|
||||||
|
parsed = urlparse(url)
|
||||||
|
url = parsed.geturl()
|
||||||
|
|
||||||
if url.startswith("https://www.youtube"):
|
if url.startswith("https://www.youtube"):
|
||||||
return cls(SourcePages.YOUTUBE, url)
|
return cls(SourcePages.YOUTUBE, url)
|
||||||
|
|
||||||
@ -101,6 +107,9 @@ class Source(DatabaseObject):
|
|||||||
if url.startswith("https://twitter"):
|
if url.startswith("https://twitter"):
|
||||||
return cls(SourcePages.TWITTER, url)
|
return cls(SourcePages.TWITTER, url)
|
||||||
|
|
||||||
|
if url.startswith("https://myspace.com"):
|
||||||
|
return cls(SourcePages.MYSPACE, url)
|
||||||
|
|
||||||
def get_song_metadata(self) -> Metadata:
|
def get_song_metadata(self) -> Metadata:
|
||||||
return Metadata({
|
return Metadata({
|
||||||
Mapping.FILE_WEBPAGE_URL: [self.url],
|
Mapping.FILE_WEBPAGE_URL: [self.url],
|
||||||
@ -157,4 +166,4 @@ class SourceCollection(Collection):
|
|||||||
getting the sources for a specific page like
|
getting the sources for a specific page like
|
||||||
YouTube or musify
|
YouTube or musify
|
||||||
"""
|
"""
|
||||||
return self._page_to_source_list[source_page]
|
return self._page_to_source_list[source_page].copy()
|
||||||
|
@ -102,7 +102,7 @@ class Musify(Page):
|
|||||||
@classmethod
|
@classmethod
|
||||||
def parse_artist_contact(cls, contact: BeautifulSoup) -> Artist:
|
def parse_artist_contact(cls, contact: BeautifulSoup) -> Artist:
|
||||||
source_list: List[Source] = []
|
source_list: List[Source] = []
|
||||||
name = ""
|
name = None
|
||||||
_id = None
|
_id = None
|
||||||
|
|
||||||
# source
|
# source
|
||||||
@ -153,12 +153,15 @@ class Musify(Page):
|
|||||||
"""
|
"""
|
||||||
|
|
||||||
source_list: List[Source] = []
|
source_list: List[Source] = []
|
||||||
title = ""
|
title = None
|
||||||
_id = None
|
_id = None
|
||||||
year = None
|
year = None
|
||||||
artist_list: List[Artist] = []
|
artist_list: List[Artist] = []
|
||||||
|
|
||||||
def parse_title_date(title_date: Optional[str], delimiter: str = " - "):
|
def parse_title_date(title_date: Optional[str], delimiter: str = " - "):
|
||||||
|
nonlocal year
|
||||||
|
nonlocal title
|
||||||
|
|
||||||
if title_date is None:
|
if title_date is None:
|
||||||
return
|
return
|
||||||
|
|
||||||
@ -262,7 +265,7 @@ class Musify(Page):
|
|||||||
@classmethod
|
@classmethod
|
||||||
def parse_playlist_item(cls, playlist_item_soup: BeautifulSoup) -> Song:
|
def parse_playlist_item(cls, playlist_item_soup: BeautifulSoup) -> Song:
|
||||||
_id = None
|
_id = None
|
||||||
song_title = playlist_item_soup.get("data-name") or ""
|
song_title = playlist_item_soup.get("data-name")
|
||||||
artist_list: List[Artist] = []
|
artist_list: List[Artist] = []
|
||||||
source_list: List[Source] = []
|
source_list: List[Source] = []
|
||||||
|
|
||||||
@ -415,7 +418,7 @@ class Musify(Page):
|
|||||||
})
|
})
|
||||||
|
|
||||||
_id: Optional[str] = None
|
_id: Optional[str] = None
|
||||||
name: str = ""
|
name: str = None
|
||||||
source_list: List[Source] = []
|
source_list: List[Source] = []
|
||||||
timestamp: Optional[ID3Timestamp] = None
|
timestamp: Optional[ID3Timestamp] = None
|
||||||
album_status = None
|
album_status = None
|
||||||
@ -586,7 +589,7 @@ class Musify(Page):
|
|||||||
</li>
|
</li>
|
||||||
</ul>
|
</ul>
|
||||||
"""
|
"""
|
||||||
name = ""
|
name = None
|
||||||
source_list: List[Source] = []
|
source_list: List[Source] = []
|
||||||
country = None
|
country = None
|
||||||
notes: FormattedText = None
|
notes: FormattedText = None
|
||||||
@ -625,6 +628,7 @@ class Musify(Page):
|
|||||||
if h1_name is not None:
|
if h1_name is not None:
|
||||||
name = h1_name.get_text(strip=True)
|
name = h1_name.get_text(strip=True)
|
||||||
|
|
||||||
|
# country and sources
|
||||||
icon_list: BeautifulSoup = soup.find("ul", {"class": "icon-list"})
|
icon_list: BeautifulSoup = soup.find("ul", {"class": "icon-list"})
|
||||||
if icon_list is not None:
|
if icon_list is not None:
|
||||||
country_italic: BeautifulSoup = icon_list.find("i", {"class", "flag-icon"})
|
country_italic: BeautifulSoup = icon_list.find("i", {"class", "flag-icon"})
|
||||||
@ -646,10 +650,20 @@ class Musify(Page):
|
|||||||
|
|
||||||
country = pycountry.countries.get(alpha_2=list(country_set)[0])
|
country = pycountry.countries.get(alpha_2=list(country_set)[0])
|
||||||
|
|
||||||
|
# get all additional sources
|
||||||
|
additional_source: BeautifulSoup
|
||||||
|
for additional_source in icon_list.find_all("a", {"class", "link"}):
|
||||||
|
href = additional_source.get("href")
|
||||||
|
if href is None:
|
||||||
|
continue
|
||||||
|
new_src = Source.match_url(href)
|
||||||
|
if new_src is None:
|
||||||
|
continue
|
||||||
|
source_list.append(new_src)
|
||||||
|
|
||||||
note_soup: BeautifulSoup = soup.find(id="text-main")
|
note_soup: BeautifulSoup = soup.find(id="text-main")
|
||||||
if note_soup is not None:
|
if note_soup is not None:
|
||||||
notes = FormattedText(html=note_soup.decode_contents())
|
notes = FormattedText(html=note_soup.decode_contents())
|
||||||
print(notes.plaintext)
|
|
||||||
|
|
||||||
return Artist(
|
return Artist(
|
||||||
_id=url.musify_id,
|
_id=url.musify_id,
|
||||||
|
@ -9,7 +9,6 @@ def search():
|
|||||||
|
|
||||||
def fetch_artist():
|
def fetch_artist():
|
||||||
artist = objects.Artist(
|
artist = objects.Artist(
|
||||||
name="Ghost Bath",
|
|
||||||
source_list=[objects.Source(objects.SourcePages.MUSIFY, "https://musify.club/artist/psychonaut-4-83193")]
|
source_list=[objects.Source(objects.SourcePages.MUSIFY, "https://musify.club/artist/psychonaut-4-83193")]
|
||||||
)
|
)
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user