This commit is contained in:
Hellow 2023-03-17 13:11:18 +01:00
parent a27c4e28c3
commit a01ea0cfcd

View File

@ -54,9 +54,10 @@ SortOrder.IsAscending: false
X-Requested-With: XMLHttpRequest
"""
class MusifyTypes(Enum):
    """Kinds of musify pages; the value is the first path segment of the page url."""

    # e.g. https://musify.club/artist/<name>-<id>
    ARTIST = "artist"
@dataclass
class MusifyUrl:
@ -77,7 +78,7 @@ class Musify(Page):
# Send the API session's requests through the proxies configured in `shared`.
API_SESSION.proxies = shared.proxies
# Page identifier passed as first argument to every Source built by this class.
SOURCE_TYPE = SourcePages.MUSIFY
# Base url; the (relative) hrefs scraped from the pages are appended to it.
HOST = "https://musify.club"
@classmethod
@ -112,39 +113,39 @@ class Musify(Page):
LOGGER.warning("too many tries, returning")
return None
return BeautifulSoup(r.content, features="html.parser")
@classmethod
def parse_artist_contact(cls, contact: BeautifulSoup) -> Artist:
    """
    Build an Artist from a single artist "contact" element.

    The first anchor provides the fallback name (its ``title`` attribute),
    the source url (``HOST`` + href) and the id (the part of the href after
    the last "-"). If the first image carries an ``alt`` text, that text
    takes precedence as the name.

    :param contact: soup of one contact element
    :return: the parsed Artist; ``name`` stays "" and ``_id`` stays None
        when the markup does not provide them
    """
    source_list: List[Source] = []
    name = ""
    _id = None

    # source
    anchor = contact.find("a")
    if anchor is not None:
        # keep the "" default instead of None if the anchor has no title
        name = anchor.get("title") or name

        href = anchor.get("href")
        # an anchor without href can neither provide an id nor a source;
        # skipping it avoids a TypeError on `"-" in None` / `HOST + None`
        if href is not None:
            if "-" in href:
                _id = href.split("-")[-1]

            source_list.append(Source(cls.SOURCE_TYPE, cls.HOST + href))

    # artist image
    image_soup = contact.find("img")
    if image_soup is not None:
        alt = image_soup.get("alt")
        if alt is not None:
            name = alt

    return Artist(
        _id=_id,
        name=name,
        source_list=source_list
    )
@classmethod
def parse_album_contact(cls, contact: BeautifulSoup) -> Album:
"""
@ -165,51 +166,50 @@ class Musify(Page):
</a>
</div>
"""
source_list: List[Source] = []
title = ""
_id = None
year = None
artist_list: List[Artist] = []
def parse_title_date(title_date: Optional[str], delimiter: str = " - "):
if title_date is None:
return
title_date = title_date.strip()
title_date = title_date.strip()
split_attr = title_date.split(delimiter)
if len(split_attr) < 2:
return
if not split_attr[-1].isdigit():
return
year = int(split_attr[-1])
title = delimiter.join(split_attr[:-1])
# source
anchor = contact.find("a")
if anchor is not None:
href = anchor.get("href")
# get the title and year
parse_title_date(anchor.get("title"))
if "-" in href:
_id = href.split("-")[-1]
source_list.append(Source(cls.SOURCE_TYPE, cls.HOST + href))
# cover art
image_soup = contact.find("img")
if image_soup is not None:
alt = image_soup.get("alt")
if alt is not None:
title = alt
cover_art = image_soup.get("src")
contact_info_soup = contact.find("div", {"class": "contacts__info"})
if contact_info_soup is not None:
"""
@ -218,11 +218,11 @@ class Musify(Page):
<small>Треков: 4</small> <!--tracks-->
<small><i class="zmdi zmdi-star zmdi-hc-fw"></i> 9,04</small>
"""
title_soup = contact_info_soup.find("strong")
if title_soup is None:
parse_title_date(title_soup)
small_list = contact_info_soup.find_all("small")
if len(small_list) == 3:
# artist
@ -232,18 +232,17 @@ class Musify(Page):
for artist_str in raw_artist_str.split("&\r\n"):
artist_str = artist_str.rstrip("& ...\r\n")
artist_str = artist_str.strip()
if artist_str.endswith("]") and "[" in artist_str:
artist_str = artist_str.rsplit("[", maxsplit=1)[0]
artist_list.append(Artist(name=artist_str))
track_count_soup: BeautifulSoup = small_list[1]
rating_soup: BeautifulSoup = small_list[2]
else:
LOGGER.warning("got an unequal ammount than 3 small elements")
return Album(
_id=_id,
title=title,
@ -251,59 +250,59 @@ class Musify(Page):
date=ID3Timestamp(year=year),
artist_list=artist_list
)
@classmethod
def parse_contact_container(cls, contact_container_soup: BeautifulSoup) -> List[Union[Artist, Album]]:
    """
    Parse every ``contacts__item`` div of a contact container.

    The href of an item's first anchor decides how the item is parsed:
    hrefs containing "artist" yield an Artist, otherwise hrefs containing
    "release" yield an Album. Items without anchor, without href or with
    any other href are ignored.

    :param contact_container_soup: soup of the container holding the items
    :return: the parsed artists and albums in document order
    """
    parsed = []

    item: BeautifulSoup
    for item in contact_container_soup.find_all("div", {"class": "contacts__item"}):
        link = item.find("a")
        if link is None:
            continue

        target = link.get("href")
        if target is None:
            continue

        if "artist" in target:
            parsed.append(cls.parse_artist_contact(item))
        elif "release" in target:
            parsed.append(cls.parse_album_contact(item))

    return parsed
@classmethod
def parse_playlist_item(cls, playlist_item_soup: BeautifulSoup) -> Song:
_id = None
song_title = playlist_item_soup.get("data-name") or ""
artist_list: List[Artist] = []
source_list: List[Source] = []
# details
playlist_details: BeautifulSoup = playlist_item_soup.find("div", {"class", "playlist__heading"})
if playlist_details is not None:
anchor_list = playlist_details.find_all("a")
if len(anchor_list) >= 2:
print(anchor_list)
# artists
artist_anchor: BeautifulSoup
artist_anchor: BeautifulSoup
for artist_anchor in anchor_list[:-1]:
_id = None
href = artist_anchor.get("href")
artist_source: Source = Source(cls.SOURCE_TYPE, cls.HOST + href)
if "-" in href:
_id = href.split("-")[-1]
artist_list.append(Artist(
_id=_id,
name=artist_anchor.get_text(strip=True),
source_list=[artist_source]
))
# track
track_soup: BeautifulSoup = anchor_list[-1]
"""
@ -318,11 +317,11 @@ class Musify(Page):
if raw_id.isdigit():
_id = raw_id
source_list.append(Source(cls.SOURCE_TYPE, cls.HOST + href))
else:
LOGGER.warning("there are not enough anchors (2) for artist and track")
LOGGER.warning(str(artist_list))
"""
artist_name = playlist_item_soup.get("data-artist")
if artist_name is not None:
@ -333,53 +332,53 @@ class Musify(Page):
raw_id = id_attribute.replace("playerDiv", "")
if raw_id.isdigit():
_id = raw_id
return Song(
_id=_id,
title=song_title,
main_artist_list=artist_list,
source_list=source_list
)
@classmethod
def parse_playlist_soup(cls, playlist_soup: BeautifulSoup) -> List[Song]:
    """
    Parse every ``playlist__item`` div below ``playlist_soup`` into a Song.

    :param playlist_soup: soup of a playlist element
    :return: one Song per playlist item, in document order
    """
    item_soups = playlist_soup.find_all("div", {"class": "playlist__item"})
    return [cls.parse_playlist_item(item_soup) for item_soup in item_soups]
@classmethod
def plaintext_search(cls, query: str) -> Options:
    """
    Search for ``query`` and collect everything the result page lists.

    :param query: the plain text to search for
    :return: Options wrapping the parsed contacts (artists/albums) followed
        by the parsed songs; None if no soup could be fetched for the query
    """
    search_soup = cls.get_soup_of_search(query=query)
    if search_soup is None:
        return None

    found = []

    # artists and albums: every div with the class "contacts"
    for contact_container_soup in search_soup.find_all("div", {"class": "contacts"}):
        found += cls.parse_contact_container(contact_container_soup)

    # songs: every div with the class "playlist"
    for playlist_soup in search_soup.find_all("div", {"class": "playlist"}):
        found += cls.parse_playlist_soup(playlist_soup)

    return Options(found)
@classmethod
def parse_url(cls, url: str) -> MusifyUrl:
parsed = urlparse(url)
path = parsed.path.split("/")
split_name = path[2].split("-")
url_id = split_name[-1]
name_for_url = "-".join(split_name[:-1])
return MusifyUrl(
source_type=MusifyTypes(path[1]),
name_without_id=name_for_url,
@ -387,7 +386,7 @@ class Musify(Page):
musify_id=url_id,
url=url
)
@classmethod
def parse_album_card(cls, album_card: BeautifulSoup) -> Album:
    """
    Build an Album from a single album "card" element.

    Every release anchor that is found contributes one source. The title is
    the ``alt`` text of the thumbnail inside the card's first direct anchor
    and is replaced by the text of the anchor inside the ``card-body`` div
    if that text is not empty.

    :param album_card: soup of one card
    :return: the parsed Album; the title is "" and the source list empty
        if the card provides neither
    """
    name: str = ""
    source_list: List[Source] = []

    def parse_release_anchor(anchor: Optional[BeautifulSoup], text_is_name: bool = False) -> None:
        # `name` belongs to the enclosing function; without `nonlocal` the
        # assignment below only creates a local that is thrown away
        nonlocal name

        if anchor is None:
            return

        href = anchor.get("href")
        # an anchor without href can't be turned into a source url
        if href is not None:
            source_list.append(Source(
                cls.SOURCE_TYPE,
                cls.HOST + href
            ))

        if not text_is_name:
            return

        anchor_text = anchor.text.strip()
        # don't overwrite a title found earlier with an empty one
        if anchor_text:
            name = anchor_text

    anchor_list = album_card.find_all("a", recursive=False)
    if len(anchor_list) > 0:
        anchor = anchor_list[0]
        parse_release_anchor(anchor)

        thumbnail: BeautifulSoup = anchor.find("img")
        if thumbnail is not None:
            alt = thumbnail.get("alt")
            if alt is not None:
                name = alt
    else:
        LOGGER.debug("the card has no thumbnail or url")

    card_body = album_card.find("div", {"class": "card-body"})
    if card_body is not None:
        parse_release_anchor(card_body.find("a"), text_is_name=True)

    return Album(
        title=name,
        source_list=source_list
    )
@classmethod
def get_discography(cls, url: MusifyUrl) -> List[Album]:
    """
    Fetch the album cards for ``url`` from the ``filteralbums`` endpoint.

    The following fields are posted as the json body of the request:

    ArtistID: <url.musify_id>
    SortOrder.Property: dateCreated
    SortOrder.IsAscending: false
    X-Requested-With: XMLHttpRequest

    :param url: the parsed musify url; its source type selects the endpoint
        and its musify id is sent as ``ArtistID``
    :return: one Album per ``div`` with the class "card" in the response
    """
    endpoint = cls.HOST + "/" + url.source_type.value + "/filteralbums"

    r = cls.API_SESSION.post(url=endpoint, json={
        "ArtistID": str(url.musify_id),
        "SortOrder.Property": "dateCreated",
        "SortOrder.IsAscending": False,
        "X-Requested-With": "XMLHttpRequest"
    })
    # report the response through the logger instead of printing to stdout
    LOGGER.debug("%s responded with %s", endpoint, r)

    soup: BeautifulSoup = BeautifulSoup(r.content, features="html.parser")

    return [
        cls.parse_album_card(card_soup)
        for card_soup in soup.find_all("div", {"class": "card"})
    ]
@classmethod
def get_artist_from_source(cls, source: Source, flat: bool = False) -> Artist:
"""
@ -505,13 +500,13 @@ class Musify(Page):
Returns:
Artist: the artist fetched
"""
print(source)
url = cls.parse_url(source.url)
print(url)
discography: List[Album] = cls.get_discography(url)
return Artist(
name="",
main_album_list=discography
@ -522,10 +517,10 @@ class Musify(Page):
source_list = artist.source_collection.get_sources_from_page(cls.SOURCE_TYPE)
if len(source_list) == 0:
return artist
for source in source_list:
artist.merge(cls.get_artist_from_source(source, flat=flat))
return artist
@classmethod