This commit is contained in:
Hellow 2023-03-17 13:11:18 +01:00
parent a27c4e28c3
commit a01ea0cfcd

View File

@ -54,9 +54,10 @@ SortOrder.IsAscending: false
X-Requested-With: XMLHttpRequest X-Requested-With: XMLHttpRequest
""" """
class MusifyTypes(Enum):
    """
    The kinds of musify.club pages this module knows about.

    The value of a member is the first path segment of the matching url
    (parse_url builds the member from that segment) and is reused to build
    endpoint urls.
    """
    ARTIST = "artist"
@dataclass @dataclass
class MusifyUrl: class MusifyUrl:
@ -77,7 +78,7 @@ class Musify(Page):
API_SESSION.proxies = shared.proxies API_SESSION.proxies = shared.proxies
SOURCE_TYPE = SourcePages.MUSIFY SOURCE_TYPE = SourcePages.MUSIFY
HOST = "https://musify.club" HOST = "https://musify.club"
@classmethod @classmethod
@ -112,39 +113,39 @@ class Musify(Page):
LOGGER.warning("too many tries, returning") LOGGER.warning("too many tries, returning")
return None return None
return BeautifulSoup(r.content, features="html.parser") return BeautifulSoup(r.content, features="html.parser")
@classmethod
def parse_artist_contact(cls, contact: BeautifulSoup) -> Artist:
    """
    Build an Artist from one artist "contact" element of a search result.

    The first anchor of the element provides the source url (HOST + href),
    the id (the part of the href after the last "-") and a fallback name
    (its title attribute). If the first image of the element has an alt
    text, that text is used as the name instead.

    Args:
        contact (BeautifulSoup): the soup of a single contact element.

    Returns:
        Artist: the parsed artist. Anything that could not be found keeps
            its default (_id None, name "", no sources).
    """
    source_list: List[Source] = []
    name = ""
    _id = None

    # source
    anchor = contact.find("a")
    if anchor is not None:
        # keep the "" default if the anchor has no title attribute
        name = anchor.get("title") or name

        href = anchor.get("href")
        # an anchor without href yields neither an id nor a source
        if href is not None:
            if "-" in href:
                _id = href.split("-")[-1]

            source_list.append(Source(cls.SOURCE_TYPE, cls.HOST + href))

    # artist image
    # only the alt text is used; the thumbnail url (src) is not consumed yet
    image_soup = contact.find("img")
    if image_soup is not None:
        alt = image_soup.get("alt")
        if alt is not None:
            name = alt

    return Artist(
        _id=_id,
        name=name,
        source_list=source_list
    )
@classmethod @classmethod
def parse_album_contact(cls, contact: BeautifulSoup) -> Album: def parse_album_contact(cls, contact: BeautifulSoup) -> Album:
""" """
@ -165,51 +166,50 @@ class Musify(Page):
</a> </a>
</div> </div>
""" """
source_list: List[Source] = [] source_list: List[Source] = []
title = "" title = ""
_id = None _id = None
year = None year = None
artist_list: List[Artist] = [] artist_list: List[Artist] = []
def parse_title_date(title_date: Optional[str], delimiter: str = " - "): def parse_title_date(title_date: Optional[str], delimiter: str = " - "):
if title_date is None: if title_date is None:
return return
title_date = title_date.strip() title_date = title_date.strip()
split_attr = title_date.split(delimiter) split_attr = title_date.split(delimiter)
if len(split_attr) < 2: if len(split_attr) < 2:
return return
if not split_attr[-1].isdigit(): if not split_attr[-1].isdigit():
return return
year = int(split_attr[-1]) year = int(split_attr[-1])
title = delimiter.join(split_attr[:-1]) title = delimiter.join(split_attr[:-1])
# source # source
anchor = contact.find("a") anchor = contact.find("a")
if anchor is not None: if anchor is not None:
href = anchor.get("href") href = anchor.get("href")
# get the title and year # get the title and year
parse_title_date(anchor.get("title")) parse_title_date(anchor.get("title"))
if "-" in href: if "-" in href:
_id = href.split("-")[-1] _id = href.split("-")[-1]
source_list.append(Source(cls.SOURCE_TYPE, cls.HOST + href)) source_list.append(Source(cls.SOURCE_TYPE, cls.HOST + href))
# cover art # cover art
image_soup = contact.find("img") image_soup = contact.find("img")
if image_soup is not None: if image_soup is not None:
alt = image_soup.get("alt") alt = image_soup.get("alt")
if alt is not None: if alt is not None:
title = alt title = alt
cover_art = image_soup.get("src") cover_art = image_soup.get("src")
contact_info_soup = contact.find("div", {"class": "contacts__info"}) contact_info_soup = contact.find("div", {"class": "contacts__info"})
if contact_info_soup is not None: if contact_info_soup is not None:
""" """
@ -218,11 +218,11 @@ class Musify(Page):
<small>Треков: 4</small> <!--tracks--> <small>Треков: 4</small> <!--tracks-->
<small><i class="zmdi zmdi-star zmdi-hc-fw"></i> 9,04</small> <small><i class="zmdi zmdi-star zmdi-hc-fw"></i> 9,04</small>
""" """
title_soup = contact_info_soup.find("strong") title_soup = contact_info_soup.find("strong")
if title_soup is None: if title_soup is None:
parse_title_date(title_soup) parse_title_date(title_soup)
small_list = contact_info_soup.find_all("small") small_list = contact_info_soup.find_all("small")
if len(small_list) == 3: if len(small_list) == 3:
# artist # artist
@ -232,18 +232,17 @@ class Musify(Page):
for artist_str in raw_artist_str.split("&\r\n"): for artist_str in raw_artist_str.split("&\r\n"):
artist_str = artist_str.rstrip("& ...\r\n") artist_str = artist_str.rstrip("& ...\r\n")
artist_str = artist_str.strip() artist_str = artist_str.strip()
if artist_str.endswith("]") and "[" in artist_str: if artist_str.endswith("]") and "[" in artist_str:
artist_str = artist_str.rsplit("[", maxsplit=1)[0] artist_str = artist_str.rsplit("[", maxsplit=1)[0]
artist_list.append(Artist(name=artist_str)) artist_list.append(Artist(name=artist_str))
track_count_soup: BeautifulSoup = small_list[1] track_count_soup: BeautifulSoup = small_list[1]
rating_soup: BeautifulSoup = small_list[2] rating_soup: BeautifulSoup = small_list[2]
else: else:
LOGGER.warning("got an unequal ammount than 3 small elements") LOGGER.warning("got an unequal ammount than 3 small elements")
return Album( return Album(
_id=_id, _id=_id,
title=title, title=title,
@ -251,59 +250,59 @@ class Musify(Page):
date=ID3Timestamp(year=year), date=ID3Timestamp(year=year),
artist_list=artist_list artist_list=artist_list
) )
@classmethod
def parse_contact_container(cls, contact_container_soup: BeautifulSoup) -> List[Union[Artist, Album]]:
    """
    Parse all contact items of a contact container.

    Every div with the class "contacts__item" is inspected. The href of its
    first anchor decides how it is parsed: an url containing "artist" goes
    to parse_artist_contact, otherwise an url containing "release" goes to
    parse_album_contact. Items without anchor, without href or with any
    other url are ignored.

    Args:
        contact_container_soup (BeautifulSoup): the soup of the container.

    Returns:
        List[Union[Artist, Album]]: the parsed objects in document order.
    """
    parsed = []

    item: BeautifulSoup
    for item in contact_container_soup.find_all("div", {"class": "contacts__item"}):
        link = item.find("a")
        if link is None:
            continue

        target = link.get("href")
        if target is None:
            continue

        # "artist" is tested first, so it wins if both words occur in the url
        if "artist" in target:
            parsed.append(cls.parse_artist_contact(item))
        elif "release" in target:
            parsed.append(cls.parse_album_contact(item))

    return parsed
@classmethod @classmethod
def parse_playlist_item(cls, playlist_item_soup: BeautifulSoup) -> Song: def parse_playlist_item(cls, playlist_item_soup: BeautifulSoup) -> Song:
_id = None _id = None
song_title = playlist_item_soup.get("data-name") or "" song_title = playlist_item_soup.get("data-name") or ""
artist_list: List[Artist] = [] artist_list: List[Artist] = []
source_list: List[Source] = [] source_list: List[Source] = []
# details # details
playlist_details: BeautifulSoup = playlist_item_soup.find("div", {"class", "playlist__heading"}) playlist_details: BeautifulSoup = playlist_item_soup.find("div", {"class", "playlist__heading"})
if playlist_details is not None: if playlist_details is not None:
anchor_list = playlist_details.find_all("a") anchor_list = playlist_details.find_all("a")
if len(anchor_list) >= 2: if len(anchor_list) >= 2:
print(anchor_list) print(anchor_list)
# artists # artists
artist_anchor: BeautifulSoup artist_anchor: BeautifulSoup
for artist_anchor in anchor_list[:-1]: for artist_anchor in anchor_list[:-1]:
_id = None _id = None
href = artist_anchor.get("href") href = artist_anchor.get("href")
artist_source: Source = Source(cls.SOURCE_TYPE, cls.HOST + href) artist_source: Source = Source(cls.SOURCE_TYPE, cls.HOST + href)
if "-" in href: if "-" in href:
_id = href.split("-")[-1] _id = href.split("-")[-1]
artist_list.append(Artist( artist_list.append(Artist(
_id=_id, _id=_id,
name=artist_anchor.get_text(strip=True), name=artist_anchor.get_text(strip=True),
source_list=[artist_source] source_list=[artist_source]
)) ))
# track # track
track_soup: BeautifulSoup = anchor_list[-1] track_soup: BeautifulSoup = anchor_list[-1]
""" """
@ -318,11 +317,11 @@ class Musify(Page):
if raw_id.isdigit(): if raw_id.isdigit():
_id = raw_id _id = raw_id
source_list.append(Source(cls.SOURCE_TYPE, cls.HOST + href)) source_list.append(Source(cls.SOURCE_TYPE, cls.HOST + href))
else: else:
LOGGER.warning("there are not enough anchors (2) for artist and track") LOGGER.warning("there are not enough anchors (2) for artist and track")
LOGGER.warning(str(artist_list)) LOGGER.warning(str(artist_list))
""" """
artist_name = playlist_item_soup.get("data-artist") artist_name = playlist_item_soup.get("data-artist")
if artist_name is not None: if artist_name is not None:
@ -333,53 +332,53 @@ class Musify(Page):
raw_id = id_attribute.replace("playerDiv", "") raw_id = id_attribute.replace("playerDiv", "")
if raw_id.isdigit(): if raw_id.isdigit():
_id = raw_id _id = raw_id
return Song( return Song(
_id=_id, _id=_id,
title=song_title, title=song_title,
main_artist_list=artist_list, main_artist_list=artist_list,
source_list=source_list source_list=source_list
) )
@classmethod
def parse_playlist_soup(cls, playlist_soup: BeautifulSoup) -> List[Song]:
    """
    Parse every item of a playlist into a Song.

    Args:
        playlist_soup (BeautifulSoup): the soup of the playlist element.

    Returns:
        List[Song]: one song per div with the class "playlist__item",
            in document order.
    """
    item_soups = playlist_soup.find_all("div", {"class": "playlist__item"})
    return [cls.parse_playlist_item(item_soup) for item_soup in item_soups]
@classmethod
def plaintext_search(cls, query: str) -> Options:
    """
    Search the page with a plain text query.

    The result collects, in this order, everything parsed from the
    "contacts" containers and then the songs parsed from the "playlist"
    elements of the search page.

    Args:
        query (str): the text to search for.

    Returns:
        Options: the search results, or None if get_soup_of_search
            returned no soup.
    """
    soup = cls.get_soup_of_search(query=query)
    if soup is None:
        return None

    results = []

    # div class "contacts": parsed by parse_contact_container
    for container in soup.find_all("div", {"class": "contacts"}):
        results += cls.parse_contact_container(container)

    # div class "playlist": holds the "playlist__item" song entries
    for playlist in soup.find_all("div", {"class": "playlist"}):
        results += cls.parse_playlist_soup(playlist)

    return Options(results)
@classmethod @classmethod
def parse_url(cls, url: str) -> MusifyUrl: def parse_url(cls, url: str) -> MusifyUrl:
parsed = urlparse(url) parsed = urlparse(url)
path = parsed.path.split("/") path = parsed.path.split("/")
split_name = path[2].split("-") split_name = path[2].split("-")
url_id = split_name[-1] url_id = split_name[-1]
name_for_url = "-".join(split_name[:-1]) name_for_url = "-".join(split_name[:-1])
return MusifyUrl( return MusifyUrl(
source_type=MusifyTypes(path[1]), source_type=MusifyTypes(path[1]),
name_without_id=name_for_url, name_without_id=name_for_url,
@ -387,7 +386,7 @@ class Musify(Page):
musify_id=url_id, musify_id=url_id,
url=url url=url
) )
@classmethod
def parse_album_card(cls, album_card: BeautifulSoup) -> Album:
    """
    Build an Album from one release card.

    Two anchors of the card are evaluated:
    - the first anchor that is a direct child of the card: its href
      becomes a source, and the alt text of the image inside it is a
      first guess for the title
    - the first anchor inside the div with the class "card-body": its href
      becomes a source as well, and its text replaces the title

    Args:
        album_card (BeautifulSoup): the soup of a single card element.

    Returns:
        Album: the album with the title and the sources found. The title
            is "" if the card provides none.
    """
    name: str = ""
    source_list: List[Source] = []

    def parse_release_anchor(anchor: BeautifulSoup, text_is_name=False):
        # without nonlocal the assignment below would only create a local
        # variable and the title of the card body would be thrown away
        nonlocal name

        if anchor is None:
            return

        href = anchor.get("href")
        # an anchor without href cannot be turned into a source url
        if href is not None:
            source_list.append(Source(cls.SOURCE_TYPE, cls.HOST + href))

        if text_is_name:
            name = anchor.text

    anchor_list = album_card.find_all("a", recursive=False)
    if len(anchor_list) > 0:
        anchor = anchor_list[0]
        parse_release_anchor(anchor)

        # only the alt text is used; the image url (src) is not consumed yet
        thumbnail: BeautifulSoup = anchor.find("img")
        if thumbnail is not None:
            alt = thumbnail.get("alt")
            if alt is not None:
                name = alt
    else:
        LOGGER.debug("the card has no thumbnail or url")

    card_body = album_card.find("div", {"class": "card-body"})
    if card_body is not None:
        parse_release_anchor(card_body.find("a"), text_is_name=True)

    # the "card-footer" divs of the card are not evaluated yet

    return Album(
        title=name,
        source_list=source_list
    )
@classmethod
def get_discography(cls, url: MusifyUrl) -> List[Album]:
    """
    Fetch the releases of an artist from the "filteralbums" endpoint.

    A POST request is sent to HOST/<source type>/filteralbums with:
        ArtistID: <musify id of the url>
        SortOrder.Property: dateCreated
        SortOrder.IsAscending: false
        X-Requested-With: XMLHttpRequest

    The response is parsed as html, and every div with the class "card"
    is turned into an Album by parse_album_card.

    Args:
        url (MusifyUrl): the parsed url of the artist.

    Returns:
        List[Album]: the albums in the order of the response.
    """
    endpoint = cls.HOST + "/" + url.source_type.value + "/filteralbums"

    r = cls.API_SESSION.post(url=endpoint, json={
        "ArtistID": str(url.musify_id),
        "SortOrder.Property": "dateCreated",
        "SortOrder.IsAscending": False,
        "X-Requested-With": "XMLHttpRequest"
    })
    # logged instead of printed, so library users don't get output on stdout
    LOGGER.debug("discography request to %s returned %s", endpoint, r)

    soup: BeautifulSoup = BeautifulSoup(r.content, features="html.parser")

    return [
        cls.parse_album_card(card_soup)
        for card_soup in soup.find_all("div", {"class": "card"})
    ]
@classmethod @classmethod
def get_artist_from_source(cls, source: Source, flat: bool = False) -> Artist: def get_artist_from_source(cls, source: Source, flat: bool = False) -> Artist:
""" """
@ -505,13 +500,13 @@ class Musify(Page):
Returns: Returns:
Artist: the artist fetched Artist: the artist fetched
""" """
print(source) print(source)
url = cls.parse_url(source.url) url = cls.parse_url(source.url)
print(url) print(url)
discography: List[Album] = cls.get_discography(url) discography: List[Album] = cls.get_discography(url)
return Artist( return Artist(
name="", name="",
main_album_list=discography main_album_list=discography
@ -522,10 +517,10 @@ class Musify(Page):
source_list = artist.source_collection.get_sources_from_page(cls.SOURCE_TYPE) source_list = artist.source_collection.get_sources_from_page(cls.SOURCE_TYPE)
if len(source_list) == 0: if len(source_list) == 0:
return artist return artist
for source in source_list: for source in source_list:
artist.merge(cls.get_artist_from_source(source, flat=flat)) artist.merge(cls.get_artist_from_source(source, flat=flat))
return artist return artist
@classmethod @classmethod