From a01ea0cfcd5ede94e863bb80d01c727b511cc1f7 Mon Sep 17 00:00:00 2001 From: Hellow Date: Fri, 17 Mar 2023 13:11:18 +0100 Subject: [PATCH] reformat --- src/music_kraken/pages/musify.py | 165 +++++++++++++++---------------- 1 file changed, 80 insertions(+), 85 deletions(-) diff --git a/src/music_kraken/pages/musify.py b/src/music_kraken/pages/musify.py index c11154b..62506d7 100644 --- a/src/music_kraken/pages/musify.py +++ b/src/music_kraken/pages/musify.py @@ -54,9 +54,10 @@ SortOrder.IsAscending: false X-Requested-With: XMLHttpRequest """ + class MusifyTypes(Enum): ARTIST = "artist" - + @dataclass class MusifyUrl: @@ -77,7 +78,7 @@ class Musify(Page): API_SESSION.proxies = shared.proxies SOURCE_TYPE = SourcePages.MUSIFY - + HOST = "https://musify.club" @classmethod @@ -112,39 +113,39 @@ class Musify(Page): LOGGER.warning("too many tries, returning") return None return BeautifulSoup(r.content, features="html.parser") - + @classmethod def parse_artist_contact(cls, contact: BeautifulSoup) -> Artist: source_list: List[Source] = [] name = "" _id = None - + # source anchor = contact.find("a") if anchor is not None: href = anchor.get("href") name = anchor.get("title") - + if "-" in href: _id = href.split("-")[-1] - + source_list.append(Source(cls.SOURCE_TYPE, cls.HOST + href)) - + # artist image image_soup = contact.find("img") if image_soup is not None: alt = image_soup.get("alt") if alt is not None: name = alt - + artist_thumbnail = image_soup.get("src") - + return Artist( _id=_id, name=name, source_list=source_list ) - + @classmethod def parse_album_contact(cls, contact: BeautifulSoup) -> Album: """ @@ -165,51 +166,50 @@ class Musify(Page): """ - + source_list: List[Source] = [] title = "" _id = None year = None artist_list: List[Artist] = [] - + def parse_title_date(title_date: Optional[str], delimiter: str = " - "): if title_date is None: return - - title_date = title_date.strip() + + title_date = title_date.strip() split_attr = title_date.split(delimiter) - + if len(split_attr) < 2: return if not split_attr[-1].isdigit(): return - + year = int(split_attr[-1]) title = delimiter.join(split_attr[:-1]) - + # source anchor = contact.find("a") if anchor is not None: href = anchor.get("href") - + # get the title and year parse_title_date(anchor.get("title")) - - + if "-" in href: _id = href.split("-")[-1] - + source_list.append(Source(cls.SOURCE_TYPE, cls.HOST + href)) - + # cover art image_soup = contact.find("img") if image_soup is not None: alt = image_soup.get("alt") if alt is not None: title = alt - + cover_art = image_soup.get("src") - + contact_info_soup = contact.find("div", {"class": "contacts__info"}) if contact_info_soup is not None: """ @@ -218,11 +218,11 @@ class Musify(Page): Треков: 4 9,04 """ - + title_soup = contact_info_soup.find("strong") if title_soup is None: parse_title_date(title_soup) - + small_list = contact_info_soup.find_all("small") if len(small_list) == 3: # artist @@ -232,18 +232,17 @@ class Musify(Page): for artist_str in raw_artist_str.split("&\r\n"): artist_str = artist_str.rstrip("& ...\r\n") artist_str = artist_str.strip() - + if artist_str.endswith("]") and "[" in artist_str: artist_str = artist_str.rsplit("[", maxsplit=1)[0] - + artist_list.append(Artist(name=artist_str)) - + track_count_soup: BeautifulSoup = small_list[1] rating_soup: BeautifulSoup = small_list[2] else: LOGGER.warning("got an unequal ammount than 3 small elements") - - + return Album( _id=_id, title=title, @@ -251,59 +250,59 @@ class Musify(Page): date=ID3Timestamp(year=year), artist_list=artist_list ) - + @classmethod def parse_contact_container(cls, contact_container_soup: BeautifulSoup) -> List[Union[Artist, Album]]: - #print(contact_container_soup.prettify) + # print(contact_container_soup.prettify) contacts = [] - + # print(contact_container_soup) - + contact: BeautifulSoup for contact in contact_container_soup.find_all("div", {"class": "contacts__item"}): - + anchor_soup = contact.find("a") if anchor_soup is not None: url = anchor_soup.get("href") - + if url is not None: - #print(url) + # print(url) if "artist" in url: contacts.append(cls.parse_artist_contact(contact)) elif "release" in url: contacts.append(cls.parse_album_contact(contact)) return contacts - + @classmethod def parse_playlist_item(cls, playlist_item_soup: BeautifulSoup) -> Song: _id = None song_title = playlist_item_soup.get("data-name") or "" artist_list: List[Artist] = [] source_list: List[Source] = [] - + # details playlist_details: BeautifulSoup = playlist_item_soup.find("div", {"class", "playlist__heading"}) if playlist_details is not None: anchor_list = playlist_details.find_all("a") - + if len(anchor_list) >= 2: print(anchor_list) # artists - artist_anchor: BeautifulSoup + artist_anchor: BeautifulSoup for artist_anchor in anchor_list[:-1]: _id = None href = artist_anchor.get("href") artist_source: Source = Source(cls.SOURCE_TYPE, cls.HOST + href) if "-" in href: _id = href.split("-")[-1] - + artist_list.append(Artist( _id=_id, name=artist_anchor.get_text(strip=True), source_list=[artist_source] )) - + # track track_soup: BeautifulSoup = anchor_list[-1] """ @@ -318,11 +317,11 @@ class Musify(Page): if raw_id.isdigit(): _id = raw_id source_list.append(Source(cls.SOURCE_TYPE, cls.HOST + href)) - + else: LOGGER.warning("there are not enough anchors (2) for artist and track") LOGGER.warning(str(artist_list)) - + """ artist_name = playlist_item_soup.get("data-artist") if artist_name is not None: @@ -333,53 +332,53 @@ class Musify(Page): raw_id = id_attribute.replace("playerDiv", "") if raw_id.isdigit(): _id = raw_id - + return Song( _id=_id, title=song_title, main_artist_list=artist_list, source_list=source_list ) - + @classmethod def parse_playlist_soup(cls, playlist_soup: BeautifulSoup) -> List[Song]: song_list = [] - + for playlist_item_soup in playlist_soup.find_all("div", {"class": "playlist__item"}): song_list.append(cls.parse_playlist_item(playlist_item_soup)) - + return song_list @classmethod def plaintext_search(cls, query: str) -> Options: search_results = [] - + search_soup = cls.get_soup_of_search(query=query) if search_soup is None: return None - + # album and songs # child of div class: contacts row for contact_container_soup in search_soup.find_all("div", {"class": "contacts"}): search_results.extend(cls.parse_contact_container(contact_container_soup)) - + # song # div class: playlist__item for playlist_soup in search_soup.find_all("div", {"class": "playlist"}): search_results.extend(cls.parse_playlist_soup(playlist_soup)) return Options(search_results) - + @classmethod def parse_url(cls, url: str) -> MusifyUrl: parsed = urlparse(url) - + path = parsed.path.split("/") - + split_name = path[2].split("-") url_id = split_name[-1] name_for_url = "-".join(split_name[:-1]) - + return MusifyUrl( source_type=MusifyTypes(path[1]), name_without_id=name_for_url, @@ -387,7 +386,7 @@ class Musify(Page): musify_id=url_id, url=url ) - + @classmethod def parse_album_card(cls, album_card: BeautifulSoup) -> Album: """ @@ -414,51 +413,47 @@ class Musify(Page): """ name: str = "" source_list: List[Source] = [] - + def parse_release_anchor(anchor: BeautifulSoup, text_is_name=False): if anchor is None: return - + source_list.append(Source( cls.SOURCE_TYPE, cls.HOST + anchor.get("href") )) - + if not text_is_name: return - + name = anchor.text - - + anchor_list = album_card.find_all("a", recursive=False) if len(anchor_list) > 0: anchor = anchor_list[0] parse_release_anchor(anchor) - + thumbnail: BeautifulSoup = anchor.find("img") if thumbnail is not None: alt = thumbnail.get("alt") if alt is not None: name = alt - + image_url = thumbnail.get("src") else: LOGGER.debug("the card has no thumbnail or url") - + card_body = album_card.find("div", {"class": "card-body"}) if card_body is not None: parse_release_anchor(card_body.find("a"), text_is_name=True) - - - + card_footer_list = album_card.find_all("div", {"class": "card-footer"}) - + return Album( title=name, source_list=source_list ) - - + @classmethod def get_discography(cls, url: MusifyUrl) -> List[Album]: """ @@ -468,27 +463,27 @@ class Musify(Page): SortOrder.IsAscending: false X-Requested-With: XMLHttpRequest """ - + endpoint = cls.HOST + "/" + url.source_type.value + "/filteralbums" - + r = cls.API_SESSION.post(url=endpoint, json={ "ArtistID": str(url.musify_id), "SortOrder.Property": "dateCreated", "SortOrder.IsAscending": False, "X-Requested-With": "XMLHttpRequest" }) - + soup: BeautifulSoup = BeautifulSoup(r.content, features="html.parser") - + print(r) # print(soup.prettify) - + discography: List[Album] = [] for card_soup in soup.find_all("div", {"class": "card"}): discography.append(cls.parse_album_card(card_soup)) - + return discography - + @classmethod def get_artist_from_source(cls, source: Source, flat: bool = False) -> Artist: """ @@ -505,13 +500,13 @@ class Musify(Page): Returns: Artist: the artist fetched """ - + print(source) url = cls.parse_url(source.url) print(url) - + discography: List[Album] = cls.get_discography(url) - + return Artist( name="", main_album_list=discography @@ -522,10 +517,10 @@ class Musify(Page): source_list = artist.source_collection.get_sources_from_page(cls.SOURCE_TYPE) if len(source_list) == 0: return artist - + for source in source_list: artist.merge(cls.get_artist_from_source(source, flat=flat)) - + return artist @classmethod