diff --git a/src/music_kraken/pages/musify.py b/src/music_kraken/pages/musify.py
index 91f500c..defd77e 100644
--- a/src/music_kraken/pages/musify.py
+++ b/src/music_kraken/pages/musify.py
@@ -67,7 +67,7 @@ class Musify(Page):
             return None
         if r.status_code != 200:
             if r.status_code in [503] and trie < cls.TRIES:
-                LOGGER.warning(f"youtube blocked downloading. ({trie}-{cls.TRIES})")
+                LOGGER.warning(f"{cls.__name__} blocked downloading. ({trie}-{cls.TRIES})")
                 LOGGER.warning(f"retrying in {cls.TIMEOUT} seconds again")
                 time.sleep(cls.TIMEOUT)
                 return cls.get_soup_of_search(query, trie=trie + 1)
@@ -200,13 +200,16 @@ class Musify(Page):
                     artist_str = artist_str.rstrip("& ...\r\n")
                     artist_str = artist_str.strip()
 
+                    # drop a trailing "[...]" suffix and the whitespace in front of it
+                    if artist_str.endswith("]") and "[" in artist_str:
+                        artist_str = artist_str.rsplit("[", maxsplit=1)[0].rstrip()
+
                     artist_list.append(Artist(name=artist_str))
 
                 track_count_soup: BeautifulSoup = small_list[1]
                 rating_soup: BeautifulSoup = small_list[2]
             else:
                 LOGGER.warning("got an unequal ammount than 3 small elements")
-
 
 
         return Album(
@@ -240,10 +243,88 @@ class Musify(Page):
             contacts.append(cls.parse_album_contact(contact))
         return contacts
 
+    @classmethod
+    def parse_playlist_item(cls, playlist_item_soup: BeautifulSoup) -> Song:
+        """
+        Parse one playlist item element into a Song.
+
+        The title is read from the "data-name" attribute, artists and sources from the
+        anchors inside the "playlist__heading" div and the id from the track href or,
+        if it is numeric, from the "id" attribute of the item itself.
+        """
+        song_id = None
+        song_title = playlist_item_soup.get("data-name") or ""
+        artist_list: List[Artist] = []
+        source_list: List[Source] = []
+
+        # details
+        playlist_details: BeautifulSoup = playlist_item_soup.find("div", {"class": "playlist__heading"})
+        if playlist_details is not None:
+            anchor_list = playlist_details.find_all("a")
+
+            if len(anchor_list) >= 2:
+                # artists: every anchor except the last one
+                artist_anchor: BeautifulSoup
+                for artist_anchor in anchor_list[:-1]:
+                    # kept apart from song_id, so an artist id can never end up as the id of the song
+                    artist_id = None
+                    artist_source_list: List[Source] = []
+
+                    href = artist_anchor.get("href")
+                    if href is not None:
+                        artist_source_list.append(Source(cls.SOURCE_TYPE, cls.HOST + href))
+                        if "-" in href:
+                            artist_id = href.split("-")[-1]
+
+                    artist_list.append(Artist(
+                        _id=artist_id,
+                        name=artist_anchor.get_text(strip=True),
+                        source_list=artist_source_list
+                    ))
+
+                # track: the last anchor
+                track_soup: BeautifulSoup = anchor_list[-1]
+                # TODO
+                # this anchor text may have something like (feat. some artist)
+                # which is not acceptable
+                href = track_soup.get("href")
+                if href is not None:
+                    if "-" in href:
+                        raw_id: str = href.split("-")[-1]
+                        if raw_id.isdigit():
+                            song_id = raw_id
+                    source_list.append(Source(cls.SOURCE_TYPE, cls.HOST + href))
+
+            else:
+                LOGGER.warning("there are not enough anchors (2) for artist and track")
+                LOGGER.warning(str(anchor_list))
+
+        # disabled fallback, kept for reference:
+        # artist_name = playlist_item_soup.get("data-artist")
+        # if artist_name is not None:
+        #     artist_list.append(Artist(name=artist_name))
+
+        # a numeric id in the "id" attribute overrides the id taken from the track href
+        id_attribute = playlist_item_soup.get("id")
+        if id_attribute is not None:
+            raw_id = id_attribute.replace("playerDiv", "")
+            if raw_id.isdigit():
+                song_id = raw_id
+
+        return Song(
+            _id=song_id,
+            title=song_title,
+            main_artist_list=artist_list,
+            source_list=source_list
+        )
+
     @classmethod
     def parse_playlist_soup(cls, playlist_soup: BeautifulSoup) -> List[Song]:
-        # print(playlist_soup.prettify)
-        return []
+        """Parse every div with the class "playlist__item" inside the given soup into a Song."""
+        return [
+            cls.parse_playlist_item(playlist_item_soup)
+            for playlist_item_soup in playlist_soup.find_all("div", {"class": "playlist__item"})
+        ]
 
     @classmethod
     def plaintext_search(cls, query: str) -> Options:
diff --git a/src/musify_search.py b/src/musify_search.py
index 5b28d5c..3164ad4 100644
--- a/src/musify_search.py
+++ b/src/musify_search.py
@@ -2,7 +2,7 @@ from music_kraken import objects
 from music_kraken.pages import Musify
 
 
-results = Musify.search_by_query("#a Ghost Bath")
+results = Musify.search_by_query("#a Lorna Shore #t Wrath")
 
 print(results)
 exit()