From 0f47cdadb8a98ef2a50b325ef790d2d6c8e26455 Mon Sep 17 00:00:00 2001 From: Hellow Date: Mon, 20 Mar 2023 23:11:55 +0100 Subject: [PATCH] some more scraping --- src/music_kraken/objects/song.py | 25 +++++++-------- src/music_kraken/pages/musify.py | 52 ++++++++++++++++++++++++++++++-- 2 files changed, 61 insertions(+), 16 deletions(-) diff --git a/src/music_kraken/objects/song.py b/src/music_kraken/objects/song.py index 82a2cf0..d323e69 100644 --- a/src/music_kraken/objects/song.py +++ b/src/music_kraken/objects/song.py @@ -51,7 +51,7 @@ class Song(MainObject): "title": None, "unified_title": None, "isrc": None, - "length": None, + "length": None, "tracksort": 0, "genre": None, "notes": FormattedText() @@ -101,12 +101,12 @@ class Song(MainObject): for album in self.album_collection: if album.song_collection.insecure_append(self): album.compile() - + artist: Artist for artist in self.feature_artist_collection: if artist.feature_song_collection.insecure_append(self): artist.compile() - + for artist in self.main_artist_collection: if artist.main_album_collection.insecure_extend(self.album_collection): artist.compile() @@ -261,18 +261,16 @@ class Album(MainObject): for song in self.song_collection: if song.album_collection.insecure_append(self): song.compile() - + artist: Artist for artist in self.artist_collection: if artist.main_album_collection.insecure_append(self): artist.compile() - + label: Label for label in self.label_collection: if label.album_collection.insecure_append(self): label.compile() - - @property def indexing_values(self) -> List[Tuple[str, object]]: @@ -369,15 +367,14 @@ class Album(MainObject): return len(self.artist_collection) > 1 - - """ All objects dependent on Artist """ class Artist(MainObject): - COLLECTION_ATTRIBUTES = ("feature_song_collection", "main_album_collection", "label_collection", "source_collection") + COLLECTION_ATTRIBUTES = ( + "feature_song_collection", "main_album_collection", "label_collection", "source_collection") SIMPLE_ATTRIBUTES = { "name": None, "unified_name": None, @@ -440,12 +437,12 @@ class Artist(MainObject): for song in self.feature_song_collection: if song.feature_artist_collection.insecure_append(self): song.compile() - + album: "Album" for album in self.main_album_collection: if album.artist_collection.insecure_append(self): album.compile() - + label: Label for label in self.label_collection: if label.current_artist_collection.insecure_append(self): @@ -584,7 +581,7 @@ class Label(MainObject): for album in self.album_collection: if album.label_collection.insecure_append(self): album.compile() - + artist: Artist for artist in self.current_artist_collection: if artist.label_collection.insecure_append(self): @@ -602,4 +599,4 @@ class Label(MainObject): def options(self) -> Options: options = [self] options.extend(self.current_artist_collection.shallow_list) - options.extend(self.album_collection.shallow_list) \ No newline at end of file + options.extend(self.album_collection.shallow_list) diff --git a/src/music_kraken/pages/musify.py b/src/music_kraken/pages/musify.py index 72cacdd..bdf6f4c 100644 --- a/src/music_kraken/pages/musify.py +++ b/src/music_kraken/pages/musify.py @@ -780,12 +780,18 @@ class Musify(Page): """ song_name = song_card.get("data-name") artist_list: List[Artist] = [] + source_list: List[Source] = [] tracksort = None + def parse_title(_title: str) -> str: + return _title + + """ # get from parent div _artist_name = song_card.get("data-artist") if _artist_name is not None: artist_list.append(Artist(name=_artist_name)) + """ # get tracksort tracksort_soup: BeautifulSoup = song_card.find("div", {"class": "playlist__position"}) @@ -795,6 +801,48 @@ class Musify(Page): tracksort = int(raw_tracksort) # playlist details + playlist_details: BeautifulSoup = song_card.find("div", {"class": "playlist__details"}) + if playlist_details is not None: + """ +
+ Tamas ft.Zombiez - Voodoo (Feat. Zombiez) + + + + + + + + +
+ """ + # track + anchor_list: List[BeautifulSoup] = playlist_details.find_all("a") + if len(anchor_list) > 1: + track_anchor: BeautifulSoup = anchor_list[-1] + href: str = track_anchor.get("href") + if href is not None: + source_list.append(Source(cls.SOURCE_TYPE, cls.HOST + href)) + song_name = parse_title(track_anchor.get_text(strip=True)) + + # artist + artist_span: BeautifulSoup + for artist_span in playlist_details.find_all("span", {"itemprop": "byArtist"}): + _artist_src = None + _artist_name = None + meta_artist_src = artist_span.find("meta", {"itemprop": "url"}) + if meta_artist_src is not None: + meta_artist_url = meta_artist_src.get("content") + if meta_artist_url is not None: + _artist_src = [Source(cls.SOURCE_TYPE, cls.HOST + meta_artist_url)] + + meta_artist_name = artist_span.find("meta", {"itemprop": "name"}) + if meta_artist_name is not None: + meta_artist_name_text = meta_artist_name.get("content") + _artist_name = meta_artist_name_text + + if _artist_name is not None or _artist_src is not None: + artist_list.append(Artist(name=_artist_name, source_list=_artist_src)) return Song( title=song_name, @@ -810,14 +858,14 @@ class Musify(Page): /html/musify/album_overview.html [] tracklist - [] attributes *(name and country... wooooow and I waste one request for this)* + [] attributes [] ratings :param source: :param flat: :return: """ - album = Album() + album = Album(title="Hi :)") url = cls.parse_url(source.url)