some more scraping

This commit is contained in:
Hellow 2023-03-20 23:11:55 +01:00
parent 6f7763ada5
commit 0f47cdadb8
2 changed files with 61 additions and 16 deletions

View File

@ -51,7 +51,7 @@ class Song(MainObject):
"title": None,
"unified_title": None,
"isrc": None,
"length": None,
"length": None,
"tracksort": 0,
"genre": None,
"notes": FormattedText()
@ -101,12 +101,12 @@ class Song(MainObject):
for album in self.album_collection:
if album.song_collection.insecure_append(self):
album.compile()
artist: Artist
for artist in self.feature_artist_collection:
if artist.feature_song_collection.insecure_append(self):
artist.compile()
for artist in self.main_artist_collection:
if artist.main_album_collection.insecure_extend(self.album_collection):
artist.compile()
@ -261,18 +261,16 @@ class Album(MainObject):
for song in self.song_collection:
if song.album_collection.insecure_append(self):
song.compile()
artist: Artist
for artist in self.artist_collection:
if artist.main_album_collection.insecure_append(self):
artist.compile()
label: Label
for label in self.label_collection:
if label.album_collection.insecure_append(self):
label.compile()
@property
def indexing_values(self) -> List[Tuple[str, object]]:
@ -369,15 +367,14 @@ class Album(MainObject):
return len(self.artist_collection) > 1
"""
All objects dependent on Artist
"""
class Artist(MainObject):
COLLECTION_ATTRIBUTES = ("feature_song_collection", "main_album_collection", "label_collection", "source_collection")
COLLECTION_ATTRIBUTES = (
"feature_song_collection", "main_album_collection", "label_collection", "source_collection")
SIMPLE_ATTRIBUTES = {
"name": None,
"unified_name": None,
@ -440,12 +437,12 @@ class Artist(MainObject):
for song in self.feature_song_collection:
if song.feature_artist_collection.insecure_append(self):
song.compile()
album: "Album"
for album in self.main_album_collection:
if album.artist_collection.insecure_append(self):
album.compile()
label: Label
for label in self.label_collection:
if label.current_artist_collection.insecure_append(self):
@ -584,7 +581,7 @@ class Label(MainObject):
for album in self.album_collection:
if album.label_collection.insecure_append(self):
album.compile()
artist: Artist
for artist in self.current_artist_collection:
if artist.label_collection.insecure_append(self):
@ -602,4 +599,4 @@ class Label(MainObject):
def options(self) -> Options:
options = [self]
options.extend(self.current_artist_collection.shallow_list)
options.extend(self.album_collection.shallow_list)
options.extend(self.album_collection.shallow_list)

View File

@ -780,12 +780,18 @@ class Musify(Page):
"""
song_name = song_card.get("data-name")
artist_list: List[Artist] = []
source_list: List[Source] = []
tracksort = None
def parse_title(_title: str) -> str:
return _title
"""
# get from parent div
_artist_name = song_card.get("data-artist")
if _artist_name is not None:
artist_list.append(Artist(name=_artist_name))
"""
# get tracksort
tracksort_soup: BeautifulSoup = song_card.find("div", {"class": "playlist__position"})
@ -795,6 +801,48 @@ class Musify(Page):
tracksort = int(raw_tracksort)
# playlist details
playlist_details: BeautifulSoup = song_card.find("div", {"class": "playlist__details"})
if playlist_details is not None:
"""
<div class="playlist__heading">
<a href="/artist/tamas-141317" rel="nofollow">Tamas</a> ft.<a href="/artist/zombiez-630767" rel="nofollow">Zombiez</a> - <a class="strong" href="/track/tamas-zombiez-voodoo-feat-zombiez-16185276">Voodoo (Feat. Zombiez)</a>
<span itemprop="byArtist" itemscope="itemscope" itemtype="http://schema.org/MusicGroup">
<meta content="/artist/tamas-141317" itemprop="url" />
<meta content="Tamas" itemprop="name" />
</span>
<span itemprop="byArtist" itemscope="itemscope" itemtype="http://schema.org/MusicGroup">
<meta content="/artist/zombiez-630767" itemprop="url" />
<meta content="Zombiez" itemprop="name" />
</span>
</div>
"""
# track
anchor_list: List[BeautifulSoup] = playlist_details.find_all("a")
if len(anchor_list) > 1:
track_anchor: BeautifulSoup = anchor_list[-1]
href: str = track_anchor.get("href")
if href is not None:
source_list.append(Source(cls.SOURCE_TYPE, cls.HOST + href))
song_name = parse_title(track_anchor.get_text(strip=True))
# artist
artist_span: BeautifulSoup
for artist_span in playlist_details.find_all("span", {"itemprop": "byArtist"}):
_artist_src = None
_artist_name = None
meta_artist_src = artist_span.find("meta", {"itemprop": "url"})
if meta_artist_src is not None:
meta_artist_url = meta_artist_src.get("content")
if meta_artist_url is not None:
_artist_src = [Source(cls.SOURCE_TYPE, cls.HOST + meta_artist_url)]
meta_artist_name = artist_span.find("meta", {"itemprop": "name"})
if meta_artist_name is not None:
meta_artist_name_text = meta_artist_name.get("content")
_artist_name = meta_artist_name_text
if _artist_name is not None or _artist_src is not None:
artist_list.append(Artist(name=_artist_name, source_list=_artist_src))
return Song(
title=song_name,
@ -810,14 +858,14 @@ class Musify(Page):
/html/musify/album_overview.html
[] tracklist
[] attributes *(name and country... wooooow and I waste one request for this)*
[] attributes
[] ratings
:param source:
:param flat:
:return:
"""
album = Album()
album = Album(title="Hi :)")
url = cls.parse_url(source.url)