some more scraping

This commit is contained in:
Hellow 2023-03-20 23:11:55 +01:00
parent 6f7763ada5
commit 0f47cdadb8
2 changed files with 61 additions and 16 deletions

View File

@ -51,7 +51,7 @@ class Song(MainObject):
"title": None, "title": None,
"unified_title": None, "unified_title": None,
"isrc": None, "isrc": None,
"length": None, "length": None,
"tracksort": 0, "tracksort": 0,
"genre": None, "genre": None,
"notes": FormattedText() "notes": FormattedText()
@ -272,8 +272,6 @@ class Album(MainObject):
if label.album_collection.insecure_append(self): if label.album_collection.insecure_append(self):
label.compile() label.compile()
@property @property
def indexing_values(self) -> List[Tuple[str, object]]: def indexing_values(self) -> List[Tuple[str, object]]:
return [ return [
@ -369,15 +367,14 @@ class Album(MainObject):
return len(self.artist_collection) > 1 return len(self.artist_collection) > 1
""" """
All objects dependent on Artist All objects dependent on Artist
""" """
class Artist(MainObject): class Artist(MainObject):
COLLECTION_ATTRIBUTES = ("feature_song_collection", "main_album_collection", "label_collection", "source_collection") COLLECTION_ATTRIBUTES = (
"feature_song_collection", "main_album_collection", "label_collection", "source_collection")
SIMPLE_ATTRIBUTES = { SIMPLE_ATTRIBUTES = {
"name": None, "name": None,
"unified_name": None, "unified_name": None,

View File

@ -780,12 +780,18 @@ class Musify(Page):
""" """
song_name = song_card.get("data-name") song_name = song_card.get("data-name")
artist_list: List[Artist] = [] artist_list: List[Artist] = []
source_list: List[Source] = []
tracksort = None tracksort = None
def parse_title(_title: str) -> str:
    """Return the given title text unchanged.

    Pass-through hook: it is applied to the text of the track anchor
    before that text is used as the song name, and currently performs
    no transformation.

    :param _title: raw title text
    :return: the same text, unmodified
    """
    parsed_title = _title
    return parsed_title
"""
# get from parent div # get from parent div
_artist_name = song_card.get("data-artist") _artist_name = song_card.get("data-artist")
if _artist_name is not None: if _artist_name is not None:
artist_list.append(Artist(name=_artist_name)) artist_list.append(Artist(name=_artist_name))
"""
# get tracksort # get tracksort
tracksort_soup: BeautifulSoup = song_card.find("div", {"class": "playlist__position"}) tracksort_soup: BeautifulSoup = song_card.find("div", {"class": "playlist__position"})
@ -795,6 +801,48 @@ class Musify(Page):
tracksort = int(raw_tracksort) tracksort = int(raw_tracksort)
# playlist details # playlist details
playlist_details: BeautifulSoup = song_card.find("div", {"class": "playlist__details"})
if playlist_details is not None:
"""
<div class="playlist__heading">
<a href="/artist/tamas-141317" rel="nofollow">Tamas</a> ft.<a href="/artist/zombiez-630767" rel="nofollow">Zombiez</a> - <a class="strong" href="/track/tamas-zombiez-voodoo-feat-zombiez-16185276">Voodoo (Feat. Zombiez)</a>
<span itemprop="byArtist" itemscope="itemscope" itemtype="http://schema.org/MusicGroup">
<meta content="/artist/tamas-141317" itemprop="url" />
<meta content="Tamas" itemprop="name" />
</span>
<span itemprop="byArtist" itemscope="itemscope" itemtype="http://schema.org/MusicGroup">
<meta content="/artist/zombiez-630767" itemprop="url" />
<meta content="Zombiez" itemprop="name" />
</span>
</div>
"""
# track
anchor_list: List[BeautifulSoup] = playlist_details.find_all("a")
if len(anchor_list) > 1:
track_anchor: BeautifulSoup = anchor_list[-1]
href: str = track_anchor.get("href")
if href is not None:
source_list.append(Source(cls.SOURCE_TYPE, cls.HOST + href))
song_name = parse_title(track_anchor.get_text(strip=True))
# artist
artist_span: BeautifulSoup
for artist_span in playlist_details.find_all("span", {"itemprop": "byArtist"}):
_artist_src = None
_artist_name = None
meta_artist_src = artist_span.find("meta", {"itemprop": "url"})
if meta_artist_src is not None:
meta_artist_url = meta_artist_src.get("content")
if meta_artist_url is not None:
_artist_src = [Source(cls.SOURCE_TYPE, cls.HOST + meta_artist_url)]
meta_artist_name = artist_span.find("meta", {"itemprop": "name"})
if meta_artist_name is not None:
meta_artist_name_text = meta_artist_name.get("content")
_artist_name = meta_artist_name_text
if _artist_name is not None or _artist_src is not None:
artist_list.append(Artist(name=_artist_name, source_list=_artist_src))
return Song( return Song(
title=song_name, title=song_name,
@ -810,14 +858,14 @@ class Musify(Page):
/html/musify/album_overview.html /html/musify/album_overview.html
[] tracklist [] tracklist
[] attributes *(name and country... wooooow and I waste one request for this)* [] attributes
[] ratings [] ratings
:param source: :param source:
:param flat: :param flat:
:return: :return:
""" """
album = Album() album = Album(title="Hi :)")
url = cls.parse_url(source.url) url = cls.parse_url(source.url)