some more scraping
This commit is contained in:
parent
6f7763ada5
commit
0f47cdadb8
@ -51,7 +51,7 @@ class Song(MainObject):
|
|||||||
"title": None,
|
"title": None,
|
||||||
"unified_title": None,
|
"unified_title": None,
|
||||||
"isrc": None,
|
"isrc": None,
|
||||||
"length": None,
|
"length": None,
|
||||||
"tracksort": 0,
|
"tracksort": 0,
|
||||||
"genre": None,
|
"genre": None,
|
||||||
"notes": FormattedText()
|
"notes": FormattedText()
|
||||||
@ -272,8 +272,6 @@ class Album(MainObject):
|
|||||||
if label.album_collection.insecure_append(self):
|
if label.album_collection.insecure_append(self):
|
||||||
label.compile()
|
label.compile()
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
@property
|
@property
|
||||||
def indexing_values(self) -> List[Tuple[str, object]]:
|
def indexing_values(self) -> List[Tuple[str, object]]:
|
||||||
return [
|
return [
|
||||||
@ -369,15 +367,14 @@ class Album(MainObject):
|
|||||||
return len(self.artist_collection) > 1
|
return len(self.artist_collection) > 1
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
"""
|
"""
|
||||||
All objects dependent on Artist
|
All objects dependent on Artist
|
||||||
"""
|
"""
|
||||||
|
|
||||||
|
|
||||||
class Artist(MainObject):
|
class Artist(MainObject):
|
||||||
COLLECTION_ATTRIBUTES = ("feature_song_collection", "main_album_collection", "label_collection", "source_collection")
|
COLLECTION_ATTRIBUTES = (
|
||||||
|
"feature_song_collection", "main_album_collection", "label_collection", "source_collection")
|
||||||
SIMPLE_ATTRIBUTES = {
|
SIMPLE_ATTRIBUTES = {
|
||||||
"name": None,
|
"name": None,
|
||||||
"unified_name": None,
|
"unified_name": None,
|
||||||
|
@ -780,12 +780,18 @@ class Musify(Page):
|
|||||||
"""
|
"""
|
||||||
song_name = song_card.get("data-name")
|
song_name = song_card.get("data-name")
|
||||||
artist_list: List[Artist] = []
|
artist_list: List[Artist] = []
|
||||||
|
source_list: List[Source] = []
|
||||||
tracksort = None
|
tracksort = None
|
||||||
|
|
||||||
|
def parse_title(_title: str) -> str:
|
||||||
|
return _title
|
||||||
|
|
||||||
|
"""
|
||||||
# get from parent div
|
# get from parent div
|
||||||
_artist_name = song_card.get("data-artist")
|
_artist_name = song_card.get("data-artist")
|
||||||
if _artist_name is not None:
|
if _artist_name is not None:
|
||||||
artist_list.append(Artist(name=_artist_name))
|
artist_list.append(Artist(name=_artist_name))
|
||||||
|
"""
|
||||||
|
|
||||||
# get tracksort
|
# get tracksort
|
||||||
tracksort_soup: BeautifulSoup = song_card.find("div", {"class": "playlist__position"})
|
tracksort_soup: BeautifulSoup = song_card.find("div", {"class": "playlist__position"})
|
||||||
@ -795,6 +801,48 @@ class Musify(Page):
|
|||||||
tracksort = int(raw_tracksort)
|
tracksort = int(raw_tracksort)
|
||||||
|
|
||||||
# playlist details
|
# playlist details
|
||||||
|
playlist_details: BeautifulSoup = song_card.find("div", {"class": "playlist__details"})
|
||||||
|
if playlist_details is not None:
|
||||||
|
"""
|
||||||
|
<div class="playlist__heading">
|
||||||
|
<a href="/artist/tamas-141317" rel="nofollow">Tamas</a> ft.<a href="/artist/zombiez-630767" rel="nofollow">Zombiez</a> - <a class="strong" href="/track/tamas-zombiez-voodoo-feat-zombiez-16185276">Voodoo (Feat. Zombiez)</a>
|
||||||
|
<span itemprop="byArtist" itemscope="itemscope" itemtype="http://schema.org/MusicGroup">
|
||||||
|
<meta content="/artist/tamas-141317" itemprop="url" />
|
||||||
|
<meta content="Tamas" itemprop="name" />
|
||||||
|
</span>
|
||||||
|
<span itemprop="byArtist" itemscope="itemscope" itemtype="http://schema.org/MusicGroup">
|
||||||
|
<meta content="/artist/zombiez-630767" itemprop="url" />
|
||||||
|
<meta content="Zombiez" itemprop="name" />
|
||||||
|
</span>
|
||||||
|
</div>
|
||||||
|
"""
|
||||||
|
# track
|
||||||
|
anchor_list: List[BeautifulSoup] = playlist_details.find_all("a")
|
||||||
|
if len(anchor_list) > 1:
|
||||||
|
track_anchor: BeautifulSoup = anchor_list[-1]
|
||||||
|
href: str = track_anchor.get("href")
|
||||||
|
if href is not None:
|
||||||
|
source_list.append(Source(cls.SOURCE_TYPE, cls.HOST + href))
|
||||||
|
song_name = parse_title(track_anchor.get_text(strip=True))
|
||||||
|
|
||||||
|
# artist
|
||||||
|
artist_span: BeautifulSoup
|
||||||
|
for artist_span in playlist_details.find_all("span", {"itemprop": "byArtist"}):
|
||||||
|
_artist_src = None
|
||||||
|
_artist_name = None
|
||||||
|
meta_artist_src = artist_span.find("meta", {"itemprop": "url"})
|
||||||
|
if meta_artist_src is not None:
|
||||||
|
meta_artist_url = meta_artist_src.get("content")
|
||||||
|
if meta_artist_url is not None:
|
||||||
|
_artist_src = [Source(cls.SOURCE_TYPE, cls.HOST + meta_artist_url)]
|
||||||
|
|
||||||
|
meta_artist_name = artist_span.find("meta", {"itemprop": "name"})
|
||||||
|
if meta_artist_name is not None:
|
||||||
|
meta_artist_name_text = meta_artist_name.get("content")
|
||||||
|
_artist_name = meta_artist_name_text
|
||||||
|
|
||||||
|
if _artist_name is not None or _artist_src is not None:
|
||||||
|
artist_list.append(Artist(name=_artist_name, source_list=_artist_src))
|
||||||
|
|
||||||
return Song(
|
return Song(
|
||||||
title=song_name,
|
title=song_name,
|
||||||
@ -810,14 +858,14 @@ class Musify(Page):
|
|||||||
|
|
||||||
/html/musify/album_overview.html
|
/html/musify/album_overview.html
|
||||||
[] tracklist
|
[] tracklist
|
||||||
[] attributes *(name and country... wooooow and I waste one request for this)*
|
[] attributes
|
||||||
[] ratings
|
[] ratings
|
||||||
|
|
||||||
:param source:
|
:param source:
|
||||||
:param flat:
|
:param flat:
|
||||||
:return:
|
:return:
|
||||||
"""
|
"""
|
||||||
album = Album()
|
album = Album(title="Hi :)")
|
||||||
|
|
||||||
url = cls.parse_url(source.url)
|
url = cls.parse_url(source.url)
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user