some more scraping
This commit is contained in:
parent
6f7763ada5
commit
0f47cdadb8
@ -51,7 +51,7 @@ class Song(MainObject):
|
|||||||
"title": None,
|
"title": None,
|
||||||
"unified_title": None,
|
"unified_title": None,
|
||||||
"isrc": None,
|
"isrc": None,
|
||||||
"length": None,
|
"length": None,
|
||||||
"tracksort": 0,
|
"tracksort": 0,
|
||||||
"genre": None,
|
"genre": None,
|
||||||
"notes": FormattedText()
|
"notes": FormattedText()
|
||||||
@ -101,12 +101,12 @@ class Song(MainObject):
|
|||||||
for album in self.album_collection:
|
for album in self.album_collection:
|
||||||
if album.song_collection.insecure_append(self):
|
if album.song_collection.insecure_append(self):
|
||||||
album.compile()
|
album.compile()
|
||||||
|
|
||||||
artist: Artist
|
artist: Artist
|
||||||
for artist in self.feature_artist_collection:
|
for artist in self.feature_artist_collection:
|
||||||
if artist.feature_song_collection.insecure_append(self):
|
if artist.feature_song_collection.insecure_append(self):
|
||||||
artist.compile()
|
artist.compile()
|
||||||
|
|
||||||
for artist in self.main_artist_collection:
|
for artist in self.main_artist_collection:
|
||||||
if artist.main_album_collection.insecure_extend(self.album_collection):
|
if artist.main_album_collection.insecure_extend(self.album_collection):
|
||||||
artist.compile()
|
artist.compile()
|
||||||
@ -261,18 +261,16 @@ class Album(MainObject):
|
|||||||
for song in self.song_collection:
|
for song in self.song_collection:
|
||||||
if song.album_collection.insecure_append(self):
|
if song.album_collection.insecure_append(self):
|
||||||
song.compile()
|
song.compile()
|
||||||
|
|
||||||
artist: Artist
|
artist: Artist
|
||||||
for artist in self.artist_collection:
|
for artist in self.artist_collection:
|
||||||
if artist.main_album_collection.insecure_append(self):
|
if artist.main_album_collection.insecure_append(self):
|
||||||
artist.compile()
|
artist.compile()
|
||||||
|
|
||||||
label: Label
|
label: Label
|
||||||
for label in self.label_collection:
|
for label in self.label_collection:
|
||||||
if label.album_collection.insecure_append(self):
|
if label.album_collection.insecure_append(self):
|
||||||
label.compile()
|
label.compile()
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
@property
|
@property
|
||||||
def indexing_values(self) -> List[Tuple[str, object]]:
|
def indexing_values(self) -> List[Tuple[str, object]]:
|
||||||
@ -369,15 +367,14 @@ class Album(MainObject):
|
|||||||
return len(self.artist_collection) > 1
|
return len(self.artist_collection) > 1
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
"""
|
"""
|
||||||
All objects dependent on Artist
|
All objects dependent on Artist
|
||||||
"""
|
"""
|
||||||
|
|
||||||
|
|
||||||
class Artist(MainObject):
|
class Artist(MainObject):
|
||||||
COLLECTION_ATTRIBUTES = ("feature_song_collection", "main_album_collection", "label_collection", "source_collection")
|
COLLECTION_ATTRIBUTES = (
|
||||||
|
"feature_song_collection", "main_album_collection", "label_collection", "source_collection")
|
||||||
SIMPLE_ATTRIBUTES = {
|
SIMPLE_ATTRIBUTES = {
|
||||||
"name": None,
|
"name": None,
|
||||||
"unified_name": None,
|
"unified_name": None,
|
||||||
@ -440,12 +437,12 @@ class Artist(MainObject):
|
|||||||
for song in self.feature_song_collection:
|
for song in self.feature_song_collection:
|
||||||
if song.feature_artist_collection.insecure_append(self):
|
if song.feature_artist_collection.insecure_append(self):
|
||||||
song.compile()
|
song.compile()
|
||||||
|
|
||||||
album: "Album"
|
album: "Album"
|
||||||
for album in self.main_album_collection:
|
for album in self.main_album_collection:
|
||||||
if album.artist_collection.insecure_append(self):
|
if album.artist_collection.insecure_append(self):
|
||||||
album.compile()
|
album.compile()
|
||||||
|
|
||||||
label: Label
|
label: Label
|
||||||
for label in self.label_collection:
|
for label in self.label_collection:
|
||||||
if label.current_artist_collection.insecure_append(self):
|
if label.current_artist_collection.insecure_append(self):
|
||||||
@ -584,7 +581,7 @@ class Label(MainObject):
|
|||||||
for album in self.album_collection:
|
for album in self.album_collection:
|
||||||
if album.label_collection.insecure_append(self):
|
if album.label_collection.insecure_append(self):
|
||||||
album.compile()
|
album.compile()
|
||||||
|
|
||||||
artist: Artist
|
artist: Artist
|
||||||
for artist in self.current_artist_collection:
|
for artist in self.current_artist_collection:
|
||||||
if artist.label_collection.insecure_append(self):
|
if artist.label_collection.insecure_append(self):
|
||||||
@ -602,4 +599,4 @@ class Label(MainObject):
|
|||||||
def options(self) -> Options:
|
def options(self) -> Options:
|
||||||
options = [self]
|
options = [self]
|
||||||
options.extend(self.current_artist_collection.shallow_list)
|
options.extend(self.current_artist_collection.shallow_list)
|
||||||
options.extend(self.album_collection.shallow_list)
|
options.extend(self.album_collection.shallow_list)
|
||||||
|
@ -780,12 +780,18 @@ class Musify(Page):
|
|||||||
"""
|
"""
|
||||||
song_name = song_card.get("data-name")
|
song_name = song_card.get("data-name")
|
||||||
artist_list: List[Artist] = []
|
artist_list: List[Artist] = []
|
||||||
|
source_list: List[Source] = []
|
||||||
tracksort = None
|
tracksort = None
|
||||||
|
|
||||||
|
def parse_title(_title: str) -> str:
|
||||||
|
return _title
|
||||||
|
|
||||||
|
"""
|
||||||
# get from parent div
|
# get from parent div
|
||||||
_artist_name = song_card.get("data-artist")
|
_artist_name = song_card.get("data-artist")
|
||||||
if _artist_name is not None:
|
if _artist_name is not None:
|
||||||
artist_list.append(Artist(name=_artist_name))
|
artist_list.append(Artist(name=_artist_name))
|
||||||
|
"""
|
||||||
|
|
||||||
# get tracksort
|
# get tracksort
|
||||||
tracksort_soup: BeautifulSoup = song_card.find("div", {"class": "playlist__position"})
|
tracksort_soup: BeautifulSoup = song_card.find("div", {"class": "playlist__position"})
|
||||||
@ -795,6 +801,48 @@ class Musify(Page):
|
|||||||
tracksort = int(raw_tracksort)
|
tracksort = int(raw_tracksort)
|
||||||
|
|
||||||
# playlist details
|
# playlist details
|
||||||
|
playlist_details: BeautifulSoup = song_card.find("div", {"class": "playlist__details"})
|
||||||
|
if playlist_details is not None:
|
||||||
|
"""
|
||||||
|
<div class="playlist__heading">
|
||||||
|
<a href="/artist/tamas-141317" rel="nofollow">Tamas</a> ft.<a href="/artist/zombiez-630767" rel="nofollow">Zombiez</a> - <a class="strong" href="/track/tamas-zombiez-voodoo-feat-zombiez-16185276">Voodoo (Feat. Zombiez)</a>
|
||||||
|
<span itemprop="byArtist" itemscope="itemscope" itemtype="http://schema.org/MusicGroup">
|
||||||
|
<meta content="/artist/tamas-141317" itemprop="url" />
|
||||||
|
<meta content="Tamas" itemprop="name" />
|
||||||
|
</span>
|
||||||
|
<span itemprop="byArtist" itemscope="itemscope" itemtype="http://schema.org/MusicGroup">
|
||||||
|
<meta content="/artist/zombiez-630767" itemprop="url" />
|
||||||
|
<meta content="Zombiez" itemprop="name" />
|
||||||
|
</span>
|
||||||
|
</div>
|
||||||
|
"""
|
||||||
|
# track
|
||||||
|
anchor_list: List[BeautifulSoup] = playlist_details.find_all("a")
|
||||||
|
if len(anchor_list) > 1:
|
||||||
|
track_anchor: BeautifulSoup = anchor_list[-1]
|
||||||
|
href: str = track_anchor.get("href")
|
||||||
|
if href is not None:
|
||||||
|
source_list.append(Source(cls.SOURCE_TYPE, cls.HOST + href))
|
||||||
|
song_name = parse_title(track_anchor.get_text(strip=True))
|
||||||
|
|
||||||
|
# artist
|
||||||
|
artist_span: BeautifulSoup
|
||||||
|
for artist_span in playlist_details.find_all("span", {"itemprop": "byArtist"}):
|
||||||
|
_artist_src = None
|
||||||
|
_artist_name = None
|
||||||
|
meta_artist_src = artist_span.find("meta", {"itemprop": "url"})
|
||||||
|
if meta_artist_src is not None:
|
||||||
|
meta_artist_url = meta_artist_src.get("content")
|
||||||
|
if meta_artist_url is not None:
|
||||||
|
_artist_src = [Source(cls.SOURCE_TYPE, cls.HOST + meta_artist_url)]
|
||||||
|
|
||||||
|
meta_artist_name = artist_span.find("meta", {"itemprop": "name"})
|
||||||
|
if meta_artist_name is not None:
|
||||||
|
meta_artist_name_text = meta_artist_name.get("content")
|
||||||
|
_artist_name = meta_artist_name_text
|
||||||
|
|
||||||
|
if _artist_name is not None or _artist_src is not None:
|
||||||
|
artist_list.append(Artist(name=_artist_name, source_list=_artist_src))
|
||||||
|
|
||||||
return Song(
|
return Song(
|
||||||
title=song_name,
|
title=song_name,
|
||||||
@ -810,14 +858,14 @@ class Musify(Page):
|
|||||||
|
|
||||||
/html/musify/album_overview.html
|
/html/musify/album_overview.html
|
||||||
[] tracklist
|
[] tracklist
|
||||||
[] attributes *(name and country... wooooow and I waste one request for this)*
|
[] attributes
|
||||||
[] ratings
|
[] ratings
|
||||||
|
|
||||||
:param source:
|
:param source:
|
||||||
:param flat:
|
:param flat:
|
||||||
:return:
|
:return:
|
||||||
"""
|
"""
|
||||||
album = Album()
|
album = Album(title="Hi :)")
|
||||||
|
|
||||||
url = cls.parse_url(source.url)
|
url = cls.parse_url(source.url)
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user