feat: improved musify

This commit is contained in:
Hazel 2024-05-08 09:15:41 +02:00
parent 960d3b74ac
commit e3e547c232
4 changed files with 130 additions and 157 deletions

View File

@ -304,10 +304,8 @@ class Downloader:
def goto(self, data_object: DatabaseObject): def goto(self, data_object: DatabaseObject):
page: Type[Page] page: Type[Page]
self.pages.fetch_details(data_object) self.pages.fetch_details(data_object, stop_at_level=1)
print(data_object)
print(data_object.options)
self.set_current_options(GoToResults(data_object.options, max_items_per_page=self.max_displayed_options)) self.set_current_options(GoToResults(data_object.options, max_items_per_page=self.max_displayed_options))
self.print_current_options() self.print_current_options()

View File

@ -164,6 +164,8 @@ class Collection(Generic[T]):
r = c._find_object(other) r = c._find_object(other)
if r is not None: if r is not None:
output("found push to", r, other, self, color=BColors.RED, sep="\t") output("found push to", r, other, self, color=BColors.RED, sep="\t")
if existing_object is not None:
self.remove(existing_object)
return c.append(other, **kwargs) return c.append(other, **kwargs)
if existing_object is None: if existing_object is None:
@ -179,13 +181,11 @@ class Collection(Generic[T]):
raise ValueError(f"Object {other} not found in {self}") raise ValueError(f"Object {other} not found in {self}")
return other return other
"""
for collection_attribute, generator in self.extend_object_to_attribute.items(): for collection_attribute, generator in self.extend_object_to_attribute.items():
other.__getattribute__(collection_attribute).remove(*generator, silent=silent, **kwargs) other.__getattribute__(collection_attribute).remove(*generator, silent=True, **kwargs)
for attribute, new_object in self.append_object_to_attribute.items(): for attribute, new_object in self.append_object_to_attribute.items():
other.__getattribute__(attribute).remove(new_object, silent=silent, **kwargs) other.__getattribute__(attribute).remove(new_object, silent=True, **kwargs)
"""
self._data.remove(existing) self._data.remove(existing)
self._unmap_element(existing) self._unmap_element(existing)

View File

@ -207,7 +207,7 @@ class Song(Base):
r = OPTION_FOREGROUND.value + self.title_string + BColors.ENDC.value + OPTION_BACKGROUND.value r = OPTION_FOREGROUND.value + self.title_string + BColors.ENDC.value + OPTION_BACKGROUND.value
r += get_collection_string(self.album_collection, " from {}", ignore_titles={self.title}) r += get_collection_string(self.album_collection, " from {}", ignore_titles={self.title})
r += get_collection_string(self.main_artist_collection, " by {}") r += get_collection_string(self.main_artist_collection, " by {}")
r += get_collection_string(self.feature_artist_collection, " feat. {}") r += get_collection_string(self.feature_artist_collection, " feat. {}" if not self.main_artist_collection.empty or True else " by {}")
return r return r
@property @property
@ -413,11 +413,6 @@ class Album(Base):
return self.album_type.value return self.album_type.value
"""
All objects dependent on Artist
"""
class Artist(Base): class Artist(Base):
name: str name: str
unified_name: str unified_name: str
@ -593,11 +588,6 @@ class Artist(Base):
return r return r
"""
Label
"""
class Label(Base): class Label(Base):
COLLECTION_STRING_ATTRIBUTES = ("album_collection", "current_artist_collection") COLLECTION_STRING_ATTRIBUTES = ("album_collection", "current_artist_collection")

View File

@ -1,7 +1,7 @@
from collections import defaultdict from collections import defaultdict
from dataclasses import dataclass from dataclasses import dataclass
from enum import Enum from enum import Enum
from typing import List, Optional, Type, Union, Generator from typing import List, Optional, Type, Union, Generator, Dict, Any
from urllib.parse import urlparse from urllib.parse import urlparse
import pycountry import pycountry
@ -24,7 +24,7 @@ from ..objects import (
Lyrics, Lyrics,
Artwork Artwork
) )
from ..utils.config import logging_settings from ..utils.config import logging_settings, main_settings
from ..utils import string_processing, shared from ..utils import string_processing, shared
from ..utils.string_processing import clean_song_title from ..utils.string_processing import clean_song_title
from ..utils.support_classes.query import Query from ..utils.support_classes.query import Query
@ -361,7 +361,7 @@ class Musify(Page):
return Song( return Song(
title=clean_song_title(song_title, artist_name=artist_list[0].name if len(artist_list) > 0 else None), title=clean_song_title(song_title, artist_name=artist_list[0].name if len(artist_list) > 0 else None),
main_artist_list=artist_list, feature_artist_list=artist_list,
source_list=source_list source_list=source_list
) )
@ -510,7 +510,7 @@ class Musify(Page):
title=clean_song_title(track_name, artist_name=artist_list[0].name if len(artist_list) > 0 else None), title=clean_song_title(track_name, artist_name=artist_list[0].name if len(artist_list) > 0 else None),
source_list=source_list, source_list=source_list,
lyrics_list=lyrics_list, lyrics_list=lyrics_list,
main_artist_list=artist_list, feature_artist_list=artist_list,
album_list=album_list, album_list=album_list,
artwork=artwork, artwork=artwork,
) )
@ -652,10 +652,101 @@ class Musify(Page):
return Song( return Song(
title=clean_song_title(song_name, artist_name=artist_list[0].name if len(artist_list) > 0 else None), title=clean_song_title(song_name, artist_name=artist_list[0].name if len(artist_list) > 0 else None),
tracksort=tracksort, tracksort=tracksort,
main_artist_list=artist_list, feature_artist_list=artist_list,
source_list=source_list source_list=source_list
) )
def _parse_album(self, soup: BeautifulSoup) -> Album:
    """
    Build an Album from the parsed html of an album page.

    Three parts of the page are read, in this order:
      1. the breadcrumb list (album name and the artist link),
      2. the ``<meta itemprop=...>`` tags (album url and name; a name found
         here overrides the one taken from the breadcrumb),
      3. the ``<ul class="album-info">`` list (artists and release date).

    Every part is optional: whatever is missing is simply left out of the
    resulting Album.

    :param soup: the soup of the album page
    :return: an Album holding the title, sources, artists and date that could be parsed
    """
    name: Optional[str] = None
    source_list: List[Source] = []
    artist_list: List[Artist] = []
    date: Optional[ID3Timestamp] = None

    # breadcrumb
    # if the breadcrumb list has 4 elements, then
    # the -2 is the artist link,
    # the -1 is the album
    breadcrumb_soup: BeautifulSoup = soup.find("ol", {"class": "breadcrumb"})
    # a page without a breadcrumb list is handled like one with the wrong item count
    breadcrumb_elements: List[BeautifulSoup] = []
    if breadcrumb_soup is not None:
        breadcrumb_elements = breadcrumb_soup.find_all("li", {"class": "breadcrumb-item"})

    if len(breadcrumb_elements) == 4:
        # album
        album_crumb: BeautifulSoup = breadcrumb_elements[-1]
        name = album_crumb.text.strip()

        # artist
        artist_crumb: BeautifulSoup = breadcrumb_elements[-2]
        anchor: BeautifulSoup = artist_crumb.find("a")
        if anchor is not None:
            href = anchor.get("href")
            artist_source_list: List[Source] = []

            if href is not None:
                artist_source_list.append(Source(self.SOURCE_TYPE, self.HOST + href.strip()))

            # the artist is only added if the anchor carries a name
            span: BeautifulSoup = anchor.find("span")
            if span is not None:
                artist_list.append(Artist(
                    name=span.get_text(strip=True),
                    source_list=artist_source_list
                ))
    else:
        self.LOGGER.debug("there are not 4 breadcrumb items, which shouldn't be the case")

    # meta
    meta_url: BeautifulSoup = soup.find("meta", {"itemprop": "url"})
    if meta_url is not None:
        url = meta_url.get("content")
        if url is not None:
            source_list.append(Source(self.SOURCE_TYPE, self.HOST + url))

    meta_name: BeautifulSoup = soup.find("meta", {"itemprop": "name"})
    if meta_name is not None:
        _name = meta_name.get("content")
        if _name is not None:
            name = _name

    # album info
    album_info_ul: BeautifulSoup = soup.find("ul", {"class": "album-info"})
    if album_info_ul is not None:
        artist_anchor: BeautifulSoup
        for artist_anchor in album_info_ul.find_all("a", {"itemprop": "byArtist"}):
            artist_source_list: List[Source] = []

            artist_url_meta = artist_anchor.find("meta", {"itemprop": "url"})
            if artist_url_meta is not None:
                artist_href = artist_url_meta.get("content")
                if artist_href is not None:
                    artist_source_list.append(Source(self.SOURCE_TYPE, url=self.HOST + artist_href))

            artist_meta_name = artist_anchor.find("meta", {"itemprop": "name"})
            if artist_meta_name is not None:
                artist_name = artist_meta_name.get("content")
                if artist_name is not None:
                    artist_list.append(Artist(
                        name=artist_name,
                        source_list=artist_source_list
                    ))

        time_soup: BeautifulSoup = album_info_ul.find("time", {"itemprop": "datePublished"})
        if time_soup is not None:
            raw_datetime = time_soup.get("datetime")
            if raw_datetime is not None:
                try:
                    date = ID3Timestamp.strptime(raw_datetime, "%Y-%m-%d")
                except ValueError:
                    # an unparsable date is not fatal, the album just has no date
                    self.LOGGER.debug(f"Raw datetime doesn't match time format %Y-%m-%d: {raw_datetime}")

    return Album(
        title=name,
        source_list=source_list,
        artist_list=artist_list,
        date=date
    )
def fetch_album(self, source: Source, stop_at_level: int = 1) -> Album: def fetch_album(self, source: Source, stop_at_level: int = 1) -> Album:
""" """
fetches album from source: fetches album from source:
@ -694,19 +785,14 @@ class Musify(Page):
return album return album
def _get_artist_attributes(self, url: MusifyUrl) -> Artist: def _fetch_initial_artist(self, url: MusifyUrl, source: Source) -> Artist:
""" """
fetches the main Artist attributes from this endpoint
https://musify.club/artist/ghost-bath-280348?_pjax=#bodyContent https://musify.club/artist/ghost-bath-280348?_pjax=#bodyContent
it needs to parse html
:param url:
:return:
""" """
r = self.connection.get(f"https://musify.club/{url.source_type.value}/{url.name_with_id}?_pjax=#bodyContent", name="artist_attributes_" + url.name_with_id) r = self.connection.get(f"https://musify.club/{url.source_type.value}/{url.name_with_id}?_pjax=#bodyContent", name="artist_attributes_" + url.name_with_id)
if r is None: if r is None:
return Artist() return Artist(source_list=[source])
soup = self.get_soup_from_response(r) soup = self.get_soup_from_response(r)
@ -821,7 +907,7 @@ class Musify(Page):
notes=notes notes=notes
) )
def _parse_album_card(self, album_card: BeautifulSoup, artist_name: str = None) -> Album: def _parse_album_card(self, album_card: BeautifulSoup, source: Source, artist_name: str = None, **kwargs) -> Album:
""" """
<div class="card release-thumbnail" data-type="2"> <div class="card release-thumbnail" data-type="2">
<a href="/release/ghost-bath-self-loather-2021-1554266"> <a href="/release/ghost-bath-self-loather-2021-1554266">
@ -845,13 +931,17 @@ class Musify(Page):
</div> </div>
""" """
_id: Optional[str] = None album_kwargs: Dict[str, Any] = {
"source_list": [source],
}
name: str = None name: str = None
source_list: List[Source] = [] source_list: List[Source] = []
timestamp: Optional[ID3Timestamp] = None timestamp: Optional[ID3Timestamp] = None
album_status = None album_status = None
def set_name(new_name: str): def set_name(new_name: str):
nonlocal album_kwargs
nonlocal name nonlocal name
nonlocal artist_name nonlocal artist_name
@ -882,7 +972,7 @@ class Musify(Page):
album_status = AlbumStatus.BOOTLEG album_status = AlbumStatus.BOOTLEG
def parse_release_anchor(_anchor: BeautifulSoup, text_is_name=False): def parse_release_anchor(_anchor: BeautifulSoup, text_is_name=False):
nonlocal _id nonlocal album_kwargs
nonlocal name nonlocal name
nonlocal source_list nonlocal source_list
@ -892,21 +982,11 @@ class Musify(Page):
href = _anchor.get("href") href = _anchor.get("href")
if href is not None: if href is not None:
# add url to sources # add url to sources
source_list.append(Source( album_kwargs["source_list"].append(Source(
self.SOURCE_TYPE, self.SOURCE_TYPE,
self.HOST + href self.HOST + href
)) ))
# split id from url
split_href = href.split("-")
if len(split_href) > 1:
_id = split_href[-1]
if not text_is_name:
return
set_name(_anchor.text)
anchor_list = album_card.find_all("a", recursive=False) anchor_list = album_card.find_all("a", recursive=False)
if len(anchor_list) > 0: if len(anchor_list) > 0:
anchor = anchor_list[0] anchor = anchor_list[0]
@ -964,97 +1044,7 @@ class Musify(Page):
album_status=album_status, album_status=album_status,
) )
def _parse_album(self, soup: BeautifulSoup) -> Album: def _fetch_artist_discography(self, artist: Artist, url: MusifyUrl, artist_name: str = None, **kwargs):
name: str = None
source_list: List[Source] = []
artist_list: List[Artist] = []
date: ID3Timestamp = None
"""
if breadcrumb list has 4 elements, then
the -2 is the artist link,
the -1 is the album
"""
# breadcrumb
breadcrumb_soup: BeautifulSoup = soup.find("ol", {"class", "breadcrumb"})
breadcrumb_elements: List[BeautifulSoup] = breadcrumb_soup.find_all("li", {"class": "breadcrumb-item"})
if len(breadcrumb_elements) == 4:
# album
album_crumb: BeautifulSoup = breadcrumb_elements[-1]
name = album_crumb.text.strip()
# artist
artist_crumb: BeautifulSoup = breadcrumb_elements[-2]
anchor: BeautifulSoup = artist_crumb.find("a")
if anchor is not None:
href = anchor.get("href")
artist_source_list: List[Source] = []
if href is not None:
artist_source_list.append(Source(self.SOURCE_TYPE, self.HOST + href.strip()))
span: BeautifulSoup = anchor.find("span")
if span is not None:
artist_list.append(Artist(
name=span.get_text(strip=True),
source_list=artist_source_list
))
else:
self.LOGGER.debug("there are not 4 breadcrumb items, which shouldn't be the case")
# meta
meta_url: BeautifulSoup = soup.find("meta", {"itemprop": "url"})
if meta_url is not None:
url = meta_url.get("content")
if url is not None:
source_list.append(Source(self.SOURCE_TYPE, self.HOST + url))
meta_name: BeautifulSoup = soup.find("meta", {"itemprop": "name"})
if meta_name is not None:
_name = meta_name.get("content")
if _name is not None:
name = _name
# album info
album_info_ul: BeautifulSoup = soup.find("ul", {"class": "album-info"})
if album_info_ul is not None:
artist_anchor: BeautifulSoup
for artist_anchor in album_info_ul.find_all("a", {"itemprop": "byArtist"}):
# line 98
artist_source_list: List[Source] = []
artist_url_meta = artist_anchor.find("meta", {"itemprop": "url"})
if artist_url_meta is not None:
artist_href = artist_url_meta.get("content")
if artist_href is not None:
artist_source_list.append(Source(self.SOURCE_TYPE, url=self.HOST + artist_href))
artist_meta_name = artist_anchor.find("meta", {"itemprop": "name"})
if artist_meta_name is not None:
artist_name = artist_meta_name.get("content")
if artist_name is not None:
artist_list.append(Artist(
name=artist_name,
source_list=artist_source_list
))
time_soup: BeautifulSoup = album_info_ul.find("time", {"itemprop": "datePublished"})
if time_soup is not None:
raw_datetime = time_soup.get("datetime")
if raw_datetime is not None:
try:
date = ID3Timestamp.strptime(raw_datetime, "%Y-%m-%d")
except ValueError:
self.LOGGER.debug(f"Raw datetime doesn't match time format %Y-%m-%d: {raw_datetime}")
return Album(
title=name,
source_list=source_list,
artist_list=artist_list,
date=date
)
def _get_discography(self, artist: Artist, url: MusifyUrl, artist_name: str = None, stop_at_level: int = 1) -> Generator[Album, None, None]:
""" """
POST https://musify.club/artist/filteralbums POST https://musify.club/artist/filteralbums
ArtistID: 280348 ArtistID: 280348
@ -1062,6 +1052,8 @@ class Musify(Page):
SortOrder.IsAscending: false SortOrder.IsAscending: false
X-Requested-With: XMLHttpRequest X-Requested-With: XMLHttpRequest
""" """
_download_all = kwargs.get("download_all", False)
_album_type_blacklist = kwargs.get("album_type_blacklist", main_settings["album_type_blacklist"])
endpoint = self.HOST + "/" + url.source_type.value + "/filteralbums" endpoint = self.HOST + "/" + url.source_type.value + "/filteralbums"
@ -1072,36 +1064,29 @@ class Musify(Page):
"X-Requested-With": "XMLHttpRequest" "X-Requested-With": "XMLHttpRequest"
}, name="discography_" + url.name_with_id) }, name="discography_" + url.name_with_id)
if r is None: if r is None:
return [] return
soup: BeautifulSoup = BeautifulSoup(r.content, features="html.parser")
soup: BeautifulSoup = self.get_soup_from_response(r)
for card_soup in soup.find_all("div", {"class": "card"}): for card_soup in soup.find_all("div", {"class": "card"}):
album = self._parse_album_card(card_soup, artist_name) album = self._parse_album_card(card_soup, source, artist_name, **kwargs)
if album.album_type is AlbumType.COMPILATION_ALBUM or album.album_type is AlbumType.MIXTAPE: if album.album_type in _album_type_blacklist:
continue continue
artist.main_album_collection.append(album) artist.main_album_collection.append(album)
def fetch_artist(self, source: Source, **kwargs) -> Artist:
    """
    Fetch an artist from the given source.

    TODO
    [x] discography
    [x] attributes
    [] picture gallery

    :param source: the source of the artist page
    :param kwargs: fetch options, forwarded to the discography fetch
    :return: the fetched artist
    """
    url = parse_url(source.url)

    # _fetch_initial_artist takes only (url, source); forwarding **kwargs to it
    # raises a TypeError as soon as any option is passed in.
    artist = self._fetch_initial_artist(url, source=source)

    # NOTE(review): _fetch_artist_discography reads a name `source` that is not
    # among its parameters - confirm where that is supposed to come from.
    self._fetch_artist_discography(artist, url, artist.name, **kwargs)

    return artist