fixed some bugs in the preset

This commit is contained in:
Hellow 2023-03-27 18:41:50 +02:00
parent ac474c6079
commit 2bae6e1cbd
3 changed files with 64 additions and 34 deletions

View File

@ -5,11 +5,15 @@ from music_kraken.pages import EncyclopaediaMetallum
def search(): def search():
results = EncyclopaediaMetallum.search_by_query("#a Ghost Bath") results = EncyclopaediaMetallum.search_by_query("#a Ghost Bath")
print(results) print(results)
print(results[0].source_collection)
def fetch_artist(): def fetch_artist():
artist = objects.Artist( artist = objects.Artist(
source_list=[objects.Source(objects.SourcePages.MUSIFY, "https://musify.club/artist/psychonaut-4-83193")] source_list=[
objects.Source(objects.SourcePages.MUSIFY, "https://musify.club/artist/psychonaut-4-83193"),
objects.Source(objects.SourcePages.ENCYCLOPAEDIA_METALLUM, "https://www.metal-archives.com/bands/Ghost_Bath/3540372489")
]
) )
artist = EncyclopaediaMetallum.fetch_details(artist) artist = EncyclopaediaMetallum.fetch_details(artist)
@ -34,4 +38,4 @@ def fetch_album():
if __name__ == "__main__": if __name__ == "__main__":
search() fetch_artist()

View File

@ -36,38 +36,42 @@ class Page:
@classmethod @classmethod
def get_request(cls, url: str, accepted_response_codes: set = set((200,)), trie: int = 0) -> Optional[ def get_request(cls, url: str, accepted_response_codes: set = set((200,)), trie: int = 0) -> Optional[
requests.Response]: requests.Response]:
retry = False
try: try:
r = cls.API_SESSION.get(url, timeout=cls.TIMEOUT) r = cls.API_SESSION.get(url, timeout=cls.TIMEOUT)
except requests.exceptions.Timeout: except requests.exceptions.Timeout:
return None retry = True
if r.status_code in accepted_response_codes: if not retry and r.status_code in accepted_response_codes:
return r return r
LOGGER.warning(f"{cls.__name__} responded wit {r.status_code} at {url}. ({trie}-{cls.TRIES})") LOGGER.warning(f"{cls.__name__} responded wit {r.status_code} at GET:{url}. ({trie}-{cls.TRIES})")
LOGGER.debug(r.content) LOGGER.debug(r.content)
if trie <= cls.TRIES: if trie >= cls.TRIES:
LOGGER.warning("to many tries. Aborting.") LOGGER.warning("to many tries. Aborting.")
return None
return cls.get_request(url, accepted_response_codes, trie + 1) return cls.get_request(url, accepted_response_codes, trie + 1)
@classmethod @classmethod
def post_request(cls, url: str, json: dict, accepted_response_codes: set = set((200,)), trie: int = 0) -> Optional[ def post_request(cls, url: str, json: dict, accepted_response_codes: set = set((200,)), trie: int = 0) -> Optional[
requests.Response]: requests.Response]:
retry = False
try: try:
r = cls.API_SESSION.post(url, json=json, timeout=cls.TIMEOUT) r = cls.API_SESSION.post(url, json=json, timeout=cls.TIMEOUT)
except requests.exceptions.Timeout: except requests.exceptions.Timeout:
return None retry = True
if r.status_code in accepted_response_codes: if not retry and r.status_code in accepted_response_codes:
return r return r
LOGGER.warning(f"{cls.__name__} responded wit {r.status_code} at {url}. ({trie}-{cls.TRIES})") LOGGER.warning(f"{cls.__name__} responded wit {r.status_code} at POST:{url}. ({trie}-{cls.TRIES})")
LOGGER.debug(r.content) LOGGER.debug(r.content)
if trie <= cls.TRIES: if trie >= cls.TRIES:
LOGGER.warning("to many tries. Aborting.") LOGGER.warning("to many tries. Aborting.")
return None
return cls.post_request(url, accepted_response_codes, trie + 1) return cls.post_request(url, accepted_response_codes, trie + 1)
@ -163,7 +167,7 @@ class Page:
new_music_object: DatabaseObject = type(music_object)() new_music_object: DatabaseObject = type(music_object)()
source: Source source: Source
for source in music_object.source_collection: for source in music_object.source_collection.get_sources_from_page(cls.SOURCE_TYPE):
new_music_object.merge(cls._fetch_object_from_source(source=source, obj_type=type(music_object), stop_at_level=stop_at_level)) new_music_object.merge(cls._fetch_object_from_source(source=source, obj_type=type(music_object), stop_at_level=stop_at_level))
collections = { collections = {

View File

@ -299,12 +299,7 @@ class EncyclopaediaMetallum(Page):
return artist return artist
@classmethod @classmethod
def fetch_artist_attributes(cls, artist: Artist, url: str) -> Artist: def _parse_artist_attributes(cls, artist_soup: BeautifulSoup) -> Artist:
r = cls.get_request(url)
if r is None:
return artist
soup: BeautifulSoup = cls.get_soup_from_response(r)
country: pycountry.Countrie = None country: pycountry.Countrie = None
formed_in_year: int = None formed_in_year: int = None
genre: str = None genre: str = None
@ -312,7 +307,7 @@ class EncyclopaediaMetallum(Page):
label_name: str = None label_name: str = None
label_url: str = None label_url: str = None
band_stat_soup = soup.find("div", {"id": "band_stats"}) band_stat_soup = artist_soup.find("div", {"id": "band_stats"})
for dl_soup in band_stat_soup.find_all("dl"): for dl_soup in band_stat_soup.find_all("dl"):
for title, data in zip(dl_soup.find_all("dt"), dl_soup.find_all("dd")): for title, data in zip(dl_soup.find_all("dt"), dl_soup.find_all("dd")):
title_text = title.text title_text = title.text
@ -320,7 +315,6 @@ class EncyclopaediaMetallum(Page):
if "Country of origin:" == title_text: if "Country of origin:" == title_text:
href = data.find('a').get('href') href = data.find('a').get('href')
country = pycountry.countries.get(alpha_2=href.split("/")[-1]) country = pycountry.countries.get(alpha_2=href.split("/")[-1])
artist.country = country
continue continue
# not needed: Location: Minot, North Dakota # not needed: Location: Minot, North Dakota
@ -335,15 +329,12 @@ class EncyclopaediaMetallum(Page):
if not data.text.isnumeric(): if not data.text.isnumeric():
continue continue
formed_in_year = int(data.text) formed_in_year = int(data.text)
artist.formed_in = ID3Timestamp(year=formed_in_year)
continue continue
if "Genre:" == title_text: if "Genre:" == title_text:
genre = data.text genre = data.text
artist.general_genre = genre
continue continue
if "Lyrical themes:" == title_text: if "Lyrical themes:" == title_text:
lyrical_themes = data.text.split(", ") lyrical_themes = data.text.split(", ")
artist.lyrical_themes = lyrical_themes
continue continue
if "Current label:" == title_text: if "Current label:" == title_text:
label_name = data.text label_name = data.text
@ -355,22 +346,36 @@ class EncyclopaediaMetallum(Page):
if type(label_url) is str and "/" in label_url: if type(label_url) is str and "/" in label_url:
label_id = label_url.split("/")[-1] label_id = label_url.split("/")[-1]
artist.label_collection.append(
Label(
_id=label_id,
name=label_name,
source_list=[
Source(cls.SOURCE_TYPE, label_url)
]
))
""" """
TODO
years active: 2012-present years active: 2012-present
process this and add field to class process this and add field to class
""" """
return artist return Artist(
country=country,
formed_in=ID3Timestamp(year=formed_in_year),
general_genre=genre,
lyrical_themes=lyrical_themes,
label_list=[
Label(
name=label_name,
source_list=[
Source(cls.SOURCE_TYPE, label_url)
]
)
]
)
@classmethod
def _fetch_artist_attributes(cls, url: str) -> Artist:
print(url)
r = cls.get_request(url)
if r is None:
return Artist()
soup: BeautifulSoup = cls.get_soup_from_response(r)
return cls._parse_artist_attributes(artist_soup=soup)
@classmethod @classmethod
def fetch_band_notes(cls, artist: Artist, ma_artist_id: str) -> Artist: def fetch_band_notes(cls, artist: Artist, ma_artist_id: str) -> Artist:
@ -385,6 +390,23 @@ class EncyclopaediaMetallum(Page):
artist.notes.html = r.text artist.notes.html = r.text
return artist return artist
@classmethod
def _fetch_artist_from_source(cls, source: Source, stop_at_level: int = 1) -> Artist:
"""
What it could fetch, and what is implemented:
[x] https://www.metal-archives.com/bands/Ghost_Bath/3540372489
[x] https://www.metal-archives.com/band/discography/id/3540372489/tab/all
[] reviews: https://www.metal-archives.com/review/ajax-list-band/id/3540372489/json/1?sEcho=1&iColumns=4&sColumns=&iDisplayStart=0&iDisplayLength=200&mDataProp_0=0&mDataProp_1=1&mDataProp_2=2&mDataProp_3=3&iSortCol_0=3&sSortDir_0=desc&iSortingCols=1&bSortable_0=true&bSortable_1=true&bSortable_2=true&bSortable_3=true&_=1675155257133
[] simmilar: https://www.metal-archives.com/band/ajax-recommendations/id/3540372489
[x] sources: https://www.metal-archives.com/link/ajax-list/type/band/id/3540372489
[x] band notes: https://www.metal-archives.com/band/read-more/id/3540372489
"""
artist = cls._fetch_artist_attributes(source.url)
return artist
@classmethod @classmethod
def fetch_artist_details(cls, artist: Artist, flat: bool = False) -> Artist: def fetch_artist_details(cls, artist: Artist, flat: bool = False) -> Artist:
source_list = artist.source_collection.get_sources_from_page(cls.SOURCE_TYPE) source_list = artist.source_collection.get_sources_from_page(cls.SOURCE_TYPE)
@ -406,7 +428,7 @@ class EncyclopaediaMetallum(Page):
""" """
# SIMPLE METADATA # SIMPLE METADATA
artist = cls.fetch_artist_attributes(artist, source.url) artist = cls._fetch_artist_attributes(artist, source.url)
# DISCOGRAPHY # DISCOGRAPHY
artist = cls.fetch_artist_discography(artist, artist_id, flat=flat) artist = cls.fetch_artist_discography(artist, artist_id, flat=flat)