Switch the Encyclopaedia Metallum artist fetch to source-based parsing and make
Page.get_request / Page.post_request retry on timeouts.

Review notes on this revision of the patch:
  * a timed-out request is logged on its own and never touches the missing
    response object before the retry;
  * the post_request retry forwards the json payload again;
  * fetch_artist_details merges the freshly parsed attributes into the artist
    it was given, matching the new one-argument _fetch_artist_attributes;
  * the leftover debug print in _fetch_artist_attributes is gone.

diff --git a/src/metal_archives.py b/src/metal_archives.py
index c151f5c..b3d5ceb 100644
--- a/src/metal_archives.py
+++ b/src/metal_archives.py
@@ -5,11 +5,15 @@ from music_kraken.pages import EncyclopaediaMetallum
 def search():
     results = EncyclopaediaMetallum.search_by_query("#a Ghost Bath")
     print(results)
+    print(results[0].source_collection)
 
 
 def fetch_artist():
     artist = objects.Artist(
-        source_list=[objects.Source(objects.SourcePages.MUSIFY, "https://musify.club/artist/psychonaut-4-83193")]
+        source_list=[
+            objects.Source(objects.SourcePages.MUSIFY, "https://musify.club/artist/psychonaut-4-83193"),
+            objects.Source(objects.SourcePages.ENCYCLOPAEDIA_METALLUM, "https://www.metal-archives.com/bands/Ghost_Bath/3540372489")
+        ]
     )
 
     artist = EncyclopaediaMetallum.fetch_details(artist)
@@ -34,4 +38,4 @@ def fetch_album():
 
 
 if __name__ == "__main__":
-    search()
+    fetch_artist()
diff --git a/src/music_kraken/pages/abstract.py b/src/music_kraken/pages/abstract.py
index c6d0adc..8aa25f2 100644
--- a/src/music_kraken/pages/abstract.py
+++ b/src/music_kraken/pages/abstract.py
@@ -36,38 +36,50 @@ class Page:
     @classmethod
     def get_request(cls, url: str, accepted_response_codes: set = set((200,)), trie: int = 0) -> Optional[
             requests.Response]:
+        # a timeout leaves no response object, so it is tracked separately from a bad status code
+        retry = False
         try:
             r = cls.API_SESSION.get(url, timeout=cls.TIMEOUT)
         except requests.exceptions.Timeout:
-            return None
+            retry = True
 
-        if r.status_code in accepted_response_codes:
+        if not retry and r.status_code in accepted_response_codes:
             return r
 
-        LOGGER.warning(f"{cls.__name__} responded wit {r.status_code} at {url}. ({trie}-{cls.TRIES})")
-        LOGGER.debug(r.content)
+        if retry:
+            LOGGER.warning(f"{cls.__name__} timed out at GET:{url}. ({trie}-{cls.TRIES})")
+        else:
+            LOGGER.warning(f"{cls.__name__} responded wit {r.status_code} at GET:{url}. ({trie}-{cls.TRIES})")
+            LOGGER.debug(r.content)
 
-        if trie <= cls.TRIES:
+        if trie >= cls.TRIES:
             LOGGER.warning("to many tries. Aborting.")
+            return None
 
         return cls.get_request(url, accepted_response_codes, trie + 1)
 
     @classmethod
     def post_request(cls, url: str, json: dict, accepted_response_codes: set = set((200,)), trie: int = 0) -> Optional[
             requests.Response]:
+        # a timeout leaves no response object, so it is tracked separately from a bad status code
+        retry = False
         try:
             r = cls.API_SESSION.post(url, json=json, timeout=cls.TIMEOUT)
         except requests.exceptions.Timeout:
-            return None
+            retry = True
 
-        if r.status_code in accepted_response_codes:
+        if not retry and r.status_code in accepted_response_codes:
             return r
 
-        LOGGER.warning(f"{cls.__name__} responded wit {r.status_code} at {url}. ({trie}-{cls.TRIES})")
-        LOGGER.debug(r.content)
+        if retry:
+            LOGGER.warning(f"{cls.__name__} timed out at POST:{url}. ({trie}-{cls.TRIES})")
+        else:
+            LOGGER.warning(f"{cls.__name__} responded wit {r.status_code} at POST:{url}. ({trie}-{cls.TRIES})")
+            LOGGER.debug(r.content)
 
-        if trie <= cls.TRIES:
+        if trie >= cls.TRIES:
             LOGGER.warning("to many tries. Aborting.")
+            return None
 
-        return cls.post_request(url, accepted_response_codes, trie + 1)
+        return cls.post_request(url, json, accepted_response_codes, trie + 1)
 
@@ -163,7 +175,7 @@ class Page:
         new_music_object: DatabaseObject = type(music_object)()
 
         source: Source
-        for source in music_object.source_collection:
+        for source in music_object.source_collection.get_sources_from_page(cls.SOURCE_TYPE):
             new_music_object.merge(cls._fetch_object_from_source(source=source, obj_type=type(music_object), stop_at_level=stop_at_level))
 
         collections = {
diff --git a/src/music_kraken/pages/encyclopaedia_metallum.py b/src/music_kraken/pages/encyclopaedia_metallum.py
index 65b6aa4..f8f4450 100644
--- a/src/music_kraken/pages/encyclopaedia_metallum.py
+++ b/src/music_kraken/pages/encyclopaedia_metallum.py
@@ -299,12 +299,7 @@ class EncyclopaediaMetallum(Page):
         return artist
 
     @classmethod
-    def fetch_artist_attributes(cls, artist: Artist, url: str) -> Artist:
-        r = cls.get_request(url)
-        if r is None:
-            return artist
-        soup: BeautifulSoup = cls.get_soup_from_response(r)
-
+    def _parse_artist_attributes(cls, artist_soup: BeautifulSoup) -> Artist:
         country: pycountry.Countrie = None
         formed_in_year: int = None
         genre: str = None
@@ -312,7 +307,7 @@ class EncyclopaediaMetallum(Page):
         label_name: str = None
         label_url: str = None
 
-        band_stat_soup = soup.find("div", {"id": "band_stats"})
+        band_stat_soup = artist_soup.find("div", {"id": "band_stats"})
         for dl_soup in band_stat_soup.find_all("dl"):
             for title, data in zip(dl_soup.find_all("dt"), dl_soup.find_all("dd")):
                 title_text = title.text
@@ -320,7 +315,6 @@ class EncyclopaediaMetallum(Page):
                 if "Country of origin:" == title_text:
                     href = data.find('a').get('href')
                     country = pycountry.countries.get(alpha_2=href.split("/")[-1])
-                    artist.country = country
                     continue
 
                 # not needed: Location: Minot, North Dakota
@@ -335,15 +329,12 @@ class EncyclopaediaMetallum(Page):
                     if not data.text.isnumeric():
                         continue
                     formed_in_year = int(data.text)
-                    artist.formed_in = ID3Timestamp(year=formed_in_year)
                     continue
                 if "Genre:" == title_text:
                     genre = data.text
-                    artist.general_genre = genre
                     continue
                 if "Lyrical themes:" == title_text:
                     lyrical_themes = data.text.split(", ")
-                    artist.lyrical_themes = lyrical_themes
                     continue
                 if "Current label:" == title_text:
                     label_name = data.text
@@ -354,23 +345,37 @@ class EncyclopaediaMetallum(Page):
                     label_id = None
                     if type(label_url) is str and "/" in label_url:
                         label_id = label_url.split("/")[-1]
-
-                    artist.label_collection.append(
-                        Label(
-                            _id=label_id,
-                            name=label_name,
-                            source_list=[
-                                Source(cls.SOURCE_TYPE, label_url)
-                            ]
-                        ))
-
 
         """
+        TODO
         years active: 2012-present
         process this and add field to class
         """
 
-        return artist
+        return Artist(
+            country=country,
+            formed_in=ID3Timestamp(year=formed_in_year),
+            general_genre=genre,
+            lyrical_themes=lyrical_themes,
+            label_list=[
+                Label(
+                    name=label_name,
+                    source_list=[
+                        Source(cls.SOURCE_TYPE, label_url)
+                    ]
+                )
+            ]
+        )
+
+    @classmethod
+    def _fetch_artist_attributes(cls, url: str) -> Artist:
+        """Request the band page at url and parse it; an empty Artist is returned when the request fails."""
+        r = cls.get_request(url)
+        if r is None:
+            return Artist()
+        soup: BeautifulSoup = cls.get_soup_from_response(r)
+
+        return cls._parse_artist_attributes(artist_soup=soup)
 
     @classmethod
     def fetch_band_notes(cls, artist: Artist, ma_artist_id: str) -> Artist:
@@ -385,6 +390,23 @@ class EncyclopaediaMetallum(Page):
         artist.notes.html = r.text
         return artist
 
+    @classmethod
+    def _fetch_artist_from_source(cls, source: Source, stop_at_level: int = 1) -> Artist:
+        """
+        What it could fetch, and what is implemented:
+
+        [x] https://www.metal-archives.com/bands/Ghost_Bath/3540372489
+        [x] https://www.metal-archives.com/band/discography/id/3540372489/tab/all
+        [] reviews: https://www.metal-archives.com/review/ajax-list-band/id/3540372489/json/1?sEcho=1&iColumns=4&sColumns=&iDisplayStart=0&iDisplayLength=200&mDataProp_0=0&mDataProp_1=1&mDataProp_2=2&mDataProp_3=3&iSortCol_0=3&sSortDir_0=desc&iSortingCols=1&bSortable_0=true&bSortable_1=true&bSortable_2=true&bSortable_3=true&_=1675155257133
+        [] simmilar: https://www.metal-archives.com/band/ajax-recommendations/id/3540372489
+        [x] sources: https://www.metal-archives.com/link/ajax-list/type/band/id/3540372489
+        [x] band notes: https://www.metal-archives.com/band/read-more/id/3540372489
+        """
+
+        artist = cls._fetch_artist_attributes(source.url)
+
+        return artist
+
     @classmethod
     def fetch_artist_details(cls, artist: Artist, flat: bool = False) -> Artist:
         source_list = artist.source_collection.get_sources_from_page(cls.SOURCE_TYPE)
@@ -406,7 +428,7 @@ class EncyclopaediaMetallum(Page):
         """
 
         # SIMPLE METADATA
-        artist = cls.fetch_artist_attributes(artist, source.url)
+        artist.merge(cls._fetch_artist_attributes(source.url))
 
         # DISCOGRAPHY
         artist = cls.fetch_artist_discography(artist, artist_id, flat=flat)