improved metal archives

This commit is contained in:
Hellow2 2023-04-05 09:20:25 +02:00
parent 357038f8f6
commit 726da7fbe0

View File

@ -5,10 +5,8 @@ from bs4 import BeautifulSoup
import pycountry import pycountry
from urllib.parse import urlparse from urllib.parse import urlparse
from ..utils.shared import ( from ..utils.shared import ENCYCLOPAEDIA_METALLUM_LOGGER
ENCYCLOPAEDIA_METALLUM_LOGGER as LOGGER from ..utils import string_processing
)
from .abstract import Page from .abstract import Page
from ..objects import ( from ..objects import (
Lyrics, Lyrics,
@ -23,9 +21,6 @@ from ..objects import (
Options, Options,
AlbumType AlbumType
) )
from ..utils import (
string_processing
)
class EncyclopaediaMetallum(Page): class EncyclopaediaMetallum(Page):
@ -38,11 +33,17 @@ class EncyclopaediaMetallum(Page):
SOURCE_TYPE = SourcePages.ENCYCLOPAEDIA_METALLUM SOURCE_TYPE = SourcePages.ENCYCLOPAEDIA_METALLUM
ALBUM_TYPE_MAP: Dict[str, AlbumType] = defaultdict(lambda: AlbumType.OTHER, { ALBUM_TYPE_MAP: Dict[str, AlbumType] = defaultdict(lambda: AlbumType.OTHER, {
"EP": AlbumType.EP,
"Full-length": AlbumType.STUDIO_ALBUM, "Full-length": AlbumType.STUDIO_ALBUM,
"Single": AlbumType.SINGLE "Single": AlbumType.SINGLE,
"EP": AlbumType.EP,
"Demo": AlbumType.DEMO,
"Video": AlbumType.OTHER,
"Live album": AlbumType.LIVE_ALBUM,
"Compilation": AlbumType.COMPILATION_ALBUM
}) })
LOGGER = ENCYCLOPAEDIA_METALLUM_LOGGER
@classmethod @classmethod
def search_by_query(cls, query: str) -> Options: def search_by_query(cls, query: str) -> Options:
query_obj = cls.Query(query) query_obj = cls.Query(query)
@ -68,9 +69,9 @@ class EncyclopaediaMetallum(Page):
"&iDisplayLength=200&mDataProp_0=0&mDataProp_1=1&mDataProp_2=2&mDataProp_3=3&mDataProp_4=4&_" \ "&iDisplayLength=200&mDataProp_0=0&mDataProp_1=1&mDataProp_2=2&mDataProp_3=3&mDataProp_4=4&_" \
"=1674550595663" "=1674550595663"
r = cls.API_SESSION.get(endpoint.format(song=query.song_str, artist=query.artist_str, album=query.album_str)) r = cls.get_request(endpoint.format(song=query.song_str, artist=query.artist_str, album=query.album_str))
if r.status_code != 200: if r.status_code != 200:
LOGGER.warning( cls.LOGGER.warning(
f"code {r.status_code} at {endpoint.format(song=query.song_str, artist=query.artist_str, album=query.album_str)}") f"code {r.status_code} at {endpoint.format(song=query.song_str, artist=query.artist_str, album=query.album_str)}")
return [] return []
@ -90,16 +91,16 @@ class EncyclopaediaMetallum(Page):
"=&releaseRecordingInfo=&releaseDescription=&releaseNotes=&genre=&sEcho=1&iColumns=3&sColumns" \ "=&releaseRecordingInfo=&releaseDescription=&releaseNotes=&genre=&sEcho=1&iColumns=3&sColumns" \
"=&iDisplayStart=0&iDisplayLength=200&mDataProp_0=0&mDataProp_1=1&mDataProp_2=2&_=1674563943747" "=&iDisplayStart=0&iDisplayLength=200&mDataProp_0=0&mDataProp_1=1&mDataProp_2=2&_=1674563943747"
r = cls.API_SESSION.get(endpoint.format(artist=query.artist_str, album=query.album_str)) r = cls.get_request(endpoint.format(artist=query.artist_str, album=query.album_str))
if r.status_code != 200: if r.status_code != 200:
LOGGER.warning( cls.LOGGER.warning(
f"code {r.status_code} at {endpoint.format(song=query.song_str, artist=query.artist_str, album=query.album_str)}") f"code {r.status_code} at {endpoint.format(song=query.song_str, artist=query.artist_str, album=query.album_str)}")
return [] return []
return [cls.get_album_from_json( return [cls.get_album_from_json(
artist_html=raw_album[0], artist_html=raw_album[0],
album_html=raw_album[1], album_html=raw_album[1],
release_type=[2] release_type=raw_album[2]
) for raw_album in r.json()['aaData']] ) for raw_album in r.json()['aaData']]
@classmethod @classmethod
@ -109,7 +110,10 @@ class EncyclopaediaMetallum(Page):
"=&bandLabelName=&sEcho=1&iColumns=3&sColumns=&iDisplayStart=0&iDisplayLength=200&mDataProp_0=0" \ "=&bandLabelName=&sEcho=1&iColumns=3&sColumns=&iDisplayStart=0&iDisplayLength=200&mDataProp_0=0" \
"&mDataProp_1=1&mDataProp_2=2&_=1674565459976" "&mDataProp_1=1&mDataProp_2=2&_=1674565459976"
r = cls.API_SESSION.get(endpoint.format(artist=query.artist)) r = cls.get_request(endpoint.format(artist=query.artist))
if r is None:
return []
data_key = 'aaData' data_key = 'aaData'
parsed_data = r.json() parsed_data = r.json()
@ -129,9 +133,8 @@ class EncyclopaediaMetallum(Page):
""" """
endpoint = "https://www.metal-archives.com/search/ajax-band-search/?field=name&query={query}&sEcho=1&iColumns=3&sColumns=&iDisplayStart=0&iDisplayLength=200&mDataProp_0=0&mDataProp_1=1&mDataProp_2=2" endpoint = "https://www.metal-archives.com/search/ajax-band-search/?field=name&query={query}&sEcho=1&iColumns=3&sColumns=&iDisplayStart=0&iDisplayLength=200&mDataProp_0=0&mDataProp_1=1&mDataProp_2=2"
r = cls.API_SESSION.get(endpoint.format(query=query)) r = cls.get_request(endpoint.format(query=query))
if r.status_code != 200: if r is None:
LOGGER.warning(f"code {r.status_code} at {endpoint.format(query=query.query)}")
return [] return []
return [ return [
@ -152,23 +155,17 @@ class EncyclopaediaMetallum(Page):
artist_url = anchor.get('href') artist_url = anchor.get('href')
artist_id = artist_url.split("/")[-1] artist_id = artist_url.split("/")[-1]
notes = f"{artist_name} is a {genre} band from {country}"
anchor.decompose() anchor.decompose()
strong = soup.find('strong') strong = soup.find('strong')
if strong is not None: if strong is not None:
strong.decompose() strong.decompose()
akronyms_ = soup.text[2:-2].split(', ') akronyms_ = soup.text[2:-2].split(', ')
notes += f"aka {akronyms_}"
notes += "."
return Artist( return Artist(
id_=artist_id,
name=artist_name, name=artist_name,
source_list=[ source_list=[
Source(SourcePages.ENCYCLOPAEDIA_METALLUM, artist_url) Source(SourcePages.ENCYCLOPAEDIA_METALLUM, artist_url)
], ]
notes=FormattedText(plaintext=notes)
) )
@classmethod @classmethod
@ -181,12 +178,11 @@ class EncyclopaediaMetallum(Page):
album_url = anchor.get('href') album_url = anchor.get('href')
album_id = album_url.split("/")[-1] album_id = album_url.split("/")[-1]
""" album_type = cls.ALBUM_TYPE_MAP[release_type.strip()]
TODO implement release type
"""
return Album( return Album(
id_=album_id,
title=album_name, title=album_name,
album_type=album_type,
source_list=[ source_list=[
Source(SourcePages.ENCYCLOPAEDIA_METALLUM, album_url) Source(SourcePages.ENCYCLOPAEDIA_METALLUM, album_url)
], ],
@ -206,7 +202,6 @@ class EncyclopaediaMetallum(Page):
song_id = raw_song_id.replace("lyricsLink_", "") song_id = raw_song_id.replace("lyricsLink_", "")
return Song( return Song(
id_=song_id,
title=title, title=title,
main_artist_list=[ main_artist_list=[
cls.get_artist_from_json(artist_html=artist_html) cls.get_artist_from_json(artist_html=artist_html)
@ -249,7 +244,6 @@ class EncyclopaediaMetallum(Page):
discography.append( discography.append(
Album( Album(
id_=album_id,
title=album_name, title=album_name,
date=date_obj, date=date_obj,
album_type=cls.ALBUM_TYPE_MAP[raw_album_type], album_type=cls.ALBUM_TYPE_MAP[raw_album_type],
@ -312,7 +306,7 @@ class EncyclopaediaMetallum(Page):
if title_text.count(bad_name_substring) == 1: if title_text.count(bad_name_substring) == 1:
name = title_text.replace(bad_name_substring, "") name = title_text.replace(bad_name_substring, "")
else: else:
LOGGER.debug(f"the title of the page is \"{title_text}\"") cls.LOGGER.debug(f"the title of the page is \"{title_text}\"")
""" """
TODO TODO
@ -528,7 +522,7 @@ class EncyclopaediaMetallum(Page):
album_name = anchor.get_text(strip=True) album_name = anchor.get_text(strip=True)
elif len(album_soup_list) > 1: elif len(album_soup_list) > 1:
LOGGER.debug("there are more than 1 album soups") cls.LOGGER.debug("there are more than 1 album soups")
artist_soup_list = album_info_soup.find_all("h2", {"class": "band_name"}) artist_soup_list = album_info_soup.find_all("h2", {"class": "band_name"})
@ -548,7 +542,7 @@ class EncyclopaediaMetallum(Page):
)) ))
elif len(artist_soup_list) > 1: elif len(artist_soup_list) > 1:
LOGGER.debug("there are more than 1 artist soups") cls.LOGGER.debug("there are more than 1 artist soups")
_parse_album_info(album_info_soup=album_soup.find(id="album_info")) _parse_album_info(album_info_soup=album_soup.find(id="album_info"))