2023-01-23 23:16:10 +00:00
|
|
|
from typing import List
from urllib.parse import quote

import requests
from bs4 import BeautifulSoup

from ..utils.shared import (
    ENCYCLOPAEDIA_METALLUM_LOGGER as LOGGER
)
from .abstract import Page
from ..database import (
    MusicObject,
    Artist,
    Source,
    SourcePages,
    Song,
    Album
)
|
2023-01-23 23:16:10 +00:00
|
|
|
|
|
|
|
|
|
|
|
class EncyclopaediaMetallum(Page):
    """Page implementation that searches metal-archives.com (Encyclopaedia Metallum).

    Every method is a classmethod and all HTTP traffic goes through the shared
    ``API_SESSION``. The search endpoints answer with JSON whose ``aaData``
    entry is iterated as a list of rows; each row is indexed by column and the
    columns are HTML snippets or plain text that get parsed into database
    objects below.
    """

    # One session for all requests; its headers are replaced by this mapping.
    API_SESSION: requests.Session = requests.Session()
    API_SESSION.headers = {
        "Host": "www.metal-archives.com",
        "Connection": "keep-alive"
    }

    # Source page used to look up the matching sources of a music object.
    SOURCE_TYPE = SourcePages.ENCYCLOPAEDIA_METALLUM

    @staticmethod
    def _quote(value) -> str:
        """Percent-encode *value* so it can be embedded in a URL query string.

        ``safe=""`` also encodes ``/``; characters such as ``&``, ``#`` or ``=``
        in a band/album/song name would otherwise split or truncate the query.
        """
        return quote(str(value), safe="")

    @classmethod
    def _get_search_rows(cls, url: str) -> list:
        """Request *url* and return the ``aaData`` rows of the JSON answer.

        A status code other than 200 is logged as a warning and results in an
        empty list, so the search methods report "no results" instead of
        raising.
        """
        r = cls.API_SESSION.get(url)
        if r.status_code != 200:
            LOGGER.warning(f"code {r.status_code} at {url}")
            return []

        return r.json()['aaData']

    @classmethod
    def search_by_query(cls, query: str) -> List[MusicObject]:
        """Search metal-archives for *query*.

        The string is wrapped in ``cls.Query``; a raw query is handed to
        :meth:`simple_search`, anything else to :meth:`advanced_search`.
        """
        query_obj = cls.Query(query)

        if query_obj.is_raw:
            return cls.simple_search(query_obj)
        return cls.advanced_search(query_obj)

    @classmethod
    def advanced_search(cls, query: Page.Query) -> List[MusicObject]:
        """Dispatch to the most specific search the query allows.

        Precedence is song, then album, then artist. An empty list is returned
        if none of the three is set on *query*.
        """
        if query.song is not None:
            return cls.search_for_song(query=query)
        if query.album is not None:
            return cls.search_for_album(query=query)
        if query.artist is not None:
            return cls.search_for_artist(query=query)
        return []

    @classmethod
    def search_for_song(cls, query: Page.Query) -> List[Song]:
        """Run the advanced song search and return one ``Song`` per result row.

        Row columns are consumed as: 0 artist html, 1 album html,
        2 release type, 3 song title, 4 lyrics html.
        """
        endpoint = "https://www.metal-archives.com/search/ajax-advanced/searching/songs/?songTitle={song}&bandName={" \
                   "artist}&releaseTitle={album}&lyrics=&genre=&sEcho=1&iColumns=5&sColumns=&iDisplayStart=0" \
                   "&iDisplayLength=200&mDataProp_0=0&mDataProp_1=1&mDataProp_2=2&mDataProp_3=3&mDataProp_4=4&_" \
                   "=1674550595663"

        url = endpoint.format(
            song=cls._quote(query.song_str),
            artist=cls._quote(query.artist_str),
            album=cls._quote(query.album_str)
        )

        return [cls.get_song_from_json(
            artist_html=raw_song[0],
            album_html=raw_song[1],
            release_type=raw_song[2],
            title=raw_song[3],
            lyrics_html=raw_song[4]
        ) for raw_song in cls._get_search_rows(url)]

    @classmethod
    def search_for_album(cls, query: Page.Query) -> List[Album]:
        """Run the advanced album search and return one ``Album`` per result row.

        Row columns are consumed as: 0 artist html, 1 album html,
        2 release type.
        """
        endpoint = "https://www.metal-archives.com/search/ajax-advanced/searching/albums/?bandName={" \
                   "artist}&releaseTitle={album}&releaseYearFrom=&releaseMonthFrom=&releaseYearTo=&releaseMonthTo" \
                   "=&country=&location=&releaseLabelName=&releaseCatalogNumber=&releaseIdentifiers" \
                   "=&releaseRecordingInfo=&releaseDescription=&releaseNotes=&genre=&sEcho=1&iColumns=3&sColumns" \
                   "=&iDisplayStart=0&iDisplayLength=200&mDataProp_0=0&mDataProp_1=1&mDataProp_2=2&_=1674563943747"

        url = endpoint.format(
            artist=cls._quote(query.artist_str),
            album=cls._quote(query.album_str)
        )

        return [cls.get_album_from_json(
            artist_html=raw_album[0],
            album_html=raw_album[1],
            # was the literal list ``[2]``; the release type is the row's third column
            release_type=raw_album[2]
        ) for raw_album in cls._get_search_rows(url)]

    @classmethod
    def search_for_artist(cls, query: Page.Query) -> List[Artist]:
        """Run the advanced band search and return one ``Artist`` per result row.

        Row columns are consumed as: 0 artist html, 1 genre, 2 country.
        """
        endpoint = "https://www.metal-archives.com/search/ajax-advanced/searching/bands/?bandName={artist}&genre=&country=&yearCreationFrom=&yearCreationTo=&bandNotes=&status=&themes=&location=&bandLabelName=&sEcho=1&iColumns=3&sColumns=&iDisplayStart=0&iDisplayLength=200&mDataProp_0=0&mDataProp_1=1&mDataProp_2=2&_=1674565459976"

        # ``artist_str`` (not ``artist``) to stay consistent with the song and album search
        url = endpoint.format(artist=cls._quote(query.artist_str))

        return [
            cls.get_artist_from_json(artist_html=raw_artist[0], genre=raw_artist[1], country=raw_artist[2])
            for raw_artist in cls._get_search_rows(url)
        ]

    @classmethod
    def simple_search(cls, query: Page.Query) -> List[Artist]:
        """
        Searches the default endpoint of metal archives, which in turn only
        searches for bands. It is the site's default, so it is used for raw
        queries.
        """
        endpoint = "https://www.metal-archives.com/search/ajax-band-search/?field=name&query={query}&sEcho=1&iColumns=3&sColumns=&iDisplayStart=0&iDisplayLength=200&mDataProp_0=0&mDataProp_1=1&mDataProp_2=2"

        # The query object itself is rendered (via str), as the request always did;
        # the same URL is used for the warning so the log shows what was requested.
        url = endpoint.format(query=cls._quote(query))

        return [
            cls.get_artist_from_json(artist_html=raw_artist[0], genre=raw_artist[1], country=raw_artist[2])
            for raw_artist in cls._get_search_rows(url)
        ]

    @classmethod
    def get_artist_from_json(cls, artist_html=None, genre=None, country=None) -> Artist:
        """Build an ``Artist`` from the artist column of a search result row.

        :param artist_html: html snippet containing an ``<a>`` whose text is
            the band name and whose ``href`` is the band page. Anything after
            the anchor is treated as a comma separated alias list.
        :param genre: genre text, only used for the notes (optional)
        :param country: country text, only used for the notes (optional)
        :return: artist whose id is the last path segment of the band url

        TODO parse the country to a standard
        NOTE(review): a snippet without an ``<a>`` tag still raises
        AttributeError here, as before - confirm whether such rows can occur.
        """
        # parse the html for the band name and link on metal-archives
        soup = BeautifulSoup(artist_html, 'html.parser')
        anchor = soup.find('a')
        artist_name = anchor.text
        artist_url = anchor.get('href')
        artist_id = artist_url.split("/")[-1]

        # Album and song results call this without genre/country, so only
        # mention what is actually known instead of writing "None".
        notes = f"{artist_name} is a"
        if genre is not None:
            notes += f" {genre}"
        notes += " band"
        if country is not None:
            notes += f" from {country}"

        # Remove name and the <strong> marker; what is left is the alias text.
        anchor.decompose()
        strong = soup.find('strong')
        if strong is not None:
            strong.decompose()
            akronyms_ = soup.text[2:-2].split(', ')
            notes += f" aka {', '.join(akronyms_)}"
        notes += "."

        return Artist(
            id_=artist_id,
            name=artist_name,
            source_list=[
                Source(SourcePages.ENCYCLOPAEDIA_METALLUM, artist_url)
            ],
            notes=notes
        )

    @classmethod
    def get_album_from_json(cls, album_html=None, release_type=None, artist_html=None) -> Album:
        """Build an ``Album`` from the album and artist columns of a result row.

        :param album_html: html snippet with the album link, for example
            ``<a href="https://www.metal-archives.com/albums/Ghost_Bath/Self_Loather/970834">Self Loather</a>``
        :param release_type: accepted but not used yet
        :param artist_html: html snippet passed on to :meth:`get_artist_from_json`
        :return: album whose id is the last path segment of the album url
        """
        soup = BeautifulSoup(album_html, 'html.parser')
        anchor = soup.find('a')
        album_name = anchor.text
        album_url = anchor.get('href')
        album_id = album_url.split("/")[-1]

        # TODO implement release type

        return Album(
            id_=album_id,
            title=album_name,
            source_list=[
                Source(SourcePages.ENCYCLOPAEDIA_METALLUM, album_url)
            ],
            artists=[
                cls.get_artist_from_json(artist_html=artist_html)
            ]
        )

    @classmethod
    def get_song_from_json(cls, artist_html=None, album_html=None, release_type=None, title=None,
                           lyrics_html=None) -> Song:
        """Build a ``Song`` from the columns of a song search result row.

        The song id is taken from the ``id`` attribute of the anchor in
        *lyrics_html* with the ``lyricsLink_`` prefix removed. It stays
        ``None`` if *lyrics_html* is missing, has no anchor, or the anchor has
        no ``id``.
        """
        song_id = None
        if lyrics_html is not None:
            anchor = BeautifulSoup(lyrics_html, 'html.parser').find('a')
            raw_song_id = anchor.get('id') if anchor is not None else None
            if raw_song_id is not None:
                song_id = raw_song_id.replace("lyricsLink_", "")

        return Song(
            id_=song_id,
            title=title,
            main_artist_list=[
                cls.get_artist_from_json(artist_html=artist_html)
            ],
            album=cls.get_album_from_json(album_html=album_html, release_type=release_type, artist_html=artist_html),
            # NOTE(review): the source is created from the bare song id (possibly
            # None), not from a url - kept as is, confirm what Source expects.
            source_list=[
                Source(SourcePages.ENCYCLOPAEDIA_METALLUM, song_id)
            ]
        )

    @classmethod
    def fetch_artist_details(cls, artist: Artist) -> Artist:
        """Look up the metal-archives source of *artist*.

        Currently a stub: the artist is returned unchanged, whether or not a
        matching source exists.
        """
        source_list = artist.get_sources_from_page(cls.SOURCE_TYPE)
        if len(source_list) == 0:
            return artist

        # taking the first source, because only one is needed and multiple sources don't make much sense
        source = source_list[0]
        # logged instead of printed, like every other diagnostic in this class
        LOGGER.debug(source)

        return artist
|