continued refactoring and added fetching of discography again
This commit is contained in:
parent
7ae7aa87fd
commit
ec9bbf15d3
@ -1,4 +1,5 @@
|
|||||||
from typing import List
|
from collections import defaultdict
|
||||||
|
from typing import List, Optional, Dict
|
||||||
import requests
|
import requests
|
||||||
from bs4 import BeautifulSoup
|
from bs4 import BeautifulSoup
|
||||||
import pycountry
|
import pycountry
|
||||||
@ -18,7 +19,8 @@ from ..objects import (
|
|||||||
ID3Timestamp,
|
ID3Timestamp,
|
||||||
FormattedText,
|
FormattedText,
|
||||||
Label,
|
Label,
|
||||||
Options
|
Options,
|
||||||
|
AlbumType
|
||||||
)
|
)
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
string_processing
|
string_processing
|
||||||
@ -34,6 +36,12 @@ class EncyclopaediaMetallum(Page):
|
|||||||
|
|
||||||
SOURCE_TYPE = SourcePages.ENCYCLOPAEDIA_METALLUM
|
SOURCE_TYPE = SourcePages.ENCYCLOPAEDIA_METALLUM
|
||||||
|
|
||||||
|
ALBUM_TYPE_MAP: Dict[str, AlbumType] = defaultdict(lambda: AlbumType.OTHER, {
|
||||||
|
"EP": AlbumType.EP,
|
||||||
|
"Full-length": AlbumType.STUDIO_ALBUM,
|
||||||
|
"Single": AlbumType.SINGLE
|
||||||
|
})
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
def search_by_query(cls, query: str) -> Options:
|
def search_by_query(cls, query: str) -> Options:
|
||||||
query_obj = cls.Query(query)
|
query_obj = cls.Query(query)
|
||||||
@ -211,22 +219,16 @@ class EncyclopaediaMetallum(Page):
|
|||||||
)
|
)
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
def fetch_artist_discography(cls, artist: Artist, ma_artist_id: str, flat: bool = False) -> Artist:
|
def _fetch_artist_discography(cls, ma_artist_id: str) -> List[Album]:
|
||||||
"""
|
|
||||||
TODO
|
|
||||||
I'd guess this function has quite some potential for optimization
|
|
||||||
in terms of performance and clean code
|
|
||||||
"""
|
|
||||||
discography_url = "https://www.metal-archives.com/band/discography/id/{}/tab/all"
|
discography_url = "https://www.metal-archives.com/band/discography/id/{}/tab/all"
|
||||||
|
|
||||||
# make the request
|
# make the request
|
||||||
r = cls.API_SESSION.get(discography_url.format(ma_artist_id))
|
r = cls.get_request(discography_url.format(ma_artist_id))
|
||||||
if r.status_code != 200:
|
if r is None:
|
||||||
LOGGER.warning(f"code {r.status_code} at {discography_url.format(ma_artist_id)}")
|
return []
|
||||||
return artist
|
soup = cls.get_soup_from_response(r)
|
||||||
|
|
||||||
# parse the html
|
discography = []
|
||||||
soup = BeautifulSoup(r.text, 'html.parser')
|
|
||||||
|
|
||||||
tbody_soup = soup.find('tbody')
|
tbody_soup = soup.find('tbody')
|
||||||
for tr_soup in tbody_soup.find_all('tr'):
|
for tr_soup in tbody_soup.find_all('tr'):
|
||||||
@ -236,7 +238,7 @@ class EncyclopaediaMetallum(Page):
|
|||||||
album_name = album_soup.text
|
album_name = album_soup.text
|
||||||
album_url = album_soup.find('a').get('href')
|
album_url = album_soup.find('a').get('href')
|
||||||
album_id = album_url.split('/')[-1]
|
album_id = album_url.split('/')[-1]
|
||||||
album_type = td_list[1].text
|
raw_album_type = td_list[1].text
|
||||||
album_year = td_list[2].text
|
album_year = td_list[2].text
|
||||||
date_obj = None
|
date_obj = None
|
||||||
try:
|
try:
|
||||||
@ -244,36 +246,29 @@ class EncyclopaediaMetallum(Page):
|
|||||||
except ValueError():
|
except ValueError():
|
||||||
pass
|
pass
|
||||||
|
|
||||||
artist.main_album_collection.append(
|
discography.append(
|
||||||
Album(
|
Album(
|
||||||
id_=album_id,
|
id_=album_id,
|
||||||
title=album_name,
|
title=album_name,
|
||||||
album_type=album_type,
|
|
||||||
date=date_obj,
|
date=date_obj,
|
||||||
|
album_type=cls.ALBUM_TYPE_MAP[raw_album_type],
|
||||||
source_list=[Source(SourcePages.ENCYCLOPAEDIA_METALLUM, album_url)]
|
source_list=[Source(SourcePages.ENCYCLOPAEDIA_METALLUM, album_url)]
|
||||||
)
|
)
|
||||||
)
|
)
|
||||||
|
|
||||||
if not flat:
|
return discography
|
||||||
for album in artist.main_album_collection:
|
|
||||||
cls.fetch_album_details(album, flat=flat)
|
|
||||||
|
|
||||||
return artist
|
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
def fetch_artist_sources(cls, artist: Artist, ma_artist_id: str) -> Artist:
|
def _fetch_artist_sources(cls, ma_artist_id: str) -> List[Source]:
|
||||||
sources_url = "https://www.metal-archives.com/link/ajax-list/type/band/id/{}"
|
sources_url = "https://www.metal-archives.com/link/ajax-list/type/band/id/{}"
|
||||||
|
r = cls.get_request(sources_url.format(ma_artist_id))
|
||||||
|
if r is None:
|
||||||
|
return []
|
||||||
|
|
||||||
# make the request
|
soup = cls.get_soup_from_response(r)
|
||||||
r = cls.API_SESSION.get(sources_url.format(ma_artist_id))
|
|
||||||
if r.status_code != 200:
|
|
||||||
LOGGER.warning(f"code {r.status_code} at {sources_url.format(ma_artist_id)}")
|
|
||||||
return artist
|
|
||||||
|
|
||||||
soup = BeautifulSoup(r.text, 'html.parser')
|
if soup.find("span", {"id": "noLinks"}) is not None:
|
||||||
|
return []
|
||||||
if soup.find("span",{"id": "noLinks"}) is not None:
|
|
||||||
return artist
|
|
||||||
|
|
||||||
artist_source = soup.find("div", {"id": "band_links_Official"})
|
artist_source = soup.find("div", {"id": "band_links_Official"})
|
||||||
"""
|
"""
|
||||||
@ -285,18 +280,18 @@ class EncyclopaediaMetallum(Page):
|
|||||||
merchandice_source = soup.find("div", {"id": "band_links_Official_merchandise"})
|
merchandice_source = soup.find("div", {"id": "band_links_Official_merchandise"})
|
||||||
label_source = soup.find("div", {"id": "band_links_Labels"})
|
label_source = soup.find("div", {"id": "band_links_Labels"})
|
||||||
|
|
||||||
|
source_list = []
|
||||||
|
|
||||||
if artist_source is not None:
|
if artist_source is not None:
|
||||||
for tr in artist_source.find_all("td"):
|
for tr in artist_source.find_all("td"):
|
||||||
a = tr.find("a")
|
a = tr.find("a")
|
||||||
url = a.get("href")
|
url = a.get("href")
|
||||||
|
if url is None:
|
||||||
source = Source.match_url(url)
|
|
||||||
if source is None:
|
|
||||||
continue
|
continue
|
||||||
|
|
||||||
artist.add_source(source)
|
source_list.append(Source.match_url(url))
|
||||||
|
|
||||||
return artist
|
return source_list
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
def _parse_artist_attributes(cls, artist_soup: BeautifulSoup) -> Artist:
|
def _parse_artist_attributes(cls, artist_soup: BeautifulSoup) -> Artist:
|
||||||
@ -350,7 +345,6 @@ class EncyclopaediaMetallum(Page):
|
|||||||
|
|
||||||
name = anchor.get_text(strip=True)
|
name = anchor.get_text(strip=True)
|
||||||
|
|
||||||
|
|
||||||
band_stat_soup = artist_soup.find("div", {"id": "band_stats"})
|
band_stat_soup = artist_soup.find("div", {"id": "band_stats"})
|
||||||
for dl_soup in band_stat_soup.find_all("dl"):
|
for dl_soup in band_stat_soup.find_all("dl"):
|
||||||
for title, data in zip(dl_soup.find_all("dt"), dl_soup.find_all("dd")):
|
for title, data in zip(dl_soup.find_all("dt"), dl_soup.find_all("dd")):
|
||||||
@ -423,17 +417,15 @@ class EncyclopaediaMetallum(Page):
|
|||||||
return cls._parse_artist_attributes(artist_soup=soup)
|
return cls._parse_artist_attributes(artist_soup=soup)
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
def fetch_band_notes(cls, artist: Artist, ma_artist_id: str) -> Artist:
|
def _fetch_band_notes(cls, ma_artist_id: str) -> Optional[FormattedText]:
|
||||||
endpoint = "https://www.metal-archives.com/band/read-more/id/{}"
|
endpoint = "https://www.metal-archives.com/band/read-more/id/{}"
|
||||||
|
|
||||||
# make the request
|
# make the request
|
||||||
r = cls.API_SESSION.get(endpoint.format(ma_artist_id))
|
r = cls.get_request(endpoint.format(ma_artist_id))
|
||||||
if r.status_code != 200:
|
if r is None:
|
||||||
LOGGER.warning(f"code {r.status_code} at {endpoint.format(ma_artist_id)}")
|
return FormattedText()
|
||||||
return artist
|
|
||||||
|
|
||||||
artist.notes.html = r.text
|
return FormattedText(html=r.text)
|
||||||
return artist
|
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
def _fetch_artist_from_source(cls, source: Source, stop_at_level: int = 1) -> Artist:
|
def _fetch_artist_from_source(cls, source: Source, stop_at_level: int = 1) -> Artist:
|
||||||
@ -450,62 +442,45 @@ class EncyclopaediaMetallum(Page):
|
|||||||
|
|
||||||
artist = cls._fetch_artist_attributes(source.url)
|
artist = cls._fetch_artist_attributes(source.url)
|
||||||
|
|
||||||
return artist
|
|
||||||
|
|
||||||
@classmethod
|
|
||||||
def fetch_artist_details(cls, artist: Artist, flat: bool = False) -> Artist:
|
|
||||||
source_list = artist.source_collection.get_sources_from_page(cls.SOURCE_TYPE)
|
|
||||||
if len(source_list) == 0:
|
|
||||||
return artist
|
|
||||||
|
|
||||||
# taking the first source, because I only need one and multiple sources don't make much sense
|
|
||||||
source = source_list[0]
|
|
||||||
artist_id = source.url.split("/")[-1]
|
artist_id = source.url.split("/")[-1]
|
||||||
|
|
||||||
"""
|
artist_sources = cls._fetch_artist_sources(artist_id)
|
||||||
TODO
|
artist.source_collection.extend(artist_sources)
|
||||||
[x] https://www.metal-archives.com/bands/Ghost_Bath/3540372489
|
|
||||||
[x] https://www.metal-archives.com/band/discography/id/3540372489/tab/all
|
|
||||||
[] reviews: https://www.metal-archives.com/review/ajax-list-band/id/3540372489/json/1?sEcho=1&iColumns=4&sColumns=&iDisplayStart=0&iDisplayLength=200&mDataProp_0=0&mDataProp_1=1&mDataProp_2=2&mDataProp_3=3&iSortCol_0=3&sSortDir_0=desc&iSortingCols=1&bSortable_0=true&bSortable_1=true&bSortable_2=true&bSortable_3=true&_=1675155257133
|
|
||||||
[] similar: https://www.metal-archives.com/band/ajax-recommendations/id/3540372489
|
|
||||||
[x] sources: https://www.metal-archives.com/link/ajax-list/type/band/id/3540372489
|
|
||||||
[x] band notes: https://www.metal-archives.com/band/read-more/id/3540372489
|
|
||||||
"""
|
|
||||||
|
|
||||||
# SIMPLE METADATA
|
band_notes = cls._fetch_band_notes(artist_id)
|
||||||
artist = cls._fetch_artist_attributes(artist, source.url)
|
if band_notes is not None:
|
||||||
|
artist.notes = band_notes
|
||||||
|
|
||||||
# DISCOGRAPHY
|
discography: List[Album] = cls._fetch_artist_discography(artist_id)
|
||||||
artist = cls.fetch_artist_discography(artist, artist_id, flat=flat)
|
if stop_at_level > 1:
|
||||||
|
for album in discography:
|
||||||
# EXTERNAL SOURCES
|
for source in album.source_collection.get_sources_from_page(cls.SOURCE_TYPE):
|
||||||
artist = cls.fetch_artist_sources(artist, artist_id)
|
album.merge(cls._fetch_album_from_source(source, stop_at_level=stop_at_level-1))
|
||||||
|
artist.main_album_collection.extend(discography)
|
||||||
# ARTIST NOTES
|
|
||||||
artist = cls.fetch_band_notes(artist, artist_id)
|
|
||||||
|
|
||||||
return artist
|
return artist
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
def fetch_album_details(cls, album: Album, flat: bool = False) -> Album:
|
def _fetch_album_from_source(cls, source: Source, stop_at_level: int = 1) -> Album:
|
||||||
source_list = album.source_collection.get_sources_from_page(cls.SOURCE_TYPE)
|
"""
|
||||||
if len(source_list) == 0:
|
I am pretty sure I can get way more data than... nothing from there
|
||||||
return album
|
|
||||||
|
|
||||||
source = source_list[0]
|
:param source:
|
||||||
album_id = source.url.split("/")[-1]
|
:param stop_at_level:
|
||||||
|
:return:
|
||||||
|
"""
|
||||||
|
|
||||||
# <table class="display table_lyrics
|
# <table class="display table_lyrics
|
||||||
|
|
||||||
r = cls.API_SESSION.get(source.url)
|
album = Album()
|
||||||
if r.status_code != 200:
|
|
||||||
LOGGER.warning(f"code {r.status_code} at {source.url}")
|
r = cls.get_request(source.url)
|
||||||
|
if r is None:
|
||||||
return album
|
return album
|
||||||
|
|
||||||
soup = BeautifulSoup(r.text, 'html.parser')
|
soup = cls.get_soup_from_response(r)
|
||||||
|
|
||||||
tracklist_soup = soup.find("table", {"class": "table_lyrics"}).find("tbody")
|
tracklist_soup = soup.find("table", {"class": "table_lyrics"}).find("tbody")
|
||||||
|
|
||||||
for row in tracklist_soup.find_all("tr", {"class": ["even", "odd"]}):
|
for row in tracklist_soup.find_all("tr", {"class": ["even", "odd"]}):
|
||||||
"""
|
"""
|
||||||
example of row:
|
example of row:
|
||||||
@ -532,7 +507,7 @@ class EncyclopaediaMetallum(Page):
|
|||||||
duration_stamp = row_list[2].text
|
duration_stamp = row_list[2].text
|
||||||
if ":" in duration_stamp:
|
if ":" in duration_stamp:
|
||||||
minutes, seconds = duration_stamp.split(":")
|
minutes, seconds = duration_stamp.split(":")
|
||||||
length = (int(minutes) * 60 + int(seconds))*1000 # in milliseconds
|
length = (int(minutes) * 60 + int(seconds)) * 1000 # in milliseconds
|
||||||
|
|
||||||
album.song_collection.append(
|
album.song_collection.append(
|
||||||
Song(
|
Song(
|
||||||
@ -545,16 +520,3 @@ class EncyclopaediaMetallum(Page):
|
|||||||
)
|
)
|
||||||
|
|
||||||
return album
|
return album
|
||||||
|
|
||||||
@classmethod
|
|
||||||
def fetch_song_details(cls, song: Song, flat: bool = False) -> Song:
|
|
||||||
source_list = song.source_collection.get_sources_from_page(cls.SOURCE_TYPE)
|
|
||||||
if len(source_list) == 0:
|
|
||||||
return song
|
|
||||||
|
|
||||||
"""
|
|
||||||
TODO
|
|
||||||
lyrics
|
|
||||||
"""
|
|
||||||
|
|
||||||
return song
|
|
||||||
|
Loading…
Reference in New Issue
Block a user