much progress
This commit is contained in:
parent
51770ec0a9
commit
0fd743e81f
@ -29,6 +29,7 @@ from ..utils.support_classes import Query, DownloadResult, DefaultTarget
|
||||
INDEPENDENT_DB_OBJECTS = Union[Label, Album, Artist, Song]
|
||||
INDEPENDENT_DB_TYPES = Union[Type[Song], Type[Album], Type[Artist], Type[Label]]
|
||||
|
||||
|
||||
def _clean_music_object(music_object: INDEPENDENT_DB_OBJECTS, collections: Dict[INDEPENDENT_DB_TYPES, Collection]):
|
||||
if type(music_object) == Label:
|
||||
return _clean_label(label=music_object, collections=collections)
|
||||
@ -105,8 +106,10 @@ class Page(threading.Thread):
|
||||
def run(self) -> None:
|
||||
pass
|
||||
|
||||
@classmethod
|
||||
def get_soup_from_response(cls, r: requests.Response) -> BeautifulSoup:
|
||||
def get_source_type(self, source: Source) -> Optional[INDEPENDENT_DB_TYPES]:
|
||||
return None
|
||||
|
||||
def get_soup_from_response(self, r: requests.Response) -> BeautifulSoup:
|
||||
return BeautifulSoup(r.content, "html.parser")
|
||||
|
||||
# to search stuff
|
||||
@ -168,7 +171,7 @@ class Page(threading.Thread):
|
||||
new_music_object: DatabaseObject = type(music_object)()
|
||||
|
||||
# only certain database objects have a source list
|
||||
if isinstance(music_object, INDEPENDENT_DB_TYPES):
|
||||
if isinstance(music_object, INDEPENDENT_DB_OBJECTS):
|
||||
source: Source
|
||||
for source in music_object.source_collection.get_sources_from_page(self.SOURCE_TYPE):
|
||||
new_music_object.merge(
|
||||
@ -181,34 +184,43 @@ class Page(threading.Thread):
|
||||
|
||||
return music_object
|
||||
|
||||
def fetch_object_from_source(self, source: Source, stop_at_level: int = 2, enforce_type: Type[DatabaseObject] = None, post_process: bool = True) -> DatabaseObject:
|
||||
obj_type = self._get_type_of_url(source.url)
|
||||
def fetch_object_from_source(self, source: Source, stop_at_level: int = 2, enforce_type: Type[DatabaseObject] = None, post_process: bool = True) -> Optional[DatabaseObject]:
|
||||
obj_type = self.get_source_type(source)
|
||||
|
||||
if obj_type is None:
|
||||
return None
|
||||
if enforce_type != obj_type and enforce_type is not None:
|
||||
self.LOGGER.warning(f"Object type isn't type to enforce: {enforce_type}, {obj_type}")
|
||||
return None
|
||||
|
||||
music_object = cls._fetch_object_from_source(source=source, obj_type=obj_type, stop_at_level=stop_at_level)
|
||||
music_object: DatabaseObject = None
|
||||
|
||||
if post_process:
|
||||
fetch_map = {
|
||||
Song: self.fetch_song,
|
||||
Album: self.fetch_album,
|
||||
Artist: self.fetch_artist,
|
||||
Label: self.fetch_label
|
||||
}
|
||||
|
||||
if obj_type in fetch_map:
|
||||
music_object = fetch_map[obj_type](source, stop_at_level)
|
||||
|
||||
if post_process and music_object is not None:
|
||||
return post_process_object(music_object)
|
||||
|
||||
return music_object
|
||||
|
||||
@classmethod
|
||||
def _fetch_object_from_source(cls, source: Source,
|
||||
obj_type: Union[Type[Song], Type[Album], Type[Artist], Type[Label]],
|
||||
stop_at_level: int = 1) -> Union[Song, Album, Artist, Label]:
|
||||
if obj_type == Artist:
|
||||
return cls._fetch_artist_from_source(source=source, stop_at_level=stop_at_level)
|
||||
def fetch_song(self, source: Source, stop_at_level: int = 1) -> Song:
|
||||
return Song()
|
||||
|
||||
if obj_type == Song:
|
||||
return cls._fetch_song_from_source(source=source, stop_at_level=stop_at_level)
|
||||
def fetch_album(self, source: Source, stop_at_level: int = 1) -> Album:
|
||||
return Album()
|
||||
|
||||
if obj_type == Album:
|
||||
return cls._fetch_album_from_source(source=source, stop_at_level=stop_at_level)
|
||||
|
||||
if obj_type == Label:
|
||||
return cls._fetch_label_from_source(source=source, stop_at_level=stop_at_level)
|
||||
def fetch_artist(self, source: Source, stop_at_level: int = 1) -> Artist:
|
||||
return Artist()
|
||||
|
||||
def fetch_label(self, source: Source, stop_at_level: int = 1) -> Label:
|
||||
return Label()
|
||||
|
||||
@classmethod
|
||||
def download(
|
||||
@ -461,26 +473,6 @@ class Page(threading.Thread):
|
||||
|
||||
return r
|
||||
|
||||
@classmethod
|
||||
def _fetch_song_from_source(cls, source: Source, stop_at_level: int = 1) -> Song:
|
||||
return Song()
|
||||
|
||||
@classmethod
|
||||
def _fetch_album_from_source(cls, source: Source, stop_at_level: int = 1) -> Album:
|
||||
return Album()
|
||||
|
||||
@classmethod
|
||||
def _fetch_artist_from_source(cls, source: Source, stop_at_level: int = 1) -> Artist:
|
||||
return Artist()
|
||||
|
||||
@classmethod
|
||||
def _fetch_label_from_source(cls, source: Source, stop_at_level: int = 1) -> Label:
|
||||
return Label()
|
||||
|
||||
@classmethod
|
||||
def _get_type_of_url(cls, url: str) -> Optional[Union[Type[Song], Type[Album], Type[Artist], Type[Label]]]:
|
||||
return None
|
||||
|
||||
@classmethod
|
||||
def _download_song_to_targets(cls, source: Source, target: Target, desc: str = None) -> DownloadResult:
|
||||
return DownloadResult()
|
||||
|
@ -218,16 +218,14 @@ class EncyclopaediaMetallum(Page):
|
||||
for raw_artist in r.json()['aaData']
|
||||
]
|
||||
|
||||
|
||||
@classmethod
|
||||
def _fetch_artist_discography(cls, ma_artist_id: str) -> List[Album]:
|
||||
def _fetch_artist_discography(self, ma_artist_id: str) -> List[Album]:
|
||||
discography_url = "https://www.metal-archives.com/band/discography/id/{}/tab/all"
|
||||
|
||||
# make the request
|
||||
r = cls.CONNECTION.get(discography_url.format(ma_artist_id))
|
||||
r = self.connection.get(discography_url.format(ma_artist_id))
|
||||
if r is None:
|
||||
return []
|
||||
soup = cls.get_soup_from_response(r)
|
||||
soup = self.get_soup_from_response(r)
|
||||
|
||||
discography = []
|
||||
|
||||
@ -251,21 +249,20 @@ class EncyclopaediaMetallum(Page):
|
||||
Album(
|
||||
title=album_name,
|
||||
date=date_obj,
|
||||
album_type=cls.ALBUM_TYPE_MAP[raw_album_type],
|
||||
source_list=[Source(SourcePages.ENCYCLOPAEDIA_METALLUM, album_url)]
|
||||
album_type=ALBUM_TYPE_MAP[raw_album_type],
|
||||
source_list=[Source(self.SOURCE_TYPE, album_url)]
|
||||
)
|
||||
)
|
||||
|
||||
return discography
|
||||
|
||||
@classmethod
|
||||
def _fetch_artist_sources(cls, ma_artist_id: str) -> List[Source]:
|
||||
def _fetch_artist_sources(self, ma_artist_id: str) -> List[Source]:
|
||||
sources_url = "https://www.metal-archives.com/link/ajax-list/type/band/id/{}"
|
||||
r = cls.CONNECTION.get(sources_url.format(ma_artist_id))
|
||||
r = self.connection.get(sources_url.format(ma_artist_id))
|
||||
if r is None:
|
||||
return []
|
||||
|
||||
soup = cls.get_soup_from_response(r)
|
||||
soup = self.get_soup_from_response(r)
|
||||
|
||||
if soup.find("span", {"id": "noLinks"}) is not None:
|
||||
return []
|
||||
@ -289,12 +286,11 @@ class EncyclopaediaMetallum(Page):
|
||||
if url is None:
|
||||
continue
|
||||
|
||||
source_list.append(Source.match_url(url, referer_page=cls.SOURCE_TYPE))
|
||||
source_list.append(Source.match_url(url, referer_page=self.SOURCE_TYPE))
|
||||
|
||||
return source_list
|
||||
|
||||
@classmethod
|
||||
def _parse_artist_attributes(cls, artist_soup: BeautifulSoup) -> Artist:
|
||||
def _parse_artist_attributes(self, artist_soup: BeautifulSoup) -> Artist:
|
||||
name: str = None
|
||||
country: pycountry.Countrie = None
|
||||
formed_in_year: int = None
|
||||
@ -311,7 +307,7 @@ class EncyclopaediaMetallum(Page):
|
||||
if title_text.count(bad_name_substring) == 1:
|
||||
name = title_text.replace(bad_name_substring, "")
|
||||
else:
|
||||
cls.LOGGER.debug(f"the title of the page is \"{title_text}\"")
|
||||
self.LOGGER.debug(f"the title of the page is \"{title_text}\"")
|
||||
|
||||
"""
|
||||
TODO
|
||||
@ -341,7 +337,7 @@ class EncyclopaediaMetallum(Page):
|
||||
|
||||
href = anchor.get("href")
|
||||
if href is not None:
|
||||
source_list.append(Source(cls.SOURCE_TYPE, href))
|
||||
source_list.append(Source(self.SOURCE_TYPE, href))
|
||||
|
||||
name = anchor.get_text(strip=True)
|
||||
|
||||
@ -400,35 +396,32 @@ class EncyclopaediaMetallum(Page):
|
||||
Label(
|
||||
name=label_name,
|
||||
source_list=[
|
||||
Source(cls.SOURCE_TYPE, label_url)
|
||||
Source(self.SOURCE_TYPE, label_url)
|
||||
]
|
||||
)
|
||||
],
|
||||
source_list=source_list
|
||||
)
|
||||
|
||||
@classmethod
|
||||
def _fetch_artist_attributes(cls, url: str) -> Artist:
|
||||
r = cls.CONNECTION.get(url)
|
||||
def _fetch_artist_attributes(self, url: str) -> Artist:
|
||||
r = self.connection.get(url)
|
||||
if r is None:
|
||||
return Artist()
|
||||
soup: BeautifulSoup = cls.get_soup_from_response(r)
|
||||
soup: BeautifulSoup = self.get_soup_from_response(r)
|
||||
|
||||
return cls._parse_artist_attributes(artist_soup=soup)
|
||||
return self._parse_artist_attributes(artist_soup=soup)
|
||||
|
||||
@classmethod
|
||||
def _fetch_band_notes(cls, ma_artist_id: str) -> Optional[FormattedText]:
|
||||
def _fetch_band_notes(self, ma_artist_id: str) -> Optional[FormattedText]:
|
||||
endpoint = "https://www.metal-archives.com/band/read-more/id/{}"
|
||||
|
||||
# make the request
|
||||
r = cls.CONNECTION.get(endpoint.format(ma_artist_id))
|
||||
r = self.connection.get(endpoint.format(ma_artist_id))
|
||||
if r is None:
|
||||
return FormattedText()
|
||||
|
||||
return FormattedText(html=r.text)
|
||||
|
||||
@classmethod
|
||||
def _fetch_artist_from_source(cls, source: Source, stop_at_level: int = 1) -> Artist:
|
||||
def fetch_artist(self, source: Source, stop_at_level: int = 1) -> Artist:
|
||||
"""
|
||||
What it could fetch, and what is implemented:
|
||||
|
||||
@ -440,28 +433,27 @@ class EncyclopaediaMetallum(Page):
|
||||
[x] band notes: https://www.metal-archives.com/band/read-more/id/3540372489
|
||||
"""
|
||||
|
||||
artist = cls._fetch_artist_attributes(source.url)
|
||||
artist = self._fetch_artist_attributes(source.url)
|
||||
|
||||
artist_id = source.url.split("/")[-1]
|
||||
|
||||
artist_sources = cls._fetch_artist_sources(artist_id)
|
||||
artist_sources = self._fetch_artist_sources(artist_id)
|
||||
artist.source_collection.extend(artist_sources)
|
||||
|
||||
band_notes = cls._fetch_band_notes(artist_id)
|
||||
band_notes = self._fetch_band_notes(artist_id)
|
||||
if band_notes is not None:
|
||||
artist.notes = band_notes
|
||||
|
||||
discography: List[Album] = cls._fetch_artist_discography(artist_id)
|
||||
discography: List[Album] = self._fetch_artist_discography(artist_id)
|
||||
if stop_at_level > 1:
|
||||
for album in discography:
|
||||
for source in album.source_collection.get_sources_from_page(cls.SOURCE_TYPE):
|
||||
album.merge(cls._fetch_album_from_source(source, stop_at_level=stop_at_level-1))
|
||||
for source in album.source_collection.get_sources_from_page(self.SOURCE_TYPE):
|
||||
album.merge(self._fetch_album_from_source(source, stop_at_level=stop_at_level-1))
|
||||
artist.main_album_collection.extend(discography)
|
||||
|
||||
return artist
|
||||
|
||||
@classmethod
|
||||
def _parse_album_track_row(cls, track_row: BeautifulSoup) -> Song:
|
||||
def _parse_album_track_row(self, track_row: BeautifulSoup) -> Song:
|
||||
"""
|
||||
<tr class="even">
|
||||
<td width="20"><a class="anchor" name="5948442"> </a>1.</td> # id and tracksort
|
||||
@ -482,7 +474,7 @@ class EncyclopaediaMetallum(Page):
|
||||
track_id = track_sort_soup.find("a").get("name").strip()
|
||||
|
||||
if track_row.find("a", {"href": f"#{track_id}"}) is not None:
|
||||
source_list.append(Source(cls.SOURCE_TYPE, track_id))
|
||||
source_list.append(Source(self.SOURCE_TYPE, track_id))
|
||||
|
||||
title = row_list[1].text.strip()
|
||||
|
||||
@ -500,9 +492,7 @@ class EncyclopaediaMetallum(Page):
|
||||
source_list=source_list
|
||||
)
|
||||
|
||||
|
||||
@classmethod
|
||||
def _parse_album_attributes(cls, album_soup: BeautifulSoup, stop_at_level: int = 1) -> Album:
|
||||
def _parse_album_attributes(self, album_soup: BeautifulSoup, stop_at_level: int = 1) -> Album:
|
||||
tracklist: List[Song] = []
|
||||
artist_list = []
|
||||
album_name: str = None
|
||||
@ -522,12 +512,12 @@ class EncyclopaediaMetallum(Page):
|
||||
|
||||
href = anchor.get("href")
|
||||
if href is not None:
|
||||
source_list.append(Source(cls.SOURCE_TYPE, href.strip()))
|
||||
source_list.append(Source(self.SOURCE_TYPE, href.strip()))
|
||||
|
||||
album_name = anchor.get_text(strip=True)
|
||||
|
||||
elif len(album_soup_list) > 1:
|
||||
cls.LOGGER.debug("there are more than 1 album soups")
|
||||
self.LOGGER.debug("there are more than 1 album soups")
|
||||
|
||||
|
||||
artist_soup_list = album_info_soup.find_all("h2", {"class": "band_name"})
|
||||
@ -537,7 +527,7 @@ class EncyclopaediaMetallum(Page):
|
||||
|
||||
href = anchor.get("href")
|
||||
if href is not None:
|
||||
artist_sources.append(Source(cls.SOURCE_TYPE, href.strip()))
|
||||
artist_sources.append(Source(self.SOURCE_TYPE, href.strip()))
|
||||
|
||||
artist_name = anchor.get_text(strip=True)
|
||||
|
||||
@ -547,13 +537,13 @@ class EncyclopaediaMetallum(Page):
|
||||
))
|
||||
|
||||
elif len(artist_soup_list) > 1:
|
||||
cls.LOGGER.debug("there are more than 1 artist soups")
|
||||
self.LOGGER.debug("there are more than 1 artist soups")
|
||||
|
||||
_parse_album_info(album_info_soup=album_soup.find(id="album_info"))
|
||||
|
||||
tracklist_soup = album_soup.find("table", {"class": "table_lyrics"}).find("tbody")
|
||||
for track_soup in tracklist_soup.find_all("tr", {"class": ["even", "odd"]}):
|
||||
tracklist.append(cls._parse_album_track_row(track_row=track_soup))
|
||||
tracklist.append(self._parse_album_track_row(track_row=track_soup))
|
||||
|
||||
return Album(
|
||||
title=album_name,
|
||||
@ -562,8 +552,7 @@ class EncyclopaediaMetallum(Page):
|
||||
song_list=tracklist
|
||||
)
|
||||
|
||||
@classmethod
|
||||
def _fetch_album_from_source(cls, source: Source, stop_at_level: int = 1) -> Album:
|
||||
def fetch_album(self, source: Source, stop_at_level: int = 1) -> Album:
|
||||
"""
|
||||
I am preeeety sure I can get way more data than... nothing from there
|
||||
|
||||
@ -574,23 +563,22 @@ class EncyclopaediaMetallum(Page):
|
||||
|
||||
# <table class="display table_lyrics
|
||||
|
||||
r = cls.CONNECTION.get(source.url)
|
||||
r = self.connection.get(source.url)
|
||||
if r is None:
|
||||
return Album()
|
||||
|
||||
soup = cls.get_soup_from_response(r)
|
||||
soup = self.get_soup_from_response(r)
|
||||
|
||||
album = cls._parse_album_attributes(soup, stop_at_level=stop_at_level)
|
||||
album = self._parse_album_attributes(soup, stop_at_level=stop_at_level)
|
||||
|
||||
if stop_at_level > 1:
|
||||
for song in album.song_collection:
|
||||
for source in song.source_collection.get_sources_from_page(cls.SOURCE_TYPE):
|
||||
song.merge(cls._fetch_song_from_source(source=source, stop_at_level=stop_at_level-1))
|
||||
for source in song.source_collection.get_sources_from_page(self.SOURCE_TYPE):
|
||||
song.merge(self._fetch_song_from_source(source=source, stop_at_level=stop_at_level-1))
|
||||
|
||||
return album
|
||||
|
||||
@classmethod
|
||||
def _fetch_lyrics(cls, song_id: str) -> Optional[Lyrics]:
|
||||
def _fetch_lyrics(self, song_id: str) -> Optional[Lyrics]:
|
||||
"""
|
||||
function toggleLyrics(songId) {
|
||||
var lyricsRow = $('#song' + songId);
|
||||
@ -614,7 +602,7 @@ class EncyclopaediaMetallum(Page):
|
||||
|
||||
endpoint = "https://www.metal-archives.com/release/ajax-view-lyrics/id/{id}".format(id=song_id)
|
||||
|
||||
r = cls.CONNECTION.get(endpoint)
|
||||
r = self.connection.get(endpoint)
|
||||
if r is None:
|
||||
return None
|
||||
|
||||
@ -622,22 +610,27 @@ class EncyclopaediaMetallum(Page):
|
||||
text=FormattedText(html=r.text),
|
||||
language=pycountry.languages.get(alpha_2="en"),
|
||||
source_list=[
|
||||
Source(cls.SOURCE_TYPE, endpoint)
|
||||
Source(self.SOURCE_TYPE, endpoint)
|
||||
]
|
||||
)
|
||||
|
||||
@classmethod
|
||||
def _fetch_song_from_source(cls, source: Source, stop_at_level: int = 1) -> Song:
|
||||
def fetch_song(self, source: Source, stop_at_level: int = 1) -> Song:
|
||||
song_id = source.url
|
||||
|
||||
return Song(
|
||||
lyrics_list=[
|
||||
cls._fetch_lyrics(song_id=song_id)
|
||||
self._fetch_lyrics(song_id=song_id)
|
||||
]
|
||||
)
|
||||
|
||||
@classmethod
|
||||
def _get_type_of_url(cls, url: str) -> Optional[Union[Type[Song], Type[Album], Type[Artist], Type[Label]]]:
|
||||
def get_source_type(self, source: Source):
|
||||
if self.SOURCE_TYPE != source.page_enum:
|
||||
return None
|
||||
|
||||
url = source.url
|
||||
if url is None:
|
||||
return None
|
||||
|
||||
parsed_url = urlparse(url)
|
||||
path: List[str] = parsed_url.path.split("/")
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user