diff --git a/src/music_kraken/pages/abstract.py b/src/music_kraken/pages/abstract.py index 4a943b3..cd29db3 100644 --- a/src/music_kraken/pages/abstract.py +++ b/src/music_kraken/pages/abstract.py @@ -29,6 +29,7 @@ from ..utils.support_classes import Query, DownloadResult, DefaultTarget INDEPENDENT_DB_OBJECTS = Union[Label, Album, Artist, Song] INDEPENDENT_DB_TYPES = Union[Type[Song], Type[Album], Type[Artist], Type[Label]] + def _clean_music_object(music_object: INDEPENDENT_DB_OBJECTS, collections: Dict[INDEPENDENT_DB_TYPES, Collection]): if type(music_object) == Label: return _clean_label(label=music_object, collections=collections) @@ -104,9 +105,11 @@ class Page(threading.Thread): def run(self) -> None: pass + + def get_source_type(self, source: Source) -> Optional[INDEPENDENT_DB_TYPES]: + return None - @classmethod - def get_soup_from_response(cls, r: requests.Response) -> BeautifulSoup: + def get_soup_from_response(self, r: requests.Response) -> BeautifulSoup: return BeautifulSoup(r.content, "html.parser") # to search stuff @@ -168,7 +171,7 @@ class Page(threading.Thread): new_music_object: DatabaseObject = type(music_object)() # only certain database objects, have a source list - if isinstance(music_object, INDEPENDENT_DB_TYPES): + if isinstance(music_object, INDEPENDENT_DB_OBJECTS): source: Source for source in music_object.source_collection.get_sources_from_page(self.SOURCE_TYPE): new_music_object.merge( @@ -181,34 +184,43 @@ class Page(threading.Thread): return music_object - def fetch_object_from_source(self, source: Source, stop_at_level: int = 2, enforce_type: Type[DatabaseObject] = None, post_process: bool = True) -> DatabaseObject: - obj_type = self._get_type_of_url(source.url) + def fetch_object_from_source(self, source: Source, stop_at_level: int = 2, enforce_type: Type[DatabaseObject] = None, post_process: bool = True) -> Optional[DatabaseObject]: + obj_type = self.get_source_type(source) + if obj_type is None: return None + if enforce_type != obj_type and enforce_type is not None: + self.LOGGER.warning(f"Object type isn't type to enforce: {enforce_type}, {obj_type}") + return None + + music_object: DatabaseObject = None + + fetch_map = { + Song: self.fetch_song, + Album: self.fetch_album, + Artist: self.fetch_artist, + Label: self.fetch_label + } + + if obj_type in fetch_map: + music_object = fetch_map[obj_type](source, stop_at_level) - music_object = cls._fetch_object_from_source(source=source, obj_type=obj_type, stop_at_level=stop_at_level) - - if post_process: + if post_process and music_object is not None: return post_process_object(music_object) return music_object + + def fetch_song(self, source: Source, stop_at_level: int = 1) -> Song: + return Song() - @classmethod - def _fetch_object_from_source(cls, source: Source, - obj_type: Union[Type[Song], Type[Album], Type[Artist], Type[Label]], - stop_at_level: int = 1) -> Union[Song, Album, Artist, Label]: - if obj_type == Artist: - return cls._fetch_artist_from_source(source=source, stop_at_level=stop_at_level) + def fetch_album(self, source: Source, stop_at_level: int = 1) -> Album: + return Album() - if obj_type == Song: - return cls._fetch_song_from_source(source=source, stop_at_level=stop_at_level) - - if obj_type == Album: - return cls._fetch_album_from_source(source=source, stop_at_level=stop_at_level) - - if obj_type == Label: - return cls._fetch_label_from_source(source=source, stop_at_level=stop_at_level) + def fetch_artist(self, source: Source, stop_at_level: int = 1) -> Artist: + return Artist() + def fetch_label(self, source: Source, stop_at_level: int = 1) -> Label: + return Label() @classmethod def download( @@ -461,26 +473,6 @@ class Page(threading.Thread): return r - @classmethod - def _fetch_song_from_source(cls, source: Source, stop_at_level: int = 1) -> Song: - return Song() - - @classmethod - def _fetch_album_from_source(cls, source: Source, stop_at_level: int = 1) -> Album: - return Album() - - @classmethod - def _fetch_artist_from_source(cls, source: Source, stop_at_level: int = 1) -> Artist: - return Artist() - - @classmethod - def _fetch_label_from_source(cls, source: Source, stop_at_level: int = 1) -> Label: - return Label() - - @classmethod - def _get_type_of_url(cls, url: str) -> Optional[Union[Type[Song], Type[Album], Type[Artist], Type[Label]]]: - return None - @classmethod def _download_song_to_targets(cls, source: Source, target: Target, desc: str = None) -> DownloadResult: return DownloadResult() diff --git a/src/music_kraken/pages/encyclopaedia_metallum.py b/src/music_kraken/pages/encyclopaedia_metallum.py index 8a57fae..42e0e5f 100644 --- a/src/music_kraken/pages/encyclopaedia_metallum.py +++ b/src/music_kraken/pages/encyclopaedia_metallum.py @@ -218,16 +218,14 @@ class EncyclopaediaMetallum(Page): for raw_artist in r.json()['aaData'] ] - - @classmethod - def _fetch_artist_discography(cls, ma_artist_id: str) -> List[Album]: + def _fetch_artist_discography(self, ma_artist_id: str) -> List[Album]: discography_url = "https://www.metal-archives.com/band/discography/id/{}/tab/all" # make the request - r = cls.CONNECTION.get(discography_url.format(ma_artist_id)) + r = self.connection.get(discography_url.format(ma_artist_id)) if r is None: return [] - soup = cls.get_soup_from_response(r) + soup = self.get_soup_from_response(r) discography = [] @@ -251,21 +249,20 @@ class EncyclopaediaMetallum(Page): Album( title=album_name, date=date_obj, - album_type=cls.ALBUM_TYPE_MAP[raw_album_type], - source_list=[Source(SourcePages.ENCYCLOPAEDIA_METALLUM, album_url)] + album_type=ALBUM_TYPE_MAP[raw_album_type], + source_list=[Source(self.SOURCE_TYPE, album_url)] ) ) return discography - @classmethod - def _fetch_artist_sources(cls, ma_artist_id: str) -> List[Source]: + def _fetch_artist_sources(self, ma_artist_id: str) -> List[Source]: sources_url = "https://www.metal-archives.com/link/ajax-list/type/band/id/{}" - r = cls.CONNECTION.get(sources_url.format(ma_artist_id)) + r = self.connection.get(sources_url.format(ma_artist_id)) if r is None: return [] - soup = cls.get_soup_from_response(r) + soup = self.get_soup_from_response(r) if soup.find("span", {"id": "noLinks"}) is not None: return [] @@ -289,12 +286,11 @@ class EncyclopaediaMetallum(Page): if url is None: continue - source_list.append(Source.match_url(url, referer_page=cls.SOURCE_TYPE)) + source_list.append(Source.match_url(url, referer_page=self.SOURCE_TYPE)) return source_list - @classmethod - def _parse_artist_attributes(cls, artist_soup: BeautifulSoup) -> Artist: + def _parse_artist_attributes(self, artist_soup: BeautifulSoup) -> Artist: name: str = None country: pycountry.Countrie = None formed_in_year: int = None @@ -311,7 +307,7 @@ class EncyclopaediaMetallum(Page): if title_text.count(bad_name_substring) == 1: name = title_text.replace(bad_name_substring, "") else: - cls.LOGGER.debug(f"the title of the page is \"{title_text}\"") + self.LOGGER.debug(f"the title of the page is \"{title_text}\"") """ TODO @@ -341,7 +337,7 @@ class EncyclopaediaMetallum(Page): href = anchor.get("href") if href is not None: - source_list.append(Source(cls.SOURCE_TYPE, href)) + source_list.append(Source(self.SOURCE_TYPE, href)) name = anchor.get_text(strip=True) @@ -400,35 +396,32 @@ class EncyclopaediaMetallum(Page): Label( name=label_name, source_list=[ - Source(cls.SOURCE_TYPE, label_url) + Source(self.SOURCE_TYPE, label_url) ] ) ], source_list=source_list ) - @classmethod - def _fetch_artist_attributes(cls, url: str) -> Artist: - r = cls.CONNECTION.get(url) + def _fetch_artist_attributes(self, url: str) -> Artist: + r = self.connection.get(url) if r is None: return Artist() - soup: BeautifulSoup = cls.get_soup_from_response(r) + soup: BeautifulSoup = self.get_soup_from_response(r) - return cls._parse_artist_attributes(artist_soup=soup) + return self._parse_artist_attributes(artist_soup=soup) - @classmethod - def _fetch_band_notes(cls, ma_artist_id: str) -> Optional[FormattedText]: + def _fetch_band_notes(self, ma_artist_id: str) -> Optional[FormattedText]: endpoint = "https://www.metal-archives.com/band/read-more/id/{}" # make the request - r = cls.CONNECTION.get(endpoint.format(ma_artist_id)) + r = self.connection.get(endpoint.format(ma_artist_id)) if r is None: return FormattedText() return FormattedText(html=r.text) - @classmethod - def _fetch_artist_from_source(cls, source: Source, stop_at_level: int = 1) -> Artist: + def fetch_artist(self, source: Source, stop_at_level: int = 1) -> Artist: """ What it could fetch, and what is implemented: @@ -440,28 +433,27 @@ class EncyclopaediaMetallum(Page): [x] band notes: https://www.metal-archives.com/band/read-more/id/3540372489 """ - artist = cls._fetch_artist_attributes(source.url) + artist = self._fetch_artist_attributes(source.url) artist_id = source.url.split("/")[-1] - artist_sources = cls._fetch_artist_sources(artist_id) + artist_sources = self._fetch_artist_sources(artist_id) artist.source_collection.extend(artist_sources) - band_notes = cls._fetch_band_notes(artist_id) + band_notes = self._fetch_band_notes(artist_id) if band_notes is not None: artist.notes = band_notes - discography: List[Album] = cls._fetch_artist_discography(artist_id) + discography: List[Album] = self._fetch_artist_discography(artist_id) if stop_at_level > 1: for album in discography: - for source in album.source_collection.get_sources_from_page(cls.SOURCE_TYPE): - album.merge(cls._fetch_album_from_source(source, stop_at_level=stop_at_level-1)) + for source in album.source_collection.get_sources_from_page(self.SOURCE_TYPE): + album.merge(self._fetch_album_from_source(source, stop_at_level=stop_at_level-1)) artist.main_album_collection.extend(discography) return artist - @classmethod - def _parse_album_track_row(cls, track_row: BeautifulSoup) -> Song: + def _parse_album_track_row(self, track_row: BeautifulSoup) -> Song: """ 1. # id and tracksort @@ -482,7 +474,7 @@ class EncyclopaediaMetallum(Page): track_id = track_sort_soup.find("a").get("name").strip() if track_row.find("a", {"href": f"#{track_id}"}) is not None: - source_list.append(Source(cls.SOURCE_TYPE, track_id)) + source_list.append(Source(self.SOURCE_TYPE, track_id)) title = row_list[1].text.strip() @@ -500,9 +492,7 @@ class EncyclopaediaMetallum(Page): source_list=source_list ) - - @classmethod - def _parse_album_attributes(cls, album_soup: BeautifulSoup, stop_at_level: int = 1) -> Album: + def _parse_album_attributes(self, album_soup: BeautifulSoup, stop_at_level: int = 1) -> Album: tracklist: List[Song] = [] artist_list = [] album_name: str = None @@ -522,12 +512,12 @@ class EncyclopaediaMetallum(Page): href = anchor.get("href") if href is not None: - source_list.append(Source(cls.SOURCE_TYPE, href.strip())) + source_list.append(Source(self.SOURCE_TYPE, href.strip())) album_name = anchor.get_text(strip=True) elif len(album_soup_list) > 1: - cls.LOGGER.debug("there are more than 1 album soups") + self.LOGGER.debug("there are more than 1 album soups") artist_soup_list = album_info_soup.find_all("h2", {"class": "band_name"}) @@ -537,7 +527,7 @@ class EncyclopaediaMetallum(Page): href = anchor.get("href") if href is not None: - artist_sources.append(Source(cls.SOURCE_TYPE, href.strip())) + artist_sources.append(Source(self.SOURCE_TYPE, href.strip())) artist_name = anchor.get_text(strip=True) @@ -547,13 +537,13 @@ class EncyclopaediaMetallum(Page): )) elif len(artist_soup_list) > 1: - cls.LOGGER.debug("there are more than 1 artist soups") + self.LOGGER.debug("there are more than 1 artist soups") _parse_album_info(album_info_soup=album_soup.find(id="album_info")) tracklist_soup = album_soup.find("table", {"class": "table_lyrics"}).find("tbody") for track_soup in tracklist_soup.find_all("tr", {"class": ["even", "odd"]}): - tracklist.append(cls._parse_album_track_row(track_row=track_soup)) + tracklist.append(self._parse_album_track_row(track_row=track_soup)) return Album( title=album_name, @@ -562,8 +552,7 @@ class EncyclopaediaMetallum(Page): song_list=tracklist ) - @classmethod - def _fetch_album_from_source(cls, source: Source, stop_at_level: int = 1) -> Album: + def fetch_album(self, source: Source, stop_at_level: int = 1) -> Album: """ I am preeeety sure I can get way more data than... nothing from there @@ -574,23 +563,22 @@ class EncyclopaediaMetallum(Page): # Song: + def fetch_song(self, source: Source, stop_at_level: int = 1) -> Song: song_id = source.url return Song( lyrics_list=[ - cls._fetch_lyrics(song_id=song_id) + self._fetch_lyrics(song_id=song_id) ] ) - @classmethod - def _get_type_of_url(cls, url: str) -> Optional[Union[Type[Song], Type[Album], Type[Artist], Type[Label]]]: + def get_source_type(self, source: Source): + if self.SOURCE_TYPE != source.page_enum: + return None + + url = source.url + if url is None: + return None + parsed_url = urlparse(url) path: List[str] = parsed_url.path.split("/")