From e4fd9faf12a0bc3a9506106c3505a59bab57292d Mon Sep 17 00:00:00 2001 From: Lars Noack Date: Tue, 21 May 2024 15:57:09 +0200 Subject: [PATCH] feat: detecting url type --- development/actual_donwload.py | 5 ++- music_kraken/pages/genius.py | 64 ++++------------------------------ 2 files changed, 8 insertions(+), 61 deletions(-) diff --git a/development/actual_donwload.py b/development/actual_donwload.py index d91876e..ad4e316 100644 --- a/development/actual_donwload.py +++ b/development/actual_donwload.py @@ -6,9 +6,8 @@ logging.getLogger().setLevel(logging.DEBUG) if __name__ == "__main__": commands = [ - "s: #a I'm in a coffin", - "0", - "d: 0", + "s: #a Crystal F", + "10", ] diff --git a/music_kraken/pages/genius.py b/music_kraken/pages/genius.py index 1ff2eda..e6fa86f 100644 --- a/music_kraken/pages/genius.py +++ b/music_kraken/pages/genius.py @@ -48,8 +48,13 @@ class Genius(Page): def get_source_type(self, source: Source) -> Optional[Type[DatabaseObject]]: path = source.parsed_url.path.replace("/", "") + + if path.startswith("artists"): + return Artist + if path.startswith("albums"): + return Album - return super().get_source_type(source) + return Song def add_to_artwork(self, artwork: Artwork, url: str): if url is None: @@ -158,63 +163,6 @@ class Genius(Page): return results - def fetch_label(self, source: Source, stop_at_level: int = 1) -> Label: - return Label() - - def _parse_artist_details(self, soup: BeautifulSoup) -> Artist: - name: str = None - source_list: List[Source] = [] - contact_list: List[Contact] = [] - - band_name_location: BeautifulSoup = soup.find("p", {"id": "band-name-location"}) - if band_name_location is not None: - title_span = band_name_location.find("span", {"class": "title"}) - if title_span is not None: - name = title_span.text.strip() - - link_container: BeautifulSoup = soup.find("ol", {"id": "band-links"}) - if link_container is not None: - li: BeautifulSoup - for li in link_container.find_all("a"): - if li is None and li['href'] is not None: - continue - - source_list.append(Source.match_url(_parse_artist_url(li['href']), referrer_page=self.SOURCE_TYPE)) - - return Artist( - name=name, - source_list=source_list - ) - - def _parse_album(self, soup: BeautifulSoup, initial_source: Source) -> List[Album]: - title = None - source_list: List[Source] = [] - - a = soup.find("a") - if a is not None and a["href"] is not None: - source_list.append(Source(self.SOURCE_TYPE, _get_host(initial_source) + a["href"])) - - title_p = soup.find("p", {"class": "title"}) - if title_p is not None: - title = title_p.text.strip() - - return Album(title=title, source_list=source_list) - - def _parse_artist_data_blob(self, data_blob: dict, artist_url: str): - parsed_artist_url = urlparse(artist_url) - album_list: List[Album] = [] - - for album_json in data_blob.get("buyfulldisco", {}).get("tralbums", []): - album_list.append(Album( - title=album_json["title"].strip(), - source_list=[Source( - self.SOURCE_TYPE, - urlunparse((parsed_artist_url.scheme, parsed_artist_url.netloc, album_json["page_url"], "", "", "")) - )] - )) - - return album_list - def fetch_artist(self, source: Source, stop_at_level: int = 1) -> Artist: artist = Artist()