From 769d27dc5c6174c1c9a6d676cb4a56977fe5b9bf Mon Sep 17 00:00:00 2001 From: Lars Noack Date: Tue, 21 May 2024 16:43:52 +0200 Subject: [PATCH] feat: album details --- development/actual_donwload.py | 1 + music_kraken/pages/genius.py | 134 +++++---------------------------- 2 files changed, 20 insertions(+), 115 deletions(-) diff --git a/development/actual_donwload.py b/development/actual_donwload.py index ad4e316..1ee7563 100644 --- a/development/actual_donwload.py +++ b/development/actual_donwload.py @@ -8,6 +8,7 @@ if __name__ == "__main__": commands = [ "s: #a Crystal F", "10", + "1", ] diff --git a/music_kraken/pages/genius.py b/music_kraken/pages/genius.py index 15f1ac2..e17aea9 100644 --- a/music_kraken/pages/genius.py +++ b/music_kraken/pages/genius.py @@ -113,7 +113,7 @@ class Genius(Page): additional_sources.append(Source(ALL_SOURCE_TYPES.TWITTER, f"https://x.com/{data['twitter_name']}/")) return Artist( - name=data.get("name"), + name=data.get("name").strip(), source_list=[source], artwork=artwork, notes=notes, @@ -123,8 +123,12 @@ class Genius(Page): self.add_to_artwork(artwork, data.get("cover_art_thumbnail_url")) self.add_to_artwork(artwork, data.get("cover_art_url")) + for cover_art in data.get("cover_arts", []): + self.add_to_artwork(artwork, cover_art.get("image_url")) + self.add_to_artwork(artwork, cover_art.get("thumbnail_image_url")) + return Album( - title=data.get("name"), + title=data.get("name").strip(), source_list=[source], artist_list=[self.parse_api_object(data.get("artist"))], artwork=artwork, @@ -223,130 +227,30 @@ class Genius(Page): return artist - def _parse_track_element(self, track: dict, artwork: Artwork) -> Optional[Song]: - lyrics_list: List[Lyrics] = [] - - _lyrics: Optional[str] = track.get("item", {}).get("recordingOf", {}).get("lyrics", {}).get("text") - if _lyrics is not None: - lyrics_list.append(Lyrics(text=FormattedText(plain=_lyrics))) - - return Song( - title=clean_song_title(track["item"]["name"]), - source_list=[Source(self.SOURCE_TYPE, track["item"]["mainEntityOfPage"])], - tracksort=int(track["position"]), - artwork=artwork, - ) - def fetch_album(self, source: Source, stop_at_level: int = 1) -> Album: - album = Album() + album: Album = Album() + # https://genius.com/api/artists/24527/albums?page=1 - r = self.connection.get(source.url, name=f"album_{urlparse(source.url).netloc.split('.')[0]}_{urlparse(source.url).path.replace('/', '').replace('album', '')}") + r = self.connection.get(source.url, name=source.url) if r is None: return album - soup = self.get_soup_from_response(r) - data_container = soup.find("script", {"type": "application/ld+json"}) - - if DEBUG: - dump_to_file("album_data.json", data_container.text, is_json=True, exit_after_dump=False) - - data = json.loads(data_container.text) - artist_data = data["byArtist"] - - artist_source_list = [] - if "@id" in artist_data: - artist_source_list = [Source(self.SOURCE_TYPE, _parse_artist_url(artist_data["@id"]))] - album = Album( - title=data["name"].strip(), - source_list=[Source(self.SOURCE_TYPE, data.get("mainEntityOfPage", data["@id"]))], - date=ID3Timestamp.strptime(data["datePublished"], "%d %b %Y %H:%M:%S %Z"), - artist_list=[Artist( - name=artist_data["name"].strip(), - source_list=artist_source_list - )] - ) - - artwork: Artwork = Artwork() - - def _get_artwork_url(_data: dict) -> Optional[str]: - if "image" in _data: - return _data["image"] - for _property in _data.get("additionalProperty", []): - if _property.get("name") == "art_id": - return f"https://f4.bcbits.com/img/a{_property.get('value')}_2.jpg" - - _artwork_url = _get_artwork_url(data) - if _artwork_url is not None: - artwork.append(url=_artwork_url, width=350, height=350) - else: - for album_release in data.get("albumRelease", []): - _artwork_url = _get_artwork_url(album_release) - if _artwork_url is not None: - artwork.append(url=_artwork_url, width=350, height=350) - break - - - for i, track_json in enumerate(data.get("track", {}).get("itemListElement", [])): - if DEBUG: - dump_to_file(f"album_track_{i}.json", json.dumps(track_json), is_json=True, exit_after_dump=False) - - try: - album.song_collection.append(self._parse_track_element(track_json, artwork=artwork)) - except KeyError: - continue + # find the content attribute in the meta tag which is contained in the head + data_container = soup.find("meta", {"itemprop": "page_data"}) + if data_container is not None: + content = data_container["content"] + dump_to_file("genius_itemprop_album.json", content, is_json=True, exit_after_dump=False) + data = json.loads(content) + album = self.parse_api_object(data.get("album", {})) + album.source_collection.append(source) + return album - def _fetch_lyrics(self, soup: BeautifulSoup) -> List[Lyrics]: - track_lyrics = soup.find("div", {"class": "lyricsText"}) - if track_lyrics: - return [Lyrics(text=FormattedText(html=track_lyrics.prettify()))] - - return [] - def fetch_song(self, source: Source, stop_at_level: int = 1) -> Song: - r = self.connection.get(source.url, name=f"song_{urlparse(source.url).netloc.split('.')[0]}_{urlparse(source.url).path.replace('/', '').replace('track', '')}") - if r is None: - return Song() - - soup = self.get_soup_from_response(r) - - data_container = soup.find("script", {"type": "application/ld+json"}) - other_data = {} - - other_data_list = soup.select("script[data-tralbum]") - if len(other_data_list) > 0: - other_data = json.loads(other_data_list[0]["data-tralbum"]) - - dump_to_file("bandcamp_song_data.json", data_container.text, is_json=True, exit_after_dump=False) - dump_to_file("bandcamp_song_data_other.json", json.dumps(other_data), is_json=True, exit_after_dump=False) - dump_to_file("bandcamp_song_page.html", r.text, exit_after_dump=False) - - data = json.loads(data_container.text) - album_data = data["inAlbum"] - artist_data = data["byArtist"] - - mp3_url = None - for key, value in other_data.get("trackinfo", [{}])[0].get("file", {"": None}).items(): - mp3_url = value - - song = Song( - title=clean_song_title(data["name"], artist_name=artist_data["name"]), - source_list=[source, Source(self.SOURCE_TYPE, data.get("mainEntityOfPage", data["@id"]), audio_url=mp3_url)], - album_list=[Album( - title=album_data["name"].strip(), - date=ID3Timestamp.strptime(data["datePublished"], "%d %b %Y %H:%M:%S %Z"), - source_list=[Source(self.SOURCE_TYPE, album_data["@id"])] - )], - artist_list=[Artist( - name=artist_data["name"].strip(), - source_list=[Source(self.SOURCE_TYPE, _parse_artist_url(artist_data["@id"]))] - )], - lyrics_list=self._fetch_lyrics(soup=soup) - ) - + song = Song() return song def download_song_to_target(self, source: Source, target: Target, desc: str = None) -> DownloadResult: