Compare commits
	
		
			21 Commits
		
	
	
		
			8cdb5c1f99
			...
			feature/mu
		
	
| Author | SHA1 | Date | |
|---|---|---|---|
| 265c9f462f | |||
| 780daac0ef | |||
| 465af49057 | |||
| 2aa0f02fa5 | |||
| 7b0b830d64 | |||
| 1ba6c97f5a | |||
| c8cbfc7cb9 | |||
| 344da0a0bf | |||
| 49dc7093c8 | |||
| 90f70638b4 | |||
| 7b4eee858a | |||
| f61b34dd40 | |||
| 688b4fd357 | |||
| 769d27dc5c | |||
| f5d953d9ce | |||
| 46b64b8f8d | |||
| adfce16d2a | |||
| e4fd9faf12 | |||
| f6caee41a8 | |||
| 068c749c38 | |||
| c131924577 | |||
							
								
								
									
										1
									
								
								.vscode/settings.json
									
									
									
									
										vendored
									
									
								
							
							
						
						
									
										1
									
								
								.vscode/settings.json
									
									
									
									
										vendored
									
									
								
| @@ -27,6 +27,7 @@ | |||||||
|         "Gitea", |         "Gitea", | ||||||
|         "iframe", |         "iframe", | ||||||
|         "isrc", |         "isrc", | ||||||
|  |         "itemprop", | ||||||
|         "levenshtein", |         "levenshtein", | ||||||
|         "metallum", |         "metallum", | ||||||
|         "MUSICBRAINZ", |         "MUSICBRAINZ", | ||||||
|   | |||||||
| @@ -6,9 +6,10 @@ logging.getLogger().setLevel(logging.DEBUG) | |||||||
|  |  | ||||||
| if __name__ == "__main__": | if __name__ == "__main__": | ||||||
|     commands = [ |     commands = [ | ||||||
|         "s: #a I'm in a coffin", |         "s: #a Crystal F", | ||||||
|         "0", |         "10", | ||||||
|         "d: 0", |         "1", | ||||||
|  |         "3", | ||||||
|     ] |     ] | ||||||
|  |  | ||||||
|      |      | ||||||
|   | |||||||
| @@ -93,6 +93,10 @@ def write_metadata_to_target(metadata: Metadata, target: Target, song: Song): | |||||||
|             # resize the image to the preferred resolution |             # resize the image to the preferred resolution | ||||||
|             img.thumbnail((main_settings["preferred_artwork_resolution"], main_settings["preferred_artwork_resolution"])) |             img.thumbnail((main_settings["preferred_artwork_resolution"], main_settings["preferred_artwork_resolution"])) | ||||||
|  |  | ||||||
|  |             # https://stackoverflow.com/a/59476938/16804841 | ||||||
|  |             if img.mode != 'RGB': | ||||||
|  |                 img = img.convert('RGB') | ||||||
|  |  | ||||||
|             img.save(converted_target.file_path, "JPEG") |             img.save(converted_target.file_path, "JPEG") | ||||||
|  |  | ||||||
|         # https://stackoverflow.com/questions/70228440/mutagen-how-can-i-correctly-embed-album-art-into-mp3-file-so-that-i-can-see-t |         # https://stackoverflow.com/questions/70228440/mutagen-how-can-i-correctly-embed-album-art-into-mp3-file-so-that-i-can-see-t | ||||||
|   | |||||||
| @@ -6,6 +6,7 @@ from typing import List, Optional | |||||||
| from functools import lru_cache | from functools import lru_cache | ||||||
| import logging | import logging | ||||||
|  |  | ||||||
|  | from ..utils import output, BColors | ||||||
| from ..utils.config import main_settings | from ..utils.config import main_settings | ||||||
| from ..utils.string_processing import fit_to_file_system | from ..utils.string_processing import fit_to_file_system | ||||||
|  |  | ||||||
| @@ -136,13 +137,13 @@ class Cache: | |||||||
|         ) |         ) | ||||||
|         self._write_attribute(cache_attribute) |         self._write_attribute(cache_attribute) | ||||||
|  |  | ||||||
|         cache_path = fit_to_file_system(Path(module_path, name), hidden_ok=True) |         cache_path = fit_to_file_system(Path(module_path, name.replace("/", "_")), hidden_ok=True) | ||||||
|         with cache_path.open("wb") as content_file: |         with cache_path.open("wb") as content_file: | ||||||
|             self.logger.debug(f"writing cache to {cache_path}") |             self.logger.debug(f"writing cache to {cache_path}") | ||||||
|             content_file.write(content) |             content_file.write(content) | ||||||
|  |  | ||||||
|     def get(self, name: str) -> Optional[CacheResult]: |     def get(self, name: str) -> Optional[CacheResult]: | ||||||
|         path = fit_to_file_system(Path(self._dir, self.module, name), hidden_ok=True) |         path = fit_to_file_system(Path(self._dir, self.module, name.replace("/", "_")), hidden_ok=True) | ||||||
|  |  | ||||||
|         if not path.is_file(): |         if not path.is_file(): | ||||||
|             return None |             return None | ||||||
| @@ -165,7 +166,7 @@ class Cache: | |||||||
|             if ca.name == "": |             if ca.name == "": | ||||||
|                 continue |                 continue | ||||||
|  |  | ||||||
|             file = fit_to_file_system(Path(self._dir, ca.module, ca.name), hidden_ok=True) |             file = fit_to_file_system(Path(self._dir, ca.module, ca.name.replace("/", "_")), hidden_ok=True) | ||||||
|  |  | ||||||
|             if not ca.is_valid: |             if not ca.is_valid: | ||||||
|                 self.logger.debug(f"deleting cache {ca.id}") |                 self.logger.debug(f"deleting cache {ca.id}") | ||||||
| @@ -204,9 +205,12 @@ class Cache: | |||||||
|         for path in self._dir.iterdir(): |         for path in self._dir.iterdir(): | ||||||
|             if path.is_dir(): |             if path.is_dir(): | ||||||
|                 for file in path.iterdir(): |                 for file in path.iterdir(): | ||||||
|  |                     output(f"Deleting file {file}", color=BColors.GREY) | ||||||
|                     file.unlink() |                     file.unlink() | ||||||
|  |                 output(f"Deleting folder {path}", color=BColors.HEADER) | ||||||
|                 path.rmdir() |                 path.rmdir() | ||||||
|             else: |             else: | ||||||
|  |                 output(f"Deleting folder {path}", color=BColors.HEADER) | ||||||
|                 path.unlink() |                 path.unlink() | ||||||
|  |  | ||||||
|         self.cached_attributes.clear() |         self.cached_attributes.clear() | ||||||
|   | |||||||
| @@ -30,14 +30,16 @@ from ..utils.exception import MKMissingNameException | |||||||
| from ..utils.exception.download import UrlNotFoundException | from ..utils.exception.download import UrlNotFoundException | ||||||
| from ..utils.shared import DEBUG_PAGES | from ..utils.shared import DEBUG_PAGES | ||||||
|  |  | ||||||
| from ..pages import Page, EncyclopaediaMetallum, Musify, YouTube, YoutubeMusic, Bandcamp, INDEPENDENT_DB_OBJECTS | from ..pages import Page, EncyclopaediaMetallum, Musify, YouTube, YoutubeMusic, Bandcamp, Musicbrainz, Genius, INDEPENDENT_DB_OBJECTS | ||||||
|  |  | ||||||
|  |  | ||||||
| ALL_PAGES: Set[Type[Page]] = { | ALL_PAGES: Set[Type[Page]] = { | ||||||
|     # EncyclopaediaMetallum, |     # EncyclopaediaMetallum, | ||||||
|  |     Genius, | ||||||
|     Musify, |     Musify, | ||||||
|     YoutubeMusic, |     YoutubeMusic, | ||||||
|     Bandcamp |     Bandcamp, | ||||||
|  |     Musicbrainz | ||||||
| } | } | ||||||
|  |  | ||||||
| if youtube_settings["use_youtube_alongside_youtube_music"]: | if youtube_settings["use_youtube_alongside_youtube_music"]: | ||||||
|   | |||||||
| @@ -59,4 +59,6 @@ class Artwork: | |||||||
|                 self._variant_mapping[key] = value |                 self._variant_mapping[key] = value | ||||||
|  |  | ||||||
|     def __eq__(self, other: Artwork) -> bool: |     def __eq__(self, other: Artwork) -> bool: | ||||||
|  |         if not isinstance(other, Artwork): | ||||||
|  |             return False | ||||||
|         return any(a == b for a, b in zip(self._variant_mapping.keys(), other._variant_mapping.keys())) |         return any(a == b for a, b in zip(self._variant_mapping.keys(), other._variant_mapping.keys())) | ||||||
|   | |||||||
| @@ -38,11 +38,19 @@ class FormattedText: | |||||||
|     def markdown(self) -> str: |     def markdown(self) -> str: | ||||||
|         return md(self.html).strip() |         return md(self.html).strip() | ||||||
|      |      | ||||||
|  |     @markdown.setter | ||||||
|  |     def markdown(self, value: str) -> None: | ||||||
|  |         self.html = mistune.markdown(value) | ||||||
|  |  | ||||||
|     @property |     @property | ||||||
|     def plain(self) -> str: |     def plain(self) -> str: | ||||||
|         md = self.markdown |         md = self.markdown | ||||||
|         return md.replace("\n\n", "\n") |         return md.replace("\n\n", "\n") | ||||||
|      |      | ||||||
|  |     @plain.setter | ||||||
|  |     def plain(self, value: str) -> None: | ||||||
|  |         self.html = mistune.markdown(plain_to_markdown(value)) | ||||||
|  |  | ||||||
|     def __str__(self) -> str: |     def __str__(self) -> str: | ||||||
|         return self.markdown |         return self.markdown | ||||||
|  |  | ||||||
|   | |||||||
| @@ -1,7 +1,9 @@ | |||||||
| from .encyclopaedia_metallum import EncyclopaediaMetallum | from .encyclopaedia_metallum import EncyclopaediaMetallum | ||||||
| from .musify import Musify | from .musify import Musify | ||||||
|  | from .musicbrainz import Musicbrainz | ||||||
| from .youtube import YouTube | from .youtube import YouTube | ||||||
| from .youtube_music import YoutubeMusic | from .youtube_music import YoutubeMusic | ||||||
| from .bandcamp import Bandcamp | from .bandcamp import Bandcamp | ||||||
|  | from .genius import Genius | ||||||
|  |  | ||||||
| from .abstract import Page, INDEPENDENT_DB_OBJECTS | from .abstract import Page, INDEPENDENT_DB_OBJECTS | ||||||
|   | |||||||
							
								
								
									
										297
									
								
								music_kraken/pages/genius.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										297
									
								
								music_kraken/pages/genius.py
									
									
									
									
									
										Normal file
									
								
| @@ -0,0 +1,297 @@ | |||||||
|  | from typing import List, Optional, Type | ||||||
|  | from urllib.parse import urlparse, urlunparse, urlencode | ||||||
|  | import json | ||||||
|  | from enum import Enum | ||||||
|  | from bs4 import BeautifulSoup | ||||||
|  | import pycountry | ||||||
|  |  | ||||||
|  | from ..objects import Source, DatabaseObject | ||||||
|  | from .abstract import Page | ||||||
|  | from ..objects import ( | ||||||
|  |     Artist, | ||||||
|  |     Source, | ||||||
|  |     SourceType, | ||||||
|  |     Song, | ||||||
|  |     Album, | ||||||
|  |     Label, | ||||||
|  |     Target, | ||||||
|  |     Contact, | ||||||
|  |     ID3Timestamp, | ||||||
|  |     Lyrics, | ||||||
|  |     FormattedText, | ||||||
|  |     Artwork, | ||||||
|  | ) | ||||||
|  | from ..connection import Connection | ||||||
|  | from ..utils import dump_to_file, traverse_json_path | ||||||
|  | from ..utils.enums import SourceType, ALL_SOURCE_TYPES | ||||||
|  | from ..utils.support_classes.download_result import DownloadResult | ||||||
|  | from ..utils.string_processing import clean_song_title | ||||||
|  | from ..utils.config import main_settings, logging_settings | ||||||
|  | from ..utils.shared import DEBUG | ||||||
|  |  | ||||||
|  | if DEBUG: | ||||||
|  |     from ..utils import dump_to_file | ||||||
|  |  | ||||||
|  |  | ||||||
|  | class Genius(Page): | ||||||
|  |     SOURCE_TYPE = ALL_SOURCE_TYPES.GENIUS | ||||||
|  |     HOST = "genius.com" | ||||||
|  |  | ||||||
|  |     def __init__(self, *args, **kwargs): | ||||||
|  |         self.connection: Connection = Connection( | ||||||
|  |             host="https://genius.com/", | ||||||
|  |             logger=self.LOGGER, | ||||||
|  |             module="genius", | ||||||
|  |         ) | ||||||
|  |  | ||||||
|  |         super().__init__(*args, **kwargs) | ||||||
|  |  | ||||||
|  |     def get_source_type(self, source: Source) -> Optional[Type[DatabaseObject]]: | ||||||
|  |         path = source.parsed_url.path.replace("/", "") | ||||||
|  |          | ||||||
|  |         if path.startswith("artists"): | ||||||
|  |             return Artist | ||||||
|  |         if path.startswith("albums"): | ||||||
|  |             return Album | ||||||
|  |  | ||||||
|  |         return Song | ||||||
|  |  | ||||||
|  |     def add_to_artwork(self, artwork: Artwork, url: str): | ||||||
|  |         if url is None: | ||||||
|  |             return | ||||||
|  |          | ||||||
|  |         url_frags = url.split(".") | ||||||
|  |         if len(url_frags) < 2: | ||||||
|  |             artwork.append(url=url) | ||||||
|  |             return | ||||||
|  |  | ||||||
|  |         dimensions = url_frags[-2].split("x") | ||||||
|  |         if len(dimensions) < 2: | ||||||
|  |             artwork.append(url=url) | ||||||
|  |             return | ||||||
|  |  | ||||||
|  |         if len(dimensions) == 3: | ||||||
|  |             dimensions = dimensions[:-1] | ||||||
|  |          | ||||||
|  |         try: | ||||||
|  |             artwork.append(url=url, width=int(dimensions[0]), height=int(dimensions[1])) | ||||||
|  |         except ValueError: | ||||||
|  |             artwork.append(url=url) | ||||||
|  |  | ||||||
|  |     def parse_api_object(self, data: dict) -> Optional[DatabaseObject]: | ||||||
|  |         if data is None: | ||||||
|  |             return None | ||||||
|  |         object_type = data.get("_type") | ||||||
|  |  | ||||||
|  |         artwork = Artwork() | ||||||
|  |         self.add_to_artwork(artwork, data.get("header_image_url")) | ||||||
|  |         self.add_to_artwork(artwork, data.get("image_url")) | ||||||
|  |          | ||||||
|  |         additional_sources: List[Source] = [] | ||||||
|  |         source: Source = Source(self.SOURCE_TYPE, data.get("url"), additional_data={ | ||||||
|  |             "id": data.get("id"), | ||||||
|  |             "slug": data.get("slug"), | ||||||
|  |             "api_path": data.get("api_path"), | ||||||
|  |         }) | ||||||
|  |  | ||||||
|  |         notes = FormattedText() | ||||||
|  |         description = data.get("description") or {} | ||||||
|  |         if "html" in description: | ||||||
|  |             notes.html = description["html"] | ||||||
|  |         elif "markdown" in description: | ||||||
|  |             notes.markdown = description["markdown"] | ||||||
|  |         elif "description_preview" in data: | ||||||
|  |             notes.plaintext = data["description_preview"] | ||||||
|  |  | ||||||
|  |         if source.url is None: | ||||||
|  |             return None | ||||||
|  |  | ||||||
|  |         if object_type == "artist": | ||||||
|  |             if data.get("instagram_name") is not None: | ||||||
|  |                 additional_sources.append(Source(ALL_SOURCE_TYPES.INSTAGRAM, f"https://www.instagram.com/{data['instagram_name']}/")) | ||||||
|  |             if data.get("facebook_name") is not None: | ||||||
|  |                 additional_sources.append(Source(ALL_SOURCE_TYPES.FACEBOOK, f"https://www.facebook.com/{data['facebook_name']}/")) | ||||||
|  |             if data.get("twitter_name") is not None: | ||||||
|  |                 additional_sources.append(Source(ALL_SOURCE_TYPES.TWITTER, f"https://x.com/{data['twitter_name']}/")) | ||||||
|  |  | ||||||
|  |             return Artist( | ||||||
|  |                 name=data["name"].strip() if data.get("name") is not None else None, | ||||||
|  |                 source_list=[source], | ||||||
|  |                 artwork=artwork, | ||||||
|  |                 notes=notes, | ||||||
|  |             ) | ||||||
|  |  | ||||||
|  |         if object_type == "album": | ||||||
|  |             self.add_to_artwork(artwork, data.get("cover_art_thumbnail_url")) | ||||||
|  |             self.add_to_artwork(artwork, data.get("cover_art_url")) | ||||||
|  |  | ||||||
|  |             for cover_art in data.get("cover_arts", []): | ||||||
|  |                 self.add_to_artwork(artwork, cover_art.get("image_url")) | ||||||
|  |                 self.add_to_artwork(artwork, cover_art.get("thumbnail_image_url")) | ||||||
|  |  | ||||||
|  |             return Album( | ||||||
|  |                 title=data.get("name").strip(), | ||||||
|  |                 source_list=[source], | ||||||
|  |                 artist_list=[self.parse_api_object(data.get("artist"))], | ||||||
|  |                 artwork=artwork, | ||||||
|  |                 date=ID3Timestamp(**(data.get("release_date_components") or {})), | ||||||
|  |             ) | ||||||
|  |  | ||||||
|  |         if object_type == "song": | ||||||
|  |             self.add_to_artwork(artwork, data.get("song_art_image_thumbnail_url")) | ||||||
|  |             self.add_to_artwork(artwork, data.get("song_art_image_url")) | ||||||
|  |  | ||||||
|  |             main_artist_list = [] | ||||||
|  |             featured_artist_list = [] | ||||||
|  |  | ||||||
|  |             _artist_name = None | ||||||
|  |             primary_artist = self.parse_api_object(data.get("primary_artist")) | ||||||
|  |             if primary_artist is not None: | ||||||
|  |                 _artist_name = primary_artist.name | ||||||
|  |                 main_artist_list.append(primary_artist) | ||||||
|  |             for feature_artist in (*(data.get("featured_artists") or []), *(data.get("producer_artists") or []), *(data.get("writer_artists") or [])): | ||||||
|  |                 artist = self.parse_api_object(feature_artist) | ||||||
|  |                 if artist is not None: | ||||||
|  |                     featured_artist_list.append(artist) | ||||||
|  |  | ||||||
|  |             return Song( | ||||||
|  |                 title=clean_song_title(data.get("title"), artist_name=_artist_name), | ||||||
|  |                 source_list=[source], | ||||||
|  |                 artwork=artwork, | ||||||
|  |                 feature_artist_list=featured_artist_list, | ||||||
|  |                 artist_list=main_artist_list, | ||||||
|  |             ) | ||||||
|  |  | ||||||
|  |         return None | ||||||
|  |  | ||||||
|  |     def general_search(self, search_query: str, **kwargs) -> List[DatabaseObject]: | ||||||
|  |         results = [] | ||||||
|  |  | ||||||
|  |         search_params = { | ||||||
|  |             "q": search_query, | ||||||
|  |         } | ||||||
|  |  | ||||||
|  |         r = self.connection.get("https://genius.com/api/search/multi?" + urlencode(search_params), name=f"search_{search_query}") | ||||||
|  |         if r is None: | ||||||
|  |             return results | ||||||
|  |  | ||||||
|  |         dump_to_file("search_genius.json", r.text, is_json=True, exit_after_dump=False) | ||||||
|  |         data = r.json() | ||||||
|  |  | ||||||
|  |         for elements in traverse_json_path(data, "response.sections", default=[]): | ||||||
|  |             hits = elements.get("hits", []) | ||||||
|  |             for hit in hits: | ||||||
|  |                 parsed = self.parse_api_object(hit.get("result")) | ||||||
|  |                 if parsed is not None: | ||||||
|  |                     results.append(parsed) | ||||||
|  |  | ||||||
|  |         return results | ||||||
|  |  | ||||||
|  |     def fetch_artist(self, source: Source, stop_at_level: int = 1) -> Artist: | ||||||
|  |         artist: Artist = Artist() | ||||||
|  |         # https://genius.com/api/artists/24527/albums?page=1 | ||||||
|  |  | ||||||
|  |         r = self.connection.get(source.url, name=source.url) | ||||||
|  |         if r is None: | ||||||
|  |             return artist | ||||||
|  |         soup = self.get_soup_from_response(r) | ||||||
|  |  | ||||||
|  |         # find the content attribute in the meta tag which is contained in the head | ||||||
|  |         data_container = soup.find("meta", {"itemprop": "page_data"}) | ||||||
|  |         if data_container is not None: | ||||||
|  |             content = data_container["content"] | ||||||
|  |             dump_to_file("genius_itemprop_artist.json", content, is_json=True, exit_after_dump=False) | ||||||
|  |             data = json.loads(content) | ||||||
|  |  | ||||||
|  |             artist = self.parse_api_object(data.get("artist")) | ||||||
|  |              | ||||||
|  |             for e in (data.get("artist_albums") or []): | ||||||
|  |                 r = self.parse_api_object(e) | ||||||
|  |                 if not isinstance(r, Album): | ||||||
|  |                     continue | ||||||
|  |  | ||||||
|  |                 artist.album_collection.append(r) | ||||||
|  |              | ||||||
|  |             for e in (data.get("artist_songs") or []): | ||||||
|  |                 r = self.parse_api_object(e) | ||||||
|  |                 if not isinstance(r, Song): | ||||||
|  |                     continue | ||||||
|  |  | ||||||
|  |                 """ | ||||||
|  |                 TODO | ||||||
|  |                 fetch the album for these songs, because the api doesn't  | ||||||
|  |                 return them | ||||||
|  |                 """ | ||||||
|  |  | ||||||
|  |                 artist.album_collection.extend(r.album_collection) | ||||||
|  |  | ||||||
|  |         artist.source_collection.append(source) | ||||||
|  |  | ||||||
|  |         return artist | ||||||
|  |  | ||||||
|  |     def fetch_album(self, source: Source, stop_at_level: int = 1) -> Album: | ||||||
|  |         album: Album = Album() | ||||||
|  |         # https://genius.com/api/artists/24527/albums?page=1 | ||||||
|  |  | ||||||
|  |         r = self.connection.get(source.url, name=source.url) | ||||||
|  |         if r is None: | ||||||
|  |             return album | ||||||
|  |         soup = self.get_soup_from_response(r) | ||||||
|  |  | ||||||
|  |         # find the content attribute in the meta tag which is contained in the head | ||||||
|  |         data_container = soup.find("meta", {"itemprop": "page_data"}) | ||||||
|  |         if data_container is not None: | ||||||
|  |             content = data_container["content"] | ||||||
|  |             dump_to_file("genius_itemprop_album.json", content, is_json=True, exit_after_dump=False) | ||||||
|  |             data = json.loads(content) | ||||||
|  |  | ||||||
|  |             album = self.parse_api_object(data.get("album")) | ||||||
|  |  | ||||||
|  |             for e in data.get("album_appearances", []): | ||||||
|  |                 r = self.parse_api_object(e.get("song")) | ||||||
|  |                 if not isinstance(r, Song): | ||||||
|  |                     continue | ||||||
|  |  | ||||||
|  |                 album.song_collection.append(r) | ||||||
|  |  | ||||||
|  |         album.source_collection.append(source) | ||||||
|  |          | ||||||
|  |         return album | ||||||
|  |  | ||||||
|  |     def get_json_content_from_response(self, response, start: str, end: str) -> Optional[str]: | ||||||
|  |         content = response.text | ||||||
|  |         start_index = content.find(start) | ||||||
|  |         if start_index < 0: | ||||||
|  |             return None | ||||||
|  |         start_index += len(start) | ||||||
|  |         end_index = content.find(end, start_index) | ||||||
|  |         if end_index < 0: | ||||||
|  |             return None | ||||||
|  |         return content[start_index:end_index] | ||||||
|  |  | ||||||
|  |     def fetch_song(self, source: Source, stop_at_level: int = 1) -> Song: | ||||||
|  |         song: Song = Song() | ||||||
|  |  | ||||||
|  |         r = self.connection.get(source.url, name=source.url) | ||||||
|  |         if r is None: | ||||||
|  |             return song | ||||||
|  |  | ||||||
|  |         # get the contents that are between `JSON.parse('` and `');` | ||||||
|  |         content = self.get_json_content_from_response(r, start="window.__PRELOADED_STATE__ = JSON.parse('", end="');\n      window.__APP_CONFIG__ = ") | ||||||
|  |         if content is not None: | ||||||
|  |             content = content.replace("\\\\", "\\").replace('\\"', '"').replace("\\'", "'") | ||||||
|  |             data = json.loads(content) | ||||||
|  |  | ||||||
|  |             lyrics_html = traverse_json_path(data, "songPage.lyricsData.body.html", default=None) | ||||||
|  |             if lyrics_html is not None: | ||||||
|  |                 song.lyrics_collection.append(Lyrics(FormattedText(html=lyrics_html))) | ||||||
|  |  | ||||||
|  |             dump_to_file("genius_song_script_json.json", content, is_json=True, exit_after_dump=False) | ||||||
|  |  | ||||||
|  |         soup = self.get_soup_from_response(r) | ||||||
|  |         for lyrics in soup.find_all("div", {"data-lyrics-container": "true"}): | ||||||
|  |             lyrics_object = Lyrics(FormattedText(html=lyrics.prettify())) | ||||||
|  |             song.lyrics_collection.append(lyrics_object) | ||||||
|  |  | ||||||
|  |         song.source_collection.append(source) | ||||||
|  |         return song | ||||||
							
								
								
									
										145
									
								
								music_kraken/pages/musicbrainz.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										145
									
								
								music_kraken/pages/musicbrainz.py
									
									
									
									
									
										Normal file
									
								
| @@ -0,0 +1,145 @@ | |||||||
|  | from collections import defaultdict | ||||||
|  | from dataclasses import dataclass | ||||||
|  | from enum import Enum | ||||||
|  | from typing import List, Optional, Type, Union, Generator, Dict, Any | ||||||
|  | from urllib.parse import urlparse | ||||||
|  |  | ||||||
|  | import pycountry | ||||||
|  | import musicbrainzngs | ||||||
|  | from bs4 import BeautifulSoup | ||||||
|  |  | ||||||
|  | from ..connection import Connection | ||||||
|  | from .abstract import Page | ||||||
|  | from ..utils.enums import SourceType, ALL_SOURCE_TYPES | ||||||
|  | from ..utils.enums.album import AlbumType, AlbumStatus | ||||||
|  | from ..objects import ( | ||||||
|  |     Artist, | ||||||
|  |     Source, | ||||||
|  |     Song, | ||||||
|  |     Album, | ||||||
|  |     ID3Timestamp, | ||||||
|  |     FormattedText, | ||||||
|  |     Label, | ||||||
|  |     Target, | ||||||
|  |     DatabaseObject, | ||||||
|  |     Lyrics, | ||||||
|  |     Artwork | ||||||
|  | ) | ||||||
|  | from ..utils.config import logging_settings, main_settings | ||||||
|  | from ..utils import string_processing, shared | ||||||
|  | from ..utils.string_processing import clean_song_title | ||||||
|  | from ..utils.support_classes.query import Query | ||||||
|  | from ..utils.support_classes.download_result import DownloadResult | ||||||
|  |  | ||||||
|  |  | ||||||
|  |  | ||||||
|  | class Musicbrainz(Page): | ||||||
|  |     SOURCE_TYPE = ALL_SOURCE_TYPES.MUSICBRAINZ | ||||||
|  |  | ||||||
|  |     HOST = "https://musicbrainz.org" | ||||||
|  |  | ||||||
|  |     def __init__(self, *args, **kwargs): | ||||||
|  |         musicbrainzngs.set_useragent("mk", "1") | ||||||
|  |  | ||||||
|  |         super().__init__(*args, **kwargs) | ||||||
|  |      | ||||||
|  |     def general_search(self, search_query: str) -> List[DatabaseObject]: | ||||||
|  |         search_results = [] | ||||||
|  |  | ||||||
|  |         #Artist | ||||||
|  |         search_results += self.artist_search(search_query).copy() | ||||||
|  |  | ||||||
|  |         #Album | ||||||
|  |         search_results += self.album_search(search_query).copy() | ||||||
|  |  | ||||||
|  |         #Song | ||||||
|  |         search_results += self.song_search(search_query).copy() | ||||||
|  |  | ||||||
|  |         return search_results | ||||||
|  |  | ||||||
|  |     def artist_search(self, search_query: str) -> List[Artist]: | ||||||
|  |         artist_list = [] | ||||||
|  |          | ||||||
|  |         #Artist | ||||||
|  |         artist_dict_list: list = musicbrainzngs.search_artists(search_query)['artist-list'] | ||||||
|  |         artist_source_list: List[Source] = [] | ||||||
|  |         for artist_dict in artist_dict_list: | ||||||
|  |             artist_source_list.append(Source(self.SOURCE_TYPE, self.HOST + "/artist/" + artist_dict['id'])) | ||||||
|  |             artist_list.append(Artist( | ||||||
|  |                 name=artist_dict['name'], | ||||||
|  |                 source_list=artist_source_list | ||||||
|  |             )) | ||||||
|  |    | ||||||
|  |         return artist_list | ||||||
|  |  | ||||||
|  |     def song_search(self, search_query: str) -> List[Song]: | ||||||
|  |         song_list = [] | ||||||
|  |  | ||||||
|  |         #Song | ||||||
|  |         song_dict_list: list = musicbrainzngs.search_recordings(search_query)['recording-list'] | ||||||
|  |         song_source_list: List[Source] = []  | ||||||
|  |         for song_dict in song_dict_list: | ||||||
|  |             song_source_list.append(Source(self.SOURCE_TYPE, self.HOST + "/recording/" + song_dict['id']))  | ||||||
|  |             song_list.append(Song( | ||||||
|  |                 title=song_dict['title'], | ||||||
|  |                 source_list=song_source_list | ||||||
|  |             ))  | ||||||
|  |  | ||||||
|  |         return song_list | ||||||
|  |      | ||||||
|  |     def album_search(self, search_query: str) -> List[Album]: | ||||||
|  |         album_list = [] | ||||||
|  |  | ||||||
|  |         #Album | ||||||
|  |         album_dict_list: list = musicbrainzngs.search_release_groups(search_query)['release-group-list'] | ||||||
|  |         album_source_list: List[Source] = [] | ||||||
|  |         for album_dict in album_dict_list: | ||||||
|  |             album_source_list.append(Source(self.SOURCE_TYPE, self.HOST + "/release-group/" + album_dict['id'])) | ||||||
|  |             album_list.append(Album( | ||||||
|  |                 title=album_dict['title'], | ||||||
|  |                 source_list=album_source_list | ||||||
|  |             )) | ||||||
|  |  | ||||||
|  |         return album_list | ||||||
|  |  | ||||||
|  |  | ||||||
|  |     def fetch_album(self, source: Source, stop_at_level: int = 1) -> Album: | ||||||
|  |         album_list = [] | ||||||
|  |  | ||||||
|  |         #Album | ||||||
|  |         album_dict_list: list = musicbrainzngs.search_release_groups(search_query)['release-group-list'] | ||||||
|  |         album_source_list: List[Source] = [] | ||||||
|  |         for album_dict in album_dict_list: | ||||||
|  |             album_source_list.append(Source(self.SOURCE_TYPE, self.HOST + "/release-group/" + album_dict['id'])) | ||||||
|  |             album_list.append(Album( | ||||||
|  |                 title=album_dict['title'], | ||||||
|  |                 source_list=album_source_list | ||||||
|  |             )) | ||||||
|  |  | ||||||
|  |     def fetch_artist(self, source: Source, stop_at_level: int = 1) -> Artist: | ||||||
|  |         artist_list = [] | ||||||
|  |          | ||||||
|  |         #Artist | ||||||
|  |         artist_dict_list: list = musicbrainzngs.search_artists(search_query)['artist-list'] | ||||||
|  |         artist_source_list: List[Source] = [] | ||||||
|  |         for artist_dict in artist_dict_list: | ||||||
|  |             artist_source_list.append(Source(self.SOURCE_TYPE, self.HOST + "/artist/" + artist_dict['id'])) | ||||||
|  |             artist_list.append(Artist( | ||||||
|  |                 name=artist_dict['name'], | ||||||
|  |                 source_list=artist_source_list, | ||||||
|  |             )) | ||||||
|  |  | ||||||
|  |     def fetch_song(self, source: Source, stop_at_level: int = 1) -> Song: | ||||||
|  |         song_list = [] | ||||||
|  |  | ||||||
|  |         #Song | ||||||
|  |         song_dict_list: list = musicbrainzngs.search_recordings(search_query)['recording-list'] | ||||||
|  |         song_source_list: List[Source] = []  | ||||||
|  |         for song_dict in song_dict_list: | ||||||
|  |             song_source_list.append(Source(self.SOURCE_TYPE, self.HOST + "/recording/" + song_dict['id']))  | ||||||
|  |             song_list.append(Song( | ||||||
|  |                 title=song_dict['title'], | ||||||
|  |                 source_list=song_source_list | ||||||
|  |             ))  | ||||||
|  |      | ||||||
|  |          | ||||||
| @@ -549,6 +549,11 @@ class YoutubeMusic(SuperYouTube): | |||||||
|         return album |         return album | ||||||
|  |  | ||||||
|     def fetch_lyrics(self, video_id: str, playlist_id: str = None) -> str: |     def fetch_lyrics(self, video_id: str, playlist_id: str = None) -> str: | ||||||
|  |         """ | ||||||
|  |         1. fetches the tabs of a song, to get the browse id | ||||||
|  |         2. finds the browse id of the lyrics | ||||||
|  |         3. fetches the lyrics with the browse id | ||||||
|  |         """ | ||||||
|         request_data = { |         request_data = { | ||||||
|             "context": {**self.credentials.context, "adSignalsInfo": {"params": []}}, |             "context": {**self.credentials.context, "adSignalsInfo": {"params": []}}, | ||||||
|             "videoId": video_id, |             "videoId": video_id, | ||||||
| @@ -575,6 +580,7 @@ class YoutubeMusic(SuperYouTube): | |||||||
|             pageType = traverse_json_path(tab, "tabRenderer.endpoint.browseEndpoint.browseEndpointContextSupportedConfigs.browseEndpointContextMusicConfig.pageType", default="") |             pageType = traverse_json_path(tab, "tabRenderer.endpoint.browseEndpoint.browseEndpointContextSupportedConfigs.browseEndpointContextMusicConfig.pageType", default="") | ||||||
|             if pageType in ("MUSIC_TAB_TYPE_LYRICS", "MUSIC_PAGE_TYPE_TRACK_LYRICS") or "lyrics" in pageType.lower(): |             if pageType in ("MUSIC_TAB_TYPE_LYRICS", "MUSIC_PAGE_TYPE_TRACK_LYRICS") or "lyrics" in pageType.lower(): | ||||||
|                 browse_id = traverse_json_path(tab, "tabRenderer.endpoint.browseEndpoint.browseId", default=None) |                 browse_id = traverse_json_path(tab, "tabRenderer.endpoint.browseEndpoint.browseId", default=None) | ||||||
|  |                 if browse_id is not None: | ||||||
|                     break |                     break | ||||||
|  |  | ||||||
|         if browse_id is None: |         if browse_id is None: | ||||||
| @@ -721,7 +727,6 @@ class YoutubeMusic(SuperYouTube): | |||||||
|  |  | ||||||
|         self.download_values_by_url[source.url] = { |         self.download_values_by_url[source.url] = { | ||||||
|             "url": _best_format.get("url"), |             "url": _best_format.get("url"), | ||||||
|             "chunk_size": _best_format.get("downloader_options", {}).get("http_chunk_size", main_settings["chunk_size"]), |  | ||||||
|             "headers": _best_format.get("http_headers", {}), |             "headers": _best_format.get("http_headers", {}), | ||||||
|         } |         } | ||||||
|  |  | ||||||
|   | |||||||
| @@ -59,6 +59,11 @@ Reference for the logging formats: https://docs.python.org/3/library/logging.htm | |||||||
|         description="The logger for the musify scraper.", |         description="The logger for the musify scraper.", | ||||||
|         default_value="musify" |         default_value="musify" | ||||||
|     ), |     ), | ||||||
|  |     LoggerAttribute( | ||||||
|  |         name="musicbrainz_logger", | ||||||
|  |         description="The logger for the musicbrainz scraper.", | ||||||
|  |         default_value="musicbrainz" | ||||||
|  |     ), | ||||||
|     LoggerAttribute( |     LoggerAttribute( | ||||||
|         name="youtube_logger", |         name="youtube_logger", | ||||||
|         description="The logger for the youtube scraper.", |         description="The logger for the youtube scraper.", | ||||||
|   | |||||||
| @@ -15,11 +15,11 @@ __stage__ = os.getenv("STAGE", "prod") | |||||||
| DEBUG = (__stage__ == "dev") and True | DEBUG = (__stage__ == "dev") and True | ||||||
| DEBUG_LOGGING = DEBUG and False | DEBUG_LOGGING = DEBUG and False | ||||||
| DEBUG_TRACE = DEBUG and True | DEBUG_TRACE = DEBUG and True | ||||||
| DEBUG_OBJECT_TRACE = DEBUG and True | DEBUG_OBJECT_TRACE = DEBUG and False | ||||||
| DEBUG_OBJECT_TRACE_CALLSTACK = DEBUG_OBJECT_TRACE and False | DEBUG_OBJECT_TRACE_CALLSTACK = DEBUG_OBJECT_TRACE and False | ||||||
| DEBUG_YOUTUBE_INITIALIZING = DEBUG and False | DEBUG_YOUTUBE_INITIALIZING = DEBUG and False | ||||||
| DEBUG_PAGES = DEBUG and False | DEBUG_PAGES = DEBUG and False | ||||||
| DEBUG_DUMP = DEBUG and False | DEBUG_DUMP = DEBUG and True | ||||||
| DEBUG_PRINT_ID = DEBUG and True | DEBUG_PRINT_ID = DEBUG and True | ||||||
|  |  | ||||||
| if DEBUG: | if DEBUG: | ||||||
|   | |||||||
		Reference in New Issue
	
	Block a user