Compare commits
	
		
			24 Commits
		
	
	
		
			83a3334f1a
			...
			feature/mu
		
	
	| Author | SHA1 | Date | |
|---|---|---|---|
| 265c9f462f | |||
| 780daac0ef | |||
| 465af49057 | |||
| 2aa0f02fa5 | |||
| 7b0b830d64 | |||
| 1ba6c97f5a | |||
| c8cbfc7cb9 | |||
| 344da0a0bf | |||
| 49dc7093c8 | |||
| 90f70638b4 | |||
| 7b4eee858a | |||
| f61b34dd40 | |||
| 688b4fd357 | |||
| 769d27dc5c | |||
| f5d953d9ce | |||
| 46b64b8f8d | |||
| adfce16d2a | |||
| e4fd9faf12 | |||
| f6caee41a8 | |||
| 068c749c38 | |||
| c131924577 | |||
| 8cdb5c1f99 | |||
| 356ba658ce | |||
| 000a6c0dba | 
							
								
								
									
										1
									
								
								.vscode/settings.json
									
									
									
									
										vendored
									
									
								
							
							
						
						
									
										1
									
								
								.vscode/settings.json
									
									
									
									
										vendored
									
									
								
							| @@ -27,6 +27,7 @@ | ||||
|         "Gitea", | ||||
|         "iframe", | ||||
|         "isrc", | ||||
|         "itemprop", | ||||
|         "levenshtein", | ||||
|         "metallum", | ||||
|         "MUSICBRAINZ", | ||||
|   | ||||
| @@ -6,9 +6,10 @@ logging.getLogger().setLevel(logging.DEBUG) | ||||
|  | ||||
| if __name__ == "__main__": | ||||
|     commands = [ | ||||
|         "s: #a I'm in a coffin", | ||||
|         "0", | ||||
|         "d: 0", | ||||
|         "s: #a Crystal F", | ||||
|         "10", | ||||
|         "1", | ||||
|         "3", | ||||
|     ] | ||||
|  | ||||
|      | ||||
|   | ||||
| @@ -93,6 +93,10 @@ def write_metadata_to_target(metadata: Metadata, target: Target, song: Song): | ||||
|             # resize the image to the preferred resolution | ||||
|             img.thumbnail((main_settings["preferred_artwork_resolution"], main_settings["preferred_artwork_resolution"])) | ||||
|  | ||||
|             # https://stackoverflow.com/a/59476938/16804841 | ||||
|             if img.mode != 'RGB': | ||||
|                 img = img.convert('RGB') | ||||
|  | ||||
|             img.save(converted_target.file_path, "JPEG") | ||||
|  | ||||
|         # https://stackoverflow.com/questions/70228440/mutagen-how-can-i-correctly-embed-album-art-into-mp3-file-so-that-i-can-see-t | ||||
|   | ||||
| @@ -6,6 +6,7 @@ from typing import List, Optional | ||||
| from functools import lru_cache | ||||
| import logging | ||||
|  | ||||
| from ..utils import output, BColors | ||||
| from ..utils.config import main_settings | ||||
| from ..utils.string_processing import fit_to_file_system | ||||
|  | ||||
| @@ -136,13 +137,13 @@ class Cache: | ||||
|         ) | ||||
|         self._write_attribute(cache_attribute) | ||||
|  | ||||
|         cache_path = fit_to_file_system(Path(module_path, name), hidden_ok=True) | ||||
|         cache_path = fit_to_file_system(Path(module_path, name.replace("/", "_")), hidden_ok=True) | ||||
|         with cache_path.open("wb") as content_file: | ||||
|             self.logger.debug(f"writing cache to {cache_path}") | ||||
|             content_file.write(content) | ||||
|  | ||||
|     def get(self, name: str) -> Optional[CacheResult]: | ||||
|         path = fit_to_file_system(Path(self._dir, self.module, name), hidden_ok=True) | ||||
|         path = fit_to_file_system(Path(self._dir, self.module, name.replace("/", "_")), hidden_ok=True) | ||||
|  | ||||
|         if not path.is_file(): | ||||
|             return None | ||||
| @@ -165,7 +166,7 @@ class Cache: | ||||
|             if ca.name == "": | ||||
|                 continue | ||||
|  | ||||
|             file = fit_to_file_system(Path(self._dir, ca.module, ca.name), hidden_ok=True) | ||||
|             file = fit_to_file_system(Path(self._dir, ca.module, ca.name.replace("/", "_")), hidden_ok=True) | ||||
|  | ||||
|             if not ca.is_valid: | ||||
|                 self.logger.debug(f"deleting cache {ca.id}") | ||||
| @@ -204,9 +205,12 @@ class Cache: | ||||
|         for path in self._dir.iterdir(): | ||||
|             if path.is_dir(): | ||||
|                 for file in path.iterdir(): | ||||
|                     output(f"Deleting file {file}", color=BColors.GREY) | ||||
|                     file.unlink() | ||||
|                 output(f"Deleting folder {path}", color=BColors.HEADER) | ||||
|                 path.rmdir() | ||||
|             else: | ||||
|                 output(f"Deleting folder {path}", color=BColors.HEADER) | ||||
|                 path.unlink() | ||||
|  | ||||
|         self.cached_attributes.clear() | ||||
|   | ||||
| @@ -30,14 +30,16 @@ from ..utils.exception import MKMissingNameException | ||||
| from ..utils.exception.download import UrlNotFoundException | ||||
| from ..utils.shared import DEBUG_PAGES | ||||
|  | ||||
| from ..pages import Page, EncyclopaediaMetallum, Musify, YouTube, YoutubeMusic, Bandcamp, INDEPENDENT_DB_OBJECTS | ||||
| from ..pages import Page, EncyclopaediaMetallum, Musify, YouTube, YoutubeMusic, Bandcamp, Musicbrainz, Genius, INDEPENDENT_DB_OBJECTS | ||||
|  | ||||
|  | ||||
| ALL_PAGES: Set[Type[Page]] = { | ||||
|     # EncyclopaediaMetallum, | ||||
|     Genius, | ||||
|     Musify, | ||||
|     YoutubeMusic, | ||||
|     Bandcamp | ||||
|     Bandcamp, | ||||
|     Musicbrainz | ||||
| } | ||||
|  | ||||
| if youtube_settings["use_youtube_alongside_youtube_music"]: | ||||
|   | ||||
| @@ -59,4 +59,6 @@ class Artwork: | ||||
|                 self._variant_mapping[key] = value | ||||
|  | ||||
|     def __eq__(self, other: Artwork) -> bool: | ||||
|         if not isinstance(other, Artwork): | ||||
|             return False | ||||
|         return any(a == b for a, b in zip(self._variant_mapping.keys(), other._variant_mapping.keys())) | ||||
|   | ||||
| @@ -115,7 +115,6 @@ class Collection(Generic[T]): | ||||
|         self._data.append(other) | ||||
|         other._inner._is_in_collection.add(self) | ||||
|  | ||||
|         """ | ||||
|         for attribute, a in self.sync_on_append.items(): | ||||
|             # syncing two collections by reference | ||||
|             b = other.__getattribute__(attribute) | ||||
| @@ -134,7 +133,6 @@ class Collection(Generic[T]): | ||||
|                 a._collection_for[synced_with] = key | ||||
|  | ||||
|             a.extend(b_data, **kwargs) | ||||
|         """ | ||||
|  | ||||
|         # all of the existing hooks to get the defined datastructures | ||||
|         for collection_attribute, generator in self.extend_object_to_attribute.items(): | ||||
| @@ -162,24 +160,6 @@ class Collection(Generic[T]): | ||||
|  | ||||
|         object_trace(f"Appending {other.option_string} to {self}") | ||||
|  | ||||
|         for attribute, a in self.sync_on_append.items(): | ||||
|             # syncing two collections by reference | ||||
|             b = other.__getattribute__(attribute) | ||||
|             if a is b: | ||||
|                 continue | ||||
|  | ||||
|             object_trace(f"Syncing [{a}] = [{b}]") | ||||
|  | ||||
|             b_data = b.data.copy() | ||||
|             b_collection_for = b._collection_for.copy() | ||||
|  | ||||
|             del b | ||||
|  | ||||
|             for synced_with, key in b_collection_for.items(): | ||||
|                 synced_with.__setattr__(key, a) | ||||
|                 a._collection_for[synced_with] = key | ||||
|  | ||||
|             a.extend(b_data, **kwargs) | ||||
|  | ||||
|         # switching collection in the case of push to | ||||
|         for c in self.push_to: | ||||
|   | ||||
| @@ -38,11 +38,19 @@ class FormattedText: | ||||
|     def markdown(self) -> str: | ||||
|         return md(self.html).strip() | ||||
|      | ||||
|     @markdown.setter | ||||
|     def markdown(self, value: str) -> None: | ||||
|         self.html = mistune.markdown(value) | ||||
|  | ||||
|     @property | ||||
|     def plain(self) -> str: | ||||
|         md = self.markdown | ||||
|         return md.replace("\n\n", "\n") | ||||
|      | ||||
|     @plain.setter | ||||
|     def plain(self, value: str) -> None: | ||||
|         self.html = mistune.markdown(plain_to_markdown(value)) | ||||
|  | ||||
|     def __str__(self) -> str: | ||||
|         return self.markdown | ||||
|  | ||||
|   | ||||
| @@ -219,7 +219,8 @@ class Song(Base): | ||||
|  | ||||
|     @property | ||||
|     def option_string(self) -> str: | ||||
|         r = OPTION_FOREGROUND.value + self.title_string + BColors.ENDC.value + OPTION_BACKGROUND.value | ||||
|         r = "song " | ||||
|         r += OPTION_FOREGROUND.value + self.title_string + BColors.ENDC.value + OPTION_BACKGROUND.value | ||||
|         r += get_collection_string(self.album_collection, " from {}", ignore_titles={self.title}) | ||||
|         r += get_collection_string(self.artist_collection, " by {}") | ||||
|         r += get_collection_string(self.feature_artist_collection, " feat. {}" if len(self.artist_collection) > 0 else " by {}") | ||||
| @@ -369,9 +370,11 @@ class Album(Base): | ||||
|  | ||||
|     @property | ||||
|     def option_string(self) -> str: | ||||
|         r = OPTION_FOREGROUND.value + self.title_string + BColors.ENDC.value + OPTION_BACKGROUND.value | ||||
|         r = "album " | ||||
|         r += OPTION_FOREGROUND.value + self.title_string + BColors.ENDC.value + OPTION_BACKGROUND.value | ||||
|         r += get_collection_string(self.artist_collection, " by {}") | ||||
|         r += get_collection_string(self.feature_artist_collection, " feat. {}" if len(self.artist_collection) > 0 else " by {}") | ||||
|         if len(self.artist_collection) <= 0: | ||||
|             r += get_collection_string(self.feature_artist_collection, " by {}") | ||||
|         r += get_collection_string(self.label_collection, " under {}") | ||||
|  | ||||
|         if len(self.song_collection) > 0: | ||||
| @@ -627,7 +630,8 @@ class Artist(Base): | ||||
|  | ||||
|     @property | ||||
|     def option_string(self) -> str: | ||||
|         r = OPTION_FOREGROUND.value + self.title_string + BColors.ENDC.value + OPTION_BACKGROUND.value | ||||
|         r = "artist " | ||||
|         r += OPTION_FOREGROUND.value + self.title_string + BColors.ENDC.value + OPTION_BACKGROUND.value | ||||
|         r += get_collection_string(self.label_collection, " under {}") | ||||
|          | ||||
|         r += OPTION_BACKGROUND.value | ||||
| @@ -720,4 +724,4 @@ class Label(Base): | ||||
|  | ||||
|     @property | ||||
|     def option_string(self): | ||||
|         return OPTION_FOREGROUND.value + self.name + BColors.ENDC.value | ||||
|         return "label " + OPTION_FOREGROUND.value + self.name + BColors.ENDC.value | ||||
|   | ||||
| @@ -1,7 +1,9 @@ | ||||
| from .encyclopaedia_metallum import EncyclopaediaMetallum | ||||
| from .musify import Musify | ||||
| from .musicbrainz import Musicbrainz | ||||
| from .youtube import YouTube | ||||
| from .youtube_music import YoutubeMusic | ||||
| from .bandcamp import Bandcamp | ||||
| from .genius import Genius | ||||
|  | ||||
| from .abstract import Page, INDEPENDENT_DB_OBJECTS | ||||
|   | ||||
							
								
								
									
										297
									
								
								music_kraken/pages/genius.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										297
									
								
								music_kraken/pages/genius.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,297 @@ | ||||
| from typing import List, Optional, Type | ||||
| from urllib.parse import urlparse, urlunparse, urlencode | ||||
| import json | ||||
| from enum import Enum | ||||
| from bs4 import BeautifulSoup | ||||
| import pycountry | ||||
|  | ||||
| from ..objects import Source, DatabaseObject | ||||
| from .abstract import Page | ||||
| from ..objects import ( | ||||
|     Artist, | ||||
|     Source, | ||||
|     SourceType, | ||||
|     Song, | ||||
|     Album, | ||||
|     Label, | ||||
|     Target, | ||||
|     Contact, | ||||
|     ID3Timestamp, | ||||
|     Lyrics, | ||||
|     FormattedText, | ||||
|     Artwork, | ||||
| ) | ||||
| from ..connection import Connection | ||||
| from ..utils import dump_to_file, traverse_json_path | ||||
| from ..utils.enums import SourceType, ALL_SOURCE_TYPES | ||||
| from ..utils.support_classes.download_result import DownloadResult | ||||
| from ..utils.string_processing import clean_song_title | ||||
| from ..utils.config import main_settings, logging_settings | ||||
| from ..utils.shared import DEBUG | ||||
|  | ||||
| if DEBUG: | ||||
|     from ..utils import dump_to_file | ||||
|  | ||||
|  | ||||
| class Genius(Page): | ||||
|     SOURCE_TYPE = ALL_SOURCE_TYPES.GENIUS | ||||
|     HOST = "genius.com" | ||||
|  | ||||
|     def __init__(self, *args, **kwargs): | ||||
|         self.connection: Connection = Connection( | ||||
|             host="https://genius.com/", | ||||
|             logger=self.LOGGER, | ||||
|             module="genius", | ||||
|         ) | ||||
|  | ||||
|         super().__init__(*args, **kwargs) | ||||
|  | ||||
|     def get_source_type(self, source: Source) -> Optional[Type[DatabaseObject]]: | ||||
|         path = source.parsed_url.path.replace("/", "") | ||||
|          | ||||
|         if path.startswith("artists"): | ||||
|             return Artist | ||||
|         if path.startswith("albums"): | ||||
|             return Album | ||||
|  | ||||
|         return Song | ||||
|  | ||||
|     def add_to_artwork(self, artwork: Artwork, url: str): | ||||
|         if url is None: | ||||
|             return | ||||
|          | ||||
|         url_frags = url.split(".") | ||||
|         if len(url_frags) < 2: | ||||
|             artwork.append(url=url) | ||||
|             return | ||||
|  | ||||
|         dimensions = url_frags[-2].split("x") | ||||
|         if len(dimensions) < 2: | ||||
|             artwork.append(url=url) | ||||
|             return | ||||
|  | ||||
|         if len(dimensions) == 3: | ||||
|             dimensions = dimensions[:-1] | ||||
|          | ||||
|         try: | ||||
|             artwork.append(url=url, width=int(dimensions[0]), height=int(dimensions[1])) | ||||
|         except ValueError: | ||||
|             artwork.append(url=url) | ||||
|  | ||||
|     def parse_api_object(self, data: dict) -> Optional[DatabaseObject]: | ||||
|         if data is None: | ||||
|             return None | ||||
|         object_type = data.get("_type") | ||||
|  | ||||
|         artwork = Artwork() | ||||
|         self.add_to_artwork(artwork, data.get("header_image_url")) | ||||
|         self.add_to_artwork(artwork, data.get("image_url")) | ||||
|          | ||||
|         additional_sources: List[Source] = [] | ||||
|         source: Source = Source(self.SOURCE_TYPE, data.get("url"), additional_data={ | ||||
|             "id": data.get("id"), | ||||
|             "slug": data.get("slug"), | ||||
|             "api_path": data.get("api_path"), | ||||
|         }) | ||||
|  | ||||
|         notes = FormattedText() | ||||
|         description = data.get("description") or {} | ||||
|         if "html" in description: | ||||
|             notes.html = description["html"] | ||||
|         elif "markdown" in description: | ||||
|             notes.markdown = description["markdown"] | ||||
|         elif "description_preview" in data: | ||||
|             notes.plaintext = data["description_preview"] | ||||
|  | ||||
|         if source.url is None: | ||||
|             return None | ||||
|  | ||||
|         if object_type == "artist": | ||||
|             if data.get("instagram_name") is not None: | ||||
|                 additional_sources.append(Source(ALL_SOURCE_TYPES.INSTAGRAM, f"https://www.instagram.com/{data['instagram_name']}/")) | ||||
|             if data.get("facebook_name") is not None: | ||||
|                 additional_sources.append(Source(ALL_SOURCE_TYPES.FACEBOOK, f"https://www.facebook.com/{data['facebook_name']}/")) | ||||
|             if data.get("twitter_name") is not None: | ||||
|                 additional_sources.append(Source(ALL_SOURCE_TYPES.TWITTER, f"https://x.com/{data['twitter_name']}/")) | ||||
|  | ||||
|             return Artist( | ||||
|                 name=data["name"].strip() if data.get("name") is not None else None, | ||||
|                 source_list=[source], | ||||
|                 artwork=artwork, | ||||
|                 notes=notes, | ||||
|             ) | ||||
|  | ||||
|         if object_type == "album": | ||||
|             self.add_to_artwork(artwork, data.get("cover_art_thumbnail_url")) | ||||
|             self.add_to_artwork(artwork, data.get("cover_art_url")) | ||||
|  | ||||
|             for cover_art in data.get("cover_arts", []): | ||||
|                 self.add_to_artwork(artwork, cover_art.get("image_url")) | ||||
|                 self.add_to_artwork(artwork, cover_art.get("thumbnail_image_url")) | ||||
|  | ||||
|             return Album( | ||||
|                 title=data.get("name").strip(), | ||||
|                 source_list=[source], | ||||
|                 artist_list=[self.parse_api_object(data.get("artist"))], | ||||
|                 artwork=artwork, | ||||
|                 date=ID3Timestamp(**(data.get("release_date_components") or {})), | ||||
|             ) | ||||
|  | ||||
|         if object_type == "song": | ||||
|             self.add_to_artwork(artwork, data.get("song_art_image_thumbnail_url")) | ||||
|             self.add_to_artwork(artwork, data.get("song_art_image_url")) | ||||
|  | ||||
|             main_artist_list = [] | ||||
|             featured_artist_list = [] | ||||
|  | ||||
|             _artist_name = None | ||||
|             primary_artist = self.parse_api_object(data.get("primary_artist")) | ||||
|             if primary_artist is not None: | ||||
|                 _artist_name = primary_artist.name | ||||
|                 main_artist_list.append(primary_artist) | ||||
|             for feature_artist in (*(data.get("featured_artists") or []), *(data.get("producer_artists") or []), *(data.get("writer_artists") or [])): | ||||
|                 artist = self.parse_api_object(feature_artist) | ||||
|                 if artist is not None: | ||||
|                     featured_artist_list.append(artist) | ||||
|  | ||||
|             return Song( | ||||
|                 title=clean_song_title(data.get("title"), artist_name=_artist_name), | ||||
|                 source_list=[source], | ||||
|                 artwork=artwork, | ||||
|                 feature_artist_list=featured_artist_list, | ||||
|                 artist_list=main_artist_list, | ||||
|             ) | ||||
|  | ||||
|         return None | ||||
|  | ||||
|     def general_search(self, search_query: str, **kwargs) -> List[DatabaseObject]: | ||||
|         results = [] | ||||
|  | ||||
|         search_params = { | ||||
|             "q": search_query, | ||||
|         } | ||||
|  | ||||
|         r = self.connection.get("https://genius.com/api/search/multi?" + urlencode(search_params), name=f"search_{search_query}") | ||||
|         if r is None: | ||||
|             return results | ||||
|  | ||||
|         dump_to_file("search_genius.json", r.text, is_json=True, exit_after_dump=False) | ||||
|         data = r.json() | ||||
|  | ||||
|         for elements in traverse_json_path(data, "response.sections", default=[]): | ||||
|             hits = elements.get("hits", []) | ||||
|             for hit in hits: | ||||
|                 parsed = self.parse_api_object(hit.get("result")) | ||||
|                 if parsed is not None: | ||||
|                     results.append(parsed) | ||||
|  | ||||
|         return results | ||||
|  | ||||
|     def fetch_artist(self, source: Source, stop_at_level: int = 1) -> Artist: | ||||
|         artist: Artist = Artist() | ||||
|         # https://genius.com/api/artists/24527/albums?page=1 | ||||
|  | ||||
|         r = self.connection.get(source.url, name=source.url) | ||||
|         if r is None: | ||||
|             return artist | ||||
|         soup = self.get_soup_from_response(r) | ||||
|  | ||||
|         # find the content attribute in the meta tag which is contained in the head | ||||
|         data_container = soup.find("meta", {"itemprop": "page_data"}) | ||||
|         if data_container is not None: | ||||
|             content = data_container["content"] | ||||
|             dump_to_file("genius_itemprop_artist.json", content, is_json=True, exit_after_dump=False) | ||||
|             data = json.loads(content) | ||||
|  | ||||
|             artist = self.parse_api_object(data.get("artist")) | ||||
|              | ||||
|             for e in (data.get("artist_albums") or []): | ||||
|                 r = self.parse_api_object(e) | ||||
|                 if not isinstance(r, Album): | ||||
|                     continue | ||||
|  | ||||
|                 artist.album_collection.append(r) | ||||
|              | ||||
|             for e in (data.get("artist_songs") or []): | ||||
|                 r = self.parse_api_object(e) | ||||
|                 if not isinstance(r, Song): | ||||
|                     continue | ||||
|  | ||||
|                 """ | ||||
|                 TODO | ||||
|                 fetch the album for these songs, because the api doesn't  | ||||
|                 return them | ||||
|                 """ | ||||
|  | ||||
|                 artist.album_collection.extend(r.album_collection) | ||||
|  | ||||
|         artist.source_collection.append(source) | ||||
|  | ||||
|         return artist | ||||
|  | ||||
|     def fetch_album(self, source: Source, stop_at_level: int = 1) -> Album: | ||||
|         album: Album = Album() | ||||
|         # https://genius.com/api/artists/24527/albums?page=1 | ||||
|  | ||||
|         r = self.connection.get(source.url, name=source.url) | ||||
|         if r is None: | ||||
|             return album | ||||
|         soup = self.get_soup_from_response(r) | ||||
|  | ||||
|         # find the content attribute in the meta tag which is contained in the head | ||||
|         data_container = soup.find("meta", {"itemprop": "page_data"}) | ||||
|         if data_container is not None: | ||||
|             content = data_container["content"] | ||||
|             dump_to_file("genius_itemprop_album.json", content, is_json=True, exit_after_dump=False) | ||||
|             data = json.loads(content) | ||||
|  | ||||
|             album = self.parse_api_object(data.get("album")) | ||||
|  | ||||
|             for e in data.get("album_appearances", []): | ||||
|                 r = self.parse_api_object(e.get("song")) | ||||
|                 if not isinstance(r, Song): | ||||
|                     continue | ||||
|  | ||||
|                 album.song_collection.append(r) | ||||
|  | ||||
|         album.source_collection.append(source) | ||||
|          | ||||
|         return album | ||||
|  | ||||
|     def get_json_content_from_response(self, response, start: str, end: str) -> Optional[str]: | ||||
|         content = response.text | ||||
|         start_index = content.find(start) | ||||
|         if start_index < 0: | ||||
|             return None | ||||
|         start_index += len(start) | ||||
|         end_index = content.find(end, start_index) | ||||
|         if end_index < 0: | ||||
|             return None | ||||
|         return content[start_index:end_index] | ||||
|  | ||||
|     def fetch_song(self, source: Source, stop_at_level: int = 1) -> Song: | ||||
|         song: Song = Song() | ||||
|  | ||||
|         r = self.connection.get(source.url, name=source.url) | ||||
|         if r is None: | ||||
|             return song | ||||
|  | ||||
|         # get the contents that are between `JSON.parse('` and `');` | ||||
|         content = self.get_json_content_from_response(r, start="window.__PRELOADED_STATE__ = JSON.parse('", end="');\n      window.__APP_CONFIG__ = ") | ||||
|         if content is not None: | ||||
|             content = content.replace("\\\\", "\\").replace('\\"', '"').replace("\\'", "'") | ||||
|             data = json.loads(content) | ||||
|  | ||||
|             lyrics_html = traverse_json_path(data, "songPage.lyricsData.body.html", default=None) | ||||
|             if lyrics_html is not None: | ||||
|                 song.lyrics_collection.append(Lyrics(FormattedText(html=lyrics_html))) | ||||
|  | ||||
|             dump_to_file("genius_song_script_json.json", content, is_json=True, exit_after_dump=False) | ||||
|  | ||||
|         soup = self.get_soup_from_response(r) | ||||
|         for lyrics in soup.find_all("div", {"data-lyrics-container": "true"}): | ||||
|             lyrics_object = Lyrics(FormattedText(html=lyrics.prettify())) | ||||
|             song.lyrics_collection.append(lyrics_object) | ||||
|  | ||||
|         song.source_collection.append(source) | ||||
|         return song | ||||
							
								
								
									
										145
									
								
								music_kraken/pages/musicbrainz.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										145
									
								
								music_kraken/pages/musicbrainz.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,145 @@ | ||||
| from collections import defaultdict | ||||
| from dataclasses import dataclass | ||||
| from enum import Enum | ||||
| from typing import List, Optional, Type, Union, Generator, Dict, Any | ||||
| from urllib.parse import urlparse | ||||
|  | ||||
| import pycountry | ||||
| import musicbrainzngs | ||||
| from bs4 import BeautifulSoup | ||||
|  | ||||
| from ..connection import Connection | ||||
| from .abstract import Page | ||||
| from ..utils.enums import SourceType, ALL_SOURCE_TYPES | ||||
| from ..utils.enums.album import AlbumType, AlbumStatus | ||||
| from ..objects import ( | ||||
|     Artist, | ||||
|     Source, | ||||
|     Song, | ||||
|     Album, | ||||
|     ID3Timestamp, | ||||
|     FormattedText, | ||||
|     Label, | ||||
|     Target, | ||||
|     DatabaseObject, | ||||
|     Lyrics, | ||||
|     Artwork | ||||
| ) | ||||
| from ..utils.config import logging_settings, main_settings | ||||
| from ..utils import string_processing, shared | ||||
| from ..utils.string_processing import clean_song_title | ||||
| from ..utils.support_classes.query import Query | ||||
| from ..utils.support_classes.download_result import DownloadResult | ||||
|  | ||||
|  | ||||
|  | ||||
| class Musicbrainz(Page): | ||||
|     SOURCE_TYPE = ALL_SOURCE_TYPES.MUSICBRAINZ | ||||
|  | ||||
|     HOST = "https://musicbrainz.org" | ||||
|  | ||||
|     def __init__(self, *args, **kwargs): | ||||
|         musicbrainzngs.set_useragent("mk", "1") | ||||
|  | ||||
|         super().__init__(*args, **kwargs) | ||||
|      | ||||
|     def general_search(self, search_query: str) -> List[DatabaseObject]: | ||||
|         search_results = [] | ||||
|  | ||||
|         #Artist | ||||
|         search_results += self.artist_search(search_query).copy() | ||||
|  | ||||
|         #Album | ||||
|         search_results += self.album_search(search_query).copy() | ||||
|  | ||||
|         #Song | ||||
|         search_results += self.song_search(search_query).copy() | ||||
|  | ||||
|         return search_results | ||||
|  | ||||
|     def artist_search(self, search_query: str) -> List[Artist]: | ||||
|         artist_list = [] | ||||
|          | ||||
|         #Artist | ||||
|         artist_dict_list: list = musicbrainzngs.search_artists(search_query)['artist-list'] | ||||
|         artist_source_list: List[Source] = [] | ||||
|         for artist_dict in artist_dict_list: | ||||
|             artist_source_list.append(Source(self.SOURCE_TYPE, self.HOST + "/artist/" + artist_dict['id'])) | ||||
|             artist_list.append(Artist( | ||||
|                 name=artist_dict['name'], | ||||
|                 source_list=artist_source_list | ||||
|             )) | ||||
|    | ||||
|         return artist_list | ||||
|  | ||||
|     def song_search(self, search_query: str) -> List[Song]: | ||||
|         song_list = [] | ||||
|  | ||||
|         #Song | ||||
|         song_dict_list: list = musicbrainzngs.search_recordings(search_query)['recording-list'] | ||||
|         song_source_list: List[Source] = []  | ||||
|         for song_dict in song_dict_list: | ||||
|             song_source_list.append(Source(self.SOURCE_TYPE, self.HOST + "/recording/" + song_dict['id']))  | ||||
|             song_list.append(Song( | ||||
|                 title=song_dict['title'], | ||||
|                 source_list=song_source_list | ||||
|             ))  | ||||
|  | ||||
|         return song_list | ||||
|      | ||||
|     def album_search(self, search_query: str) -> List[Album]: | ||||
|         album_list = [] | ||||
|  | ||||
|         #Album | ||||
|         album_dict_list: list = musicbrainzngs.search_release_groups(search_query)['release-group-list'] | ||||
|         album_source_list: List[Source] = [] | ||||
|         for album_dict in album_dict_list: | ||||
|             album_source_list.append(Source(self.SOURCE_TYPE, self.HOST + "/release-group/" + album_dict['id'])) | ||||
|             album_list.append(Album( | ||||
|                 title=album_dict['title'], | ||||
|                 source_list=album_source_list | ||||
|             )) | ||||
|  | ||||
|         return album_list | ||||
|  | ||||
|  | ||||
|     def fetch_album(self, source: Source, stop_at_level: int = 1) -> Album: | ||||
|         album_list = [] | ||||
|  | ||||
|         #Album | ||||
|         album_dict_list: list = musicbrainzngs.search_release_groups(search_query)['release-group-list'] | ||||
|         album_source_list: List[Source] = [] | ||||
|         for album_dict in album_dict_list: | ||||
|             album_source_list.append(Source(self.SOURCE_TYPE, self.HOST + "/release-group/" + album_dict['id'])) | ||||
|             album_list.append(Album( | ||||
|                 title=album_dict['title'], | ||||
|                 source_list=album_source_list | ||||
|             )) | ||||
|  | ||||
|     def fetch_artist(self, source: Source, stop_at_level: int = 1) -> Artist: | ||||
|         artist_list = [] | ||||
|          | ||||
|         #Artist | ||||
|         artist_dict_list: list = musicbrainzngs.search_artists(search_query)['artist-list'] | ||||
|         artist_source_list: List[Source] = [] | ||||
|         for artist_dict in artist_dict_list: | ||||
|             artist_source_list.append(Source(self.SOURCE_TYPE, self.HOST + "/artist/" + artist_dict['id'])) | ||||
|             artist_list.append(Artist( | ||||
|                 name=artist_dict['name'], | ||||
|                 source_list=artist_source_list, | ||||
|             )) | ||||
|  | ||||
|     def fetch_song(self, source: Source, stop_at_level: int = 1) -> Song: | ||||
|         song_list = [] | ||||
|  | ||||
|         #Song | ||||
|         song_dict_list: list = musicbrainzngs.search_recordings(search_query)['recording-list'] | ||||
|         song_source_list: List[Source] = []  | ||||
|         for song_dict in song_dict_list: | ||||
|             song_source_list.append(Source(self.SOURCE_TYPE, self.HOST + "/recording/" + song_dict['id']))  | ||||
|             song_list.append(Song( | ||||
|                 title=song_dict['title'], | ||||
|                 source_list=song_source_list | ||||
|             ))  | ||||
|      | ||||
|          | ||||
| @@ -549,6 +549,11 @@ class YoutubeMusic(SuperYouTube): | ||||
|         return album | ||||
|  | ||||
|     def fetch_lyrics(self, video_id: str, playlist_id: str = None) -> str: | ||||
|         """ | ||||
|         1. fetches the tabs of a song, to get the browse id | ||||
|         2. finds the browse id of the lyrics | ||||
|         3. fetches the lyrics with the browse id | ||||
|         """ | ||||
|         request_data = { | ||||
|             "context": {**self.credentials.context, "adSignalsInfo": {"params": []}}, | ||||
|             "videoId": video_id, | ||||
| @@ -575,7 +580,8 @@ class YoutubeMusic(SuperYouTube): | ||||
|             pageType = traverse_json_path(tab, "tabRenderer.endpoint.browseEndpoint.browseEndpointContextSupportedConfigs.browseEndpointContextMusicConfig.pageType", default="") | ||||
|             if pageType in ("MUSIC_TAB_TYPE_LYRICS", "MUSIC_PAGE_TYPE_TRACK_LYRICS") or "lyrics" in pageType.lower(): | ||||
|                 browse_id = traverse_json_path(tab, "tabRenderer.endpoint.browseEndpoint.browseId", default=None) | ||||
|                 break | ||||
|                 if browse_id is not None: | ||||
|                     break | ||||
|  | ||||
|         if browse_id is None: | ||||
|             return None | ||||
| @@ -589,6 +595,8 @@ class YoutubeMusic(SuperYouTube): | ||||
|             }, | ||||
|             name=f"fetch_song_lyrics_{video_id}.json" | ||||
|         ) | ||||
|         if r is None: | ||||
|             return None | ||||
|  | ||||
|         dump_to_file(f"fetch_song_lyrics_{video_id}.json", r.text, is_json=True, exit_after_dump=False) | ||||
|  | ||||
| @@ -719,7 +727,6 @@ class YoutubeMusic(SuperYouTube): | ||||
|  | ||||
|         self.download_values_by_url[source.url] = { | ||||
|             "url": _best_format.get("url"), | ||||
|             "chunk_size": _best_format.get("downloader_options", {}).get("http_chunk_size", main_settings["chunk_size"]), | ||||
|             "headers": _best_format.get("http_headers", {}), | ||||
|         } | ||||
|  | ||||
|   | ||||
| @@ -59,6 +59,11 @@ Reference for the logging formats: https://docs.python.org/3/library/logging.htm | ||||
|         description="The logger for the musify scraper.", | ||||
|         default_value="musify" | ||||
|     ), | ||||
|     LoggerAttribute( | ||||
|         name="musicbrainz_logger", | ||||
|         description="The logger for the musicbrainz scraper.", | ||||
|         default_value="musicbrainz" | ||||
|     ), | ||||
|     LoggerAttribute( | ||||
|         name="youtube_logger", | ||||
|         description="The logger for the youtube scraper.", | ||||
|   | ||||
| @@ -19,7 +19,7 @@ DEBUG_OBJECT_TRACE = DEBUG and False | ||||
| DEBUG_OBJECT_TRACE_CALLSTACK = DEBUG_OBJECT_TRACE and False | ||||
| DEBUG_YOUTUBE_INITIALIZING = DEBUG and False | ||||
| DEBUG_PAGES = DEBUG and False | ||||
| DEBUG_DUMP = DEBUG and False | ||||
| DEBUG_DUMP = DEBUG and True | ||||
| DEBUG_PRINT_ID = DEBUG and True | ||||
|  | ||||
| if DEBUG: | ||||
|   | ||||
| @@ -3,92 +3,94 @@ import unittest | ||||
| from music_kraken.objects import Song, Album, Artist, Collection, Country | ||||
|  | ||||
| class TestCollection(unittest.TestCase): | ||||
|     @staticmethod | ||||
|     def complicated_object() -> Artist: | ||||
|         return Artist( | ||||
|             name="artist", | ||||
|             country=Country.by_alpha_2("DE"), | ||||
|             album_list=[ | ||||
|                 Album( | ||||
|                     title="album", | ||||
|                     song_list=[ | ||||
|                         Song( | ||||
|                             title="song", | ||||
|                             album_list=[ | ||||
|                                 Album(title="album", albumsort=123), | ||||
|                             ], | ||||
|                         ), | ||||
|                         Song( | ||||
|                             title="other_song", | ||||
|                             album_list=[ | ||||
|                                 Album(title="album", albumsort=423), | ||||
|                             ], | ||||
|                         ), | ||||
|                     ] | ||||
|                 ), | ||||
|                 Album(title="album", barcode="1234567890123"), | ||||
|     def test_song_contains_album(self): | ||||
|         """ | ||||
|         Tests that every song contains the album it is added to in its album_collection | ||||
|         """ | ||||
|  | ||||
|         a_1 = Album( | ||||
|             title="album", | ||||
|             song_list= [ | ||||
|                 Song(title="song"), | ||||
|             ] | ||||
|         ) | ||||
|         a_2 = a_1.song_collection[0].album_collection[0] | ||||
|         self.assertTrue(a_1.id == a_2.id) | ||||
|  | ||||
|     def test_song_album_relation(self): | ||||
|     def test_album_contains_song(self): | ||||
|         """ | ||||
|         Tests that | ||||
|         album = album.any_song.one_album | ||||
|         is the same object | ||||
|         Tests that every album contains the song it is added to in its song_collection | ||||
|         """ | ||||
|         s_1 = Song( | ||||
|             title="song", | ||||
|             album_list=[ | ||||
|                 Album(title="album"), | ||||
|             ] | ||||
|         ) | ||||
|         s_2 = s_1.album_collection[0].song_collection[0] | ||||
|         self.assertTrue(s_1.id == s_2.id) | ||||
|  | ||||
|  | ||||
|     def test_auto_add_artist_to_album_feature_artist(self): | ||||
|         """ | ||||
|         Tests that every artist is added to the album's feature_artist_collection per default | ||||
|         """ | ||||
|  | ||||
|         a = self.complicated_object().album_collection[0] | ||||
|         b = a.song_collection[0].album_collection[0] | ||||
|         c = a.song_collection[1].album_collection[0] | ||||
|         d = b.song_collection[0].album_collection[0] | ||||
|         e = d.song_collection[0].album_collection[0] | ||||
|         f = e.song_collection[0].album_collection[0] | ||||
|         g = f.song_collection[0].album_collection[0] | ||||
|         a_1 = Artist( | ||||
|             name="artist", | ||||
|             album_list=[ | ||||
|                 Album(title="album") | ||||
|             ] | ||||
|         ) | ||||
|         a_2 = a_1.album_collection[0].feature_artist_collection[0] | ||||
|  | ||||
|         self.assertTrue(a.id == b.id == c.id == d.id == e.id == f.id == g.id) | ||||
|         self.assertTrue(a.title == b.title == c.title == d.title == e.title == f.title == g.title == "album") | ||||
|         self.assertTrue(a.barcode == b.barcode == c.barcode == d.barcode == e.barcode == f.barcode == g.barcode == "1234567890123") | ||||
|         self.assertTrue(a.albumsort == b.albumsort == c.albumsort == d.albumsort == e.albumsort == f.albumsort == g.albumsort == 123) | ||||
|         self.assertTrue(a_1.id == a_2.id) | ||||
|      | ||||
|         d.title = "new_title" | ||||
|  | ||||
|         self.assertTrue(a.title == b.title == c.title == d.title == e.title == f.title == g.title == "new_title") | ||||
|  | ||||
|     def test_album_artist_relation(self): | ||||
|     def test_auto_add_artist_to_album_feature_artist_push(self): | ||||
|         """ | ||||
|         Tests that | ||||
|         artist = artist.any_album.any_song.one_artist | ||||
|         is the same object | ||||
|         Tests that every artist is added to the album's feature_artist_collection per default but pulled into the album's artist_collection if a merge exists | ||||
|         """ | ||||
|  | ||||
|         a = self.complicated_object() | ||||
|         b = a.album_collection[0].artist_collection[0] | ||||
|         c = b.album_collection[0].artist_collection[0] | ||||
|         d = c.album_collection[0].artist_collection[0] | ||||
|  | ||||
|         self.assertTrue(a.id == b.id == c.id == d.id) | ||||
|         self.assertTrue(a.name == b.name == c.name == d.name == "artist") | ||||
|         self.assertTrue(a.country == b.country == c.country == d.country) | ||||
|  | ||||
|     def test_artist_artist_relation(self): | ||||
|         artist = Artist( | ||||
|         a_1 = Artist( | ||||
|             name="artist", | ||||
|             album_list=[ | ||||
|                 Album( | ||||
|                     title="album", | ||||
|                     song_list=[ | ||||
|                         Song(title="song"), | ||||
|                     ], | ||||
|                     artist_list=[ | ||||
|                         Artist(name="artist"), | ||||
|                     ] | ||||
|                 ) | ||||
|             ] | ||||
|         ) | ||||
|         a_2 = a_1.album_collection[0].artist_collection[0] | ||||
|  | ||||
|         self.assertTrue(artist.id == artist.album_collection[0].song_collection[0].artist_collection[0].id) | ||||
|         self.assertTrue(a_1.id == a_2.id) | ||||
|  | ||||
|  | ||||
|     def test_artist_artist_relation(self): | ||||
|         """ | ||||
|         Tests the proper syncing between album.artist_collection and song.artist_collection | ||||
|         """ | ||||
|  | ||||
|         album = Album( | ||||
|             title="album", | ||||
|             song_list=[ | ||||
|                 Song(title="song"), | ||||
|             ], | ||||
|             artist_list=[ | ||||
|                 Artist(name="artist"), | ||||
|             ] | ||||
|         ) | ||||
|         a_1 = album.artist_collection[0] | ||||
|         a_2 = album.song_collection[0].artist_collection[0] | ||||
|  | ||||
|         self.assertTrue(a_1.id == a_2.id) | ||||
|  | ||||
|     def test_artist_collection_sync(self): | ||||
|         """ | ||||
|         tests the actual implementation of the test above | ||||
|         """ | ||||
|  | ||||
|         album_1 = Album( | ||||
|             title="album", | ||||
|             song_list=[ | ||||
| @@ -113,15 +115,5 @@ class TestCollection(unittest.TestCase): | ||||
|  | ||||
|         self.assertTrue(id(album_1.artist_collection) == id(album_1.artist_collection) == id(album_1.song_collection[0].artist_collection) == id(album_1.song_collection[0].artist_collection)) | ||||
|  | ||||
|     def test_song_artist_relations(self): | ||||
|         a = self.complicated_object() | ||||
|         b = a.album_collection[0].song_collection[0].artist_collection[0] | ||||
|         c = b.album_collection[0].song_collection[0].artist_collection[0] | ||||
|         d = c.album_collection[0].song_collection[0].artist_collection[0] | ||||
|  | ||||
|         self.assertTrue(a.id == b.id == c.id == d.id) | ||||
|         self.assertTrue(a.name == b.name == c.name == d.name == "artist") | ||||
|         self.assertTrue(a.country == b.country == c.country == d.country) | ||||
|  | ||||
| if __name__ == "__main__": | ||||
|     unittest.main() | ||||
|   | ||||
		Reference in New Issue
	
	Block a user