diff --git a/music_kraken/download/page_attributes.py b/music_kraken/download/page_attributes.py index 1db24be..ed3d782 100644 --- a/music_kraken/download/page_attributes.py +++ b/music_kraken/download/page_attributes.py @@ -30,7 +30,7 @@ from ..utils.exception import MKMissingNameException from ..utils.exception.download import UrlNotFoundException from ..utils.shared import DEBUG_PAGES -from ..pages import Page, EncyclopaediaMetallum, Musify, YouTube, YoutubeMusic, Bandcamp, Genius, INDEPENDENT_DB_OBJECTS +from ..pages import Page, EncyclopaediaMetallum, Musify, YouTube, YoutubeMusic, Bandcamp, Musicbrainz, Genius, INDEPENDENT_DB_OBJECTS ALL_PAGES: Set[Type[Page]] = { @@ -38,7 +38,8 @@ ALL_PAGES: Set[Type[Page]] = { Genius, Musify, YoutubeMusic, - Bandcamp + Bandcamp, + Musicbrainz } if youtube_settings["use_youtube_alongside_youtube_music"]: diff --git a/music_kraken/pages/__init__.py b/music_kraken/pages/__init__.py index ba24501..85d4611 100644 --- a/music_kraken/pages/__init__.py +++ b/music_kraken/pages/__init__.py @@ -1,5 +1,6 @@ from .encyclopaedia_metallum import EncyclopaediaMetallum from .musify import Musify +from .musicbrainz import Musicbrainz from .youtube import YouTube from .youtube_music import YoutubeMusic from .bandcamp import Bandcamp diff --git a/music_kraken/pages/musicbrainz.py b/music_kraken/pages/musicbrainz.py index 65d23f3..75e37ff 100644 --- a/music_kraken/pages/musicbrainz.py +++ b/music_kraken/pages/musicbrainz.py @@ -33,110 +33,113 @@ from ..utils.support_classes.download_result import DownloadResult -class MusicbrainzTypes(Enum): - ARTIST = "artist" - RELEASE = "release" - SONG = "track" - - -@dataclass -class MusicbrainzUrl: - source_type: MusicbrainzTypes - name_without_id: str - name_with_id: str - musicbrainz_id: str - url: str - - class Musicbrainz(Page): SOURCE_TYPE = ALL_SOURCE_TYPES.MUSICBRAINZ - - HOST = "https://musicbrainz.org/" + + HOST = "https://musicbrainz.org" def __init__(self, *args, **kwargs): - self.connection: Connection = Connection( - host="https://musicbrainz.org/", - logger=self.LOGGER, - module="musicbrainz", - ) + musicbrainzngs.set_useragent("mk", "1") - self.stream_connection: Connection = Connection( - host="https://musicbrainz.org/", - logger=self.LOGGER, - semantic_not_found=False, - ) - super().__init__(*args, **kwargs) - - def get_source_type(self, source: Source) -> Optional[Type[DatabaseObject]]: - if source.url is None: - return None - - musicbrainz_url = parse_url(source.url) - - # Has no labels, because afaik musicbrainz has no Labels - musicbrainz_type_to_database_type = { - musicbrainzTypes.SONG: Song, - musicbrainzTypes.RELEASE: Album, - musicbrainzTypes.ARTIST: Artist - } - - return musicbrainz_type_to_database_type.get(musicbrainz_url.source_type) - - def parse_url(url: str) -> MusicbrainzUrl: - parsed = urlparse(url) - - path = parsed.path.split("/") - - split_name = path[2].split("-") - url_id = split_name[-1] - name_for_url = "-".join(split_name[:-1]) - - try: - type_enum = MusicbrainzTypes(path[1]) - except ValueError as e: - logging_settings["musicbrainz_logger"].warning(f"{path[1]} is not yet implemented, add it to MusicbrainzTypes") - raise e - - return MusicbrainzUrl( - source_type=type_enum, - name_without_id=name_for_url, - name_with_id=path[2], - musicbrainz_id=url_id, - url=url - ) def general_search(self, search_query: str) -> List[DatabaseObject]: search_results = [] - r = self.connection.get(f"https://musicbrainz.org/search?query={search_query}&type=artist&method=indexed", name="search_" + search_query) - if r is None: - return [] - search_soup: BeautifulSoup = self.get_soup_from_response(r) + #Artist + search_results += self.artist_search(search_query).copy() - def artist_search(self, artist: Artist) -> List[Artist]: + #Album + search_results += self.album_search(search_query).copy() + + #Song + search_results += self.song_search(search_query).copy() + + return search_results + + def artist_search(self, search_query: str) -> List[Artist]: artist_list = [] - r = self.connection.get(f"https://musicbrainz.org/search?query={artist.name}&type=artist&method=indexed", name="search_" + artist.name) - if r is None: - return [] + #Artist + artist_dict_list: list = musicbrainzngs.search_artists(search_query)['artist-list'] + artist_source_list: List[Source] = [] + for artist_dict in artist_dict_list: + artist_source_list.append(Source(self.SOURCE_TYPE, self.HOST + "/artist/" + artist_dict['id'])) + artist_list.append(Artist( + name=artist_dict['name'], + source_list=artist_source_list + )) return artist_list - def song_search(self, song: Song) -> List[Song]: + def song_search(self, search_query: str) -> List[Song]: song_list = [] - r = self.connection.get(f"https://musicbrainz.org/search?query={song.title_string}&type=recording&method=indexed", name="search_" + song.title_string) - if r is None: - return [] + #Song + song_dict_list: list = musicbrainzngs.search_recordings(search_query)['recording-list'] + song_source_list: List[Source] = [] + for song_dict in song_dict_list: + song_source_list.append(Source(self.SOURCE_TYPE, self.HOST + "/recording/" + song_dict['id'])) + song_list.append(Song( + title=song_dict['title'], + source_list=song_source_list + )) return song_list - def album_search(self, album: Album) -> List[Album]: + def album_search(self, search_query: str) -> List[Album]: album_list = [] - r = self.connection.get(f"https://musicbrainz.org/search?query={album.title_string}&type=release_group&method=indexed", name="search_" + album.title_string) - if r is None: - return [] + #Album + album_dict_list: list = musicbrainzngs.search_release_groups(search_query)['release-group-list'] + album_source_list: List[Source] = [] + for album_dict in album_dict_list: + album_source_list.append(Source(self.SOURCE_TYPE, self.HOST + "/release-group/" + album_dict['id'])) + album_list.append(Album( + title=album_dict['title'], + source_list=album_source_list + )) - return album_list \ No newline at end of file + return album_list + + + def fetch_album(self, source: Source, stop_at_level: int = 1) -> Album: + album_list = [] + + #Album + album_dict_list: list = musicbrainzngs.search_release_groups(search_query)['release-group-list'] + album_source_list: List[Source] = [] + for album_dict in album_dict_list: + album_source_list.append(Source(self.SOURCE_TYPE, self.HOST + "/release-group/" + album_dict['id'])) + album_list.append(Album( + title=album_dict['title'], + source_list=album_source_list + )) + + def fetch_artist(self, source: Source, stop_at_level: int = 1) -> Artist: + artist_list = [] + + #Artist + artist_dict_list: list = musicbrainzngs.search_artists(search_query)['artist-list'] + artist_source_list: List[Source] = [] + for artist_dict in artist_dict_list: + artist_source_list.append(Source(self.SOURCE_TYPE, self.HOST + "/artist/" + artist_dict['id'])) + artist_list.append(Artist( + name=artist_dict['name'], + source_list=artist_source_list, + )) + + def fetch_song(self, source: Source, stop_at_level: int = 1) -> Song: + song_list = [] + + #Song + song_dict_list: list = musicbrainzngs.search_recordings(search_query)['recording-list'] + song_source_list: List[Source] = [] + for song_dict in song_dict_list: + song_source_list.append(Source(self.SOURCE_TYPE, self.HOST + "/recording/" + song_dict['id'])) + song_list.append(Song( + title=song_dict['title'], + source_list=song_source_list + )) + + \ No newline at end of file diff --git a/music_kraken/utils/config/config_files/logging_config.py b/music_kraken/utils/config/config_files/logging_config.py index c068fe3..36d92aa 100644 --- a/music_kraken/utils/config/config_files/logging_config.py +++ b/music_kraken/utils/config/config_files/logging_config.py @@ -59,6 +59,11 @@ Reference for the logging formats: https://docs.python.org/3/library/logging.htm description="The logger for the musify scraper.", default_value="musify" ), + LoggerAttribute( + name="musicbrainz_logger", + description="The logger for the musicbrainz scraper.", + default_value="musicbrainz" + ), LoggerAttribute( name="youtube_logger", description="The logger for the youtube scraper.",