feat: musicbrainz overall search
All checks were successful
ci/woodpecker/push/woodpecker Pipeline was successful

This commit is contained in:
Luna 2024-09-16 13:12:50 +02:00
parent 780daac0ef
commit 265c9f462f
4 changed files with 95 additions and 85 deletions

View File

@ -30,7 +30,7 @@ from ..utils.exception import MKMissingNameException
from ..utils.exception.download import UrlNotFoundException
from ..utils.shared import DEBUG_PAGES
from ..pages import Page, EncyclopaediaMetallum, Musify, YouTube, YoutubeMusic, Bandcamp, Genius, INDEPENDENT_DB_OBJECTS
from ..pages import Page, EncyclopaediaMetallum, Musify, YouTube, YoutubeMusic, Bandcamp, Musicbrainz, Genius, INDEPENDENT_DB_OBJECTS
ALL_PAGES: Set[Type[Page]] = {
@ -38,7 +38,8 @@ ALL_PAGES: Set[Type[Page]] = {
Genius,
Musify,
YoutubeMusic,
Bandcamp
Bandcamp,
Musicbrainz
}
if youtube_settings["use_youtube_alongside_youtube_music"]:

View File

@ -1,5 +1,6 @@
from .encyclopaedia_metallum import EncyclopaediaMetallum
from .musify import Musify
from .musicbrainz import Musicbrainz
from .youtube import YouTube
from .youtube_music import YoutubeMusic
from .bandcamp import Bandcamp

View File

@ -33,110 +33,113 @@ from ..utils.support_classes.download_result import DownloadResult
class MusicbrainzTypes(Enum):
    """Entity kinds recognised in the first path segment of a MusicBrainz URL.

    Every value is the literal path segment, so ``MusicbrainzTypes(segment)``
    turns a URL segment into a member and raises ``ValueError`` for segments
    that are not listed here.
    """
    ARTIST = "artist"
    RELEASE = "release"
    SONG = "track"
    # The search methods of this page build "/release-group/<id>" and
    # "/recording/<id>" URLs; without these two members such URLs could not
    # be converted back into a type.
    RELEASE_GROUP = "release-group"
    RECORDING = "recording"
@dataclass
class MusicbrainzUrl:
    """Plain record holding the pieces of a parsed MusicBrainz URL."""
    # Entity kind, derived from the first path segment of the URL.
    source_type: MusicbrainzTypes
    # Name portion of the second path segment (identifier removed).
    name_without_id: str
    # Second path segment exactly as it appears in the URL.
    name_with_id: str
    # Identifier portion of the second path segment.
    musicbrainz_id: str
    # The complete URL the other fields were derived from.
    url: str
class Musicbrainz(Page):
SOURCE_TYPE = ALL_SOURCE_TYPES.MUSICBRAINZ
HOST = "https://musicbrainz.org/"
HOST = "https://musicbrainz.org"
def __init__(self, *args, **kwargs):
    """Set up both connections and the musicbrainzngs user agent.

    All positional and keyword arguments are handed on unchanged to the
    parent initialiser, which is invoked last.
    """
    # Regular connection used for requests to the MusicBrainz website.
    self.connection: Connection = Connection(
        module="musicbrainz",
        logger=self.LOGGER,
        host="https://musicbrainz.org/",
    )

    # Registers application name "mk" and version "1" with musicbrainzngs.
    musicbrainzngs.set_useragent("mk", "1")

    # Second connection, created with semantic_not_found switched off.
    self.stream_connection: Connection = Connection(
        semantic_not_found=False,
        logger=self.LOGGER,
        host="https://musicbrainz.org/",
    )

    super().__init__(*args, **kwargs)
def get_source_type(self, source: Source) -> Optional[Type[DatabaseObject]]:
    """Return the database class that a MusicBrainz source URL points at.

    :param source: the source whose ``url`` is inspected.
    :return: ``Song``, ``Album`` or ``Artist``; ``None`` when the source has
        no URL or the URL's entity kind has no mapped class.
    :raises ValueError: propagated from ``parse_url`` when the URL's entity
        kind is not a member of ``MusicbrainzTypes``.
    """
    if source.url is None:
        return None

    musicbrainz_url = parse_url(source.url)

    # Keyed by the URL path segment (the enum value) rather than by enum
    # member, so that both spellings used for album-like and song-like
    # entities resolve. The previous table referenced the undefined name
    # ``musicbrainzTypes`` and raised NameError on every call.
    # Labels are not mapped: MusicbrainzTypes has no member for them.
    segment_to_database_type = {
        "track": Song,
        "recording": Song,
        "release": Album,
        "release-group": Album,
        "artist": Artist,
    }

    return segment_to_database_type.get(musicbrainz_url.source_type.value)
def parse_url(url: str) -> MusicbrainzUrl:
    """Split a MusicBrainz URL into entity type, name and identifier.

    The first path segment selects the ``MusicbrainzTypes`` member, the
    second one carries the identifier. When the second segment is a UUID it
    is used as the identifier in full; any other segment is treated as
    ``<name>-<id>`` and split at its last ``-``.

    :param url: the URL to parse.
    :return: a ``MusicbrainzUrl`` describing ``url``.
    :raises IndexError: if the URL path has fewer than two segments.
    :raises ValueError: if the first path segment is not a value of
        ``MusicbrainzTypes`` (a warning is logged before re-raising).
    """
    from uuid import UUID  # function-scope import keeps this block self-contained

    path = urlparse(url).path.split("/")
    slug = path[2]

    try:
        UUID(slug)
    except ValueError:
        # Not a UUID: keep the "<name>-<id>" interpretation.
        name_for_url, _, url_id = slug.rpartition("-")
    else:
        # A UUID contains hyphens itself; splitting it on "-" would keep
        # only its last group, so the whole segment is the identifier.
        name_for_url, url_id = "", slug

    try:
        type_enum = MusicbrainzTypes(path[1])
    except ValueError:
        logging_settings["musicbrainz_logger"].warning(f"{path[1]} is not yet implemented, add it to MusicbrainzTypes")
        raise

    return MusicbrainzUrl(
        source_type=type_enum,
        name_without_id=name_for_url,
        name_with_id=slug,
        musicbrainz_id=url_id,
        url=url
    )
def general_search(self, search_query: str) -> List[DatabaseObject]:
    """Search artists, albums and songs for one free-text query.

    The three specialised searches are run one after another and their
    results are concatenated in the order artists, albums, songs.

    :param search_query: the text to search for.
    :return: all matches found by the three searches.
    """
    # The former request to the website's search page was dropped: its
    # response was parsed but never read, so it cost a round trip (and
    # discarded every result when it failed) without contributing anything.
    search_results: List[DatabaseObject] = []

    search_results.extend(self.artist_search(search_query))
    search_results.extend(self.album_search(search_query))
    search_results.extend(self.song_search(search_query))

    return search_results
def artist_search(self, search_query: str) -> List[Artist]:
    """Search MusicBrainz artists through musicbrainzngs.

    :param search_query: the text to search for.
    :return: one ``Artist`` per entry of the response's ``artist-list``,
        each carrying a single source that points at its own artist page.
    """
    artist_dict_list: list = musicbrainzngs.search_artists(search_query)['artist-list']

    # Every artist gets a fresh one-element source list. Previously a single
    # list was appended to across iterations and passed to every artist, so
    # results were handed the URLs of other artists as well.
    return [
        Artist(
            name=artist_dict['name'],
            source_list=[Source(self.SOURCE_TYPE, self.HOST + "/artist/" + artist_dict['id'])],
        )
        for artist_dict in artist_dict_list
    ]
def song_search(self, song: Song) -> List[Song]:
def song_search(self, search_query: str) -> List[Song]:
    """Search MusicBrainz recordings through musicbrainzngs.

    :param search_query: the text to search for.
    :return: one ``Song`` per entry of the response's ``recording-list``,
        each carrying a single source that points at its own recording page.
    """
    song_dict_list: list = musicbrainzngs.search_recordings(search_query)['recording-list']

    # Every song gets a fresh one-element source list. Previously a single
    # list was appended to across iterations and passed to every song, so
    # results were handed the URLs of other recordings as well.
    return [
        Song(
            title=song_dict['title'],
            source_list=[Source(self.SOURCE_TYPE, self.HOST + "/recording/" + song_dict['id'])],
        )
        for song_dict in song_dict_list
    ]
def album_search(self, album: Album) -> List[Album]:
def album_search(self, search_query: str) -> List[Album]:
    """Search MusicBrainz release groups through musicbrainzngs.

    :param search_query: the text to search for.
    :return: one ``Album`` per entry of the response's
        ``release-group-list``, each carrying a single source that points at
        its own release-group page.
    """
    album_dict_list: list = musicbrainzngs.search_release_groups(search_query)['release-group-list']

    # Every album gets a fresh one-element source list. Previously a single
    # list was appended to across iterations and passed to every album, so
    # results were handed the URLs of other release groups as well.
    return [
        Album(
            title=album_dict['title'],
            source_list=[Source(self.SOURCE_TYPE, self.HOST + "/release-group/" + album_dict['id'])],
        )
        for album_dict in album_dict_list
    ]
return album_list
def fetch_album(self, source: Source, stop_at_level: int = 1) -> Album:
    """Fetch the album that ``source`` points at.

    The previous body was a copy of the search loop: it read the undefined
    name ``search_query``, ignored ``source`` and returned nothing.

    :param source: source whose ``url`` ends in the release-group ID.
        NOTE(review): assumes a "/release-group/<id>" URL as built by
        ``album_search`` and a non-empty ``source.url`` - confirm.
    :param stop_at_level: currently unused; kept for interface compatibility.
    :return: an ``Album`` with the title reported by MusicBrainz and one
        source pointing at its release-group page.
    """
    # The identifier is the last non-empty path segment of the URL.
    mbid = urlparse(source.url).path.rstrip("/").rsplit("/", 1)[-1]

    album_dict: dict = musicbrainzngs.get_release_group_by_id(mbid)['release-group']

    return Album(
        title=album_dict['title'],
        source_list=[Source(self.SOURCE_TYPE, self.HOST + "/release-group/" + album_dict['id'])],
    )
def fetch_artist(self, source: Source, stop_at_level: int = 1) -> Artist:
    """Fetch the artist that ``source`` points at.

    The previous body was a copy of the search loop: it read the undefined
    name ``search_query``, ignored ``source`` and returned nothing.

    :param source: source whose ``url`` ends in the artist ID.
        NOTE(review): assumes an "/artist/<id>" URL as built by
        ``artist_search`` and a non-empty ``source.url`` - confirm.
    :param stop_at_level: currently unused; kept for interface compatibility.
    :return: an ``Artist`` with the name reported by MusicBrainz and one
        source pointing at its artist page.
    """
    # The identifier is the last non-empty path segment of the URL.
    mbid = urlparse(source.url).path.rstrip("/").rsplit("/", 1)[-1]

    artist_dict: dict = musicbrainzngs.get_artist_by_id(mbid)['artist']

    return Artist(
        name=artist_dict['name'],
        source_list=[Source(self.SOURCE_TYPE, self.HOST + "/artist/" + artist_dict['id'])],
    )
def fetch_song(self, source: Source, stop_at_level: int = 1) -> Song:
    """Fetch the song that ``source`` points at.

    The previous body was a copy of the search loop: it read the undefined
    name ``search_query``, ignored ``source`` and returned nothing.

    :param source: source whose ``url`` ends in the recording ID.
        NOTE(review): assumes a "/recording/<id>" URL as built by
        ``song_search`` and a non-empty ``source.url`` - confirm.
    :param stop_at_level: currently unused; kept for interface compatibility.
    :return: a ``Song`` with the title reported by MusicBrainz and one
        source pointing at its recording page.
    """
    # The identifier is the last non-empty path segment of the URL.
    mbid = urlparse(source.url).path.rstrip("/").rsplit("/", 1)[-1]

    song_dict: dict = musicbrainzngs.get_recording_by_id(mbid)['recording']

    return Song(
        title=song_dict['title'],
        source_list=[Source(self.SOURCE_TYPE, self.HOST + "/recording/" + song_dict['id'])],
    )

View File

@ -59,6 +59,11 @@ Reference for the logging formats: https://docs.python.org/3/library/logging.htm
description="The logger for the musify scraper.",
default_value="musify"
),
LoggerAttribute(
name="musicbrainz_logger",
description="The logger for the musicbrainz scraper.",
default_value="musicbrainz"
),
LoggerAttribute(
name="youtube_logger",
description="The logger for the youtube scraper.",