started implementation of fetching from MA artist sources

This commit is contained in:
Hellow 2023-02-01 00:07:13 +01:00
parent dde8271707
commit e48285c3b0
3 changed files with 58 additions and 6 deletions

View File

@@ -21,6 +21,14 @@ class SourcePages(Enum):
GENIUS = "genius"
MUSICBRAINZ = "musicbrainz"
ENCYCLOPAEDIA_METALLUM = "encyclopaedia metallum"
BANDCAMP = "bandcamp"
DEEZER = "deezer"
SPOTIFY = "spotify"
# This has nothing to do with audio, but bands can be here
INSTAGRAM = "instagram"
FACEBOOK = "facebook"
TWITTER = "twitter" # I will use nitter though lol
@classmethod
def get_homepage(cls, attribute) -> str:
@@ -29,7 +37,13 @@ class SourcePages(Enum):
cls.MUSIFY: "https://musify.club/",
cls.MUSICBRAINZ: "https://musicbrainz.org/",
cls.ENCYCLOPAEDIA_METALLUM: "https://www.metal-archives.com/",
cls.GENIUS: "https://genius.com/",
cls.BANDCAMP: "https://bandcamp.com/",
cls.DEEZER: "https://www.deezer.com/",
cls.INSTAGRAM: "https://www.instagram.com/",
cls.FACEBOOK: "https://www.facebook.com/",
cls.SPOTIFY: "https://open.spotify.com/",
cls.TWITTER: "https://twitter.com/"
}
return homepage_map[attribute]
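For reference, a minimal usage sketch of the extended homepage map (the import path is an assumption; the enum members and get_homepage follow the diff above):

from music_kraken.objects.source import SourcePages  # module path is assumed

# newly added pages resolve to their homepages
print(SourcePages.get_homepage(SourcePages.BANDCAMP))  # https://bandcamp.com/
print(SourcePages.get_homepage(SourcePages.TWITTER))   # https://twitter.com/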
@@ -89,16 +103,33 @@ class SourceAttribute:
it adds the source_list attribute to a class
"""
_source_dict: Dict[object, List[Source]]
source_url_map: Dict[str, Source]
def __new__(cls, **kwargs):
new = object.__new__(cls)
new._source_dict = {page_enum: list() for page_enum in SourcePages}
new.source_url_map = dict()
return new
def match_source_with_url(self, url: str) -> bool:
"""
returns True if a source with this url already exists,
otherwise returns False
:param url:
:return source_with_url_exists:
"""
return url in self.source_url_map
def match_source(self, source: Source) -> bool:
return self.match_source_with_url(source.url)
def add_source(self, source: Source):
"""
adds a new Source to the sources
"""
if self.match_source(source):
return
self.source_url_map[source.url] = source
self._source_dict[source.page_enum].append(source)
def get_sources_from_page(self, page_enum) -> List[Source]:
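Below is a minimal sketch of the deduplication this adds, assuming an object type that carries the SourceAttribute behaviour; the Artist and Source constructor arguments are illustrative assumptions, not the actual signatures:

artist = Artist(name="Some Band")  # hypothetical constructor call
src = Source(SourcePages.BANDCAMP, "https://someband.bandcamp.com/")  # argument order is assumed

artist.add_source(src)
artist.add_source(src)  # second call is a no-op: the url is already in source_url_map

assert artist.match_source_with_url("https://someband.bandcamp.com/")
assert len(artist.get_sources_from_page(SourcePages.BANDCAMP)) == 1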

View File

@@ -200,10 +200,10 @@ class EncyclopaediaMetallum(Page):
)
@classmethod
def fetch_artist_discography(cls, artist: Artist, ma_artist_id: str) -> Artist:
"""
TODO
I'd guess this function has quite some possibility for optimizations
in terms of performance and clean code
"""
discography_url = "https://www.metal-archives.com/band/discography/id/{}/tab/all"
@@ -217,16 +217,18 @@ class EncyclopaediaMetallum(Page):
album_by_url[source.url] = album
old_discography = artist.main_albums.copy()
# save the ids of the albums that are added to this set, so I can
# efficiently add all leftover albums from the discography to the new one
used_ids = set()
new_discography: List[Album] = []
# make the request
r = cls.API_SESSION.get(discography_url.format(ma_artist_id))
if r.status_code != 200:
LOGGER.warning(f"code {r.status_code} at {discography_url.format(ma_artist_id)}")
return artist
# parse the html
soup = BeautifulSoup(r.text, 'html.parser')
tbody_soup = soup.find('tbody')
@@ -264,11 +266,27 @@ class EncyclopaediaMetallum(Page):
new_discography.append(album_obj)
# add the albums back, which weren't on this page
for old_object in old_discography:
if old_object.id not in used_ids:
new_discography.append(old_object)
artist.main_albums = new_discography
return artist
@classmethod
def fetch_artist_sources(cls, artist: Artist, ma_artist_id: str) -> Artist:
sources_url = "https://www.metal-archives.com/link/ajax-list/type/band/id/{}"
# make the request
r = cls.API_SESSION.get(sources_url.format(ma_artist_id))
if r.status_code != 200:
LOGGER.warning(f"code {r.status_code} at {sources_url.format(ma_artist_id)}")
return artist
print(r.text)
return artist
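The print(r.text) is just a work-in-progress placeholder. A possible next step, sketched under the assumption that the ajax-list endpoint returns an HTML fragment containing plain anchor tags (and that Source and SourcePages are already imported in this file), would map each link to a Source by matching its homepage:

# sketch only: the endpoint's markup and the Source constructor arguments are assumptions
soup = BeautifulSoup(r.text, 'html.parser')

for anchor in soup.find_all('a', href=True):
    url = anchor['href']
    for page in SourcePages:
        try:
            homepage = SourcePages.get_homepage(page)
        except KeyError:
            # not every page necessarily has a homepage entry
            continue
        if url.startswith(homepage):
            artist.add_source(Source(page, url))
            break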
@classmethod
@@ -297,6 +315,9 @@ class EncyclopaediaMetallum(Page):
# SIMPLE METADATA # SIMPLE METADATA
# DISCOGRAPHY # DISCOGRAPHY
artist = cls.fetch_artist_discography(artist, artist_id)
# External Sources
artist = cls.fetch_artist_sources(artist, artist_id)
return artist

Binary file not shown.