Started implementing the fetching of artist sources from Metal Archives (MA)

This commit is contained in:
Hellow 2023-02-01 00:07:13 +01:00
parent dde8271707
commit e48285c3b0
3 changed files with 58 additions and 6 deletions

View File

@ -21,6 +21,14 @@ class SourcePages(Enum):
GENIUS = "genius"
MUSICBRAINZ = "musicbrainz"
ENCYCLOPAEDIA_METALLUM = "encyclopaedia metallum"
BANDCAMP = "bandcamp"
DEEZER = "deezer"
SPOTIFY = "spotify"
# This has nothing to do with audio, but bands can be here
INSTAGRAM = "instagram"
FACEBOOK = "facebook"
TWITTER = "twitter" # I will use nitter though lol
@classmethod
def get_homepage(cls, attribute) -> str:
@ -29,7 +37,13 @@ class SourcePages(Enum):
cls.MUSIFY: "https://musify.club/",
cls.MUSICBRAINZ: "https://musicbrainz.org/",
cls.ENCYCLOPAEDIA_METALLUM: "https://www.metal-archives.com/",
cls.GENIUS: "https://genius.com/"
cls.GENIUS: "https://genius.com/",
cls.BANDCAMP: "https://bandcamp.com/",
cls.DEEZER: "https://www.deezer.com/",
cls.INSTAGRAM: "https://www.instagram.com/",
cls.FACEBOOK: "https://www.facebook.com/",
cls.SPOTIFY: "https://open.spotify.com/",
cls.TWITTER: "https://twitter.com/"
}
return homepage_map[attribute]
@ -89,16 +103,33 @@ class SourceAttribute:
it adds the source_list attribute to a class
"""
_source_dict: Dict[object, List[Source]]
source_url_map: Dict[str, Source]
def __new__(cls, **kwargs):
    """
    Create the instance and attach empty source containers to it.

    :param kwargs: accepted, but not read by this method
    :return: the new instance, with `_source_dict` and `source_url_map` set
    """
    instance = object.__new__(cls)
    # every page of SourcePages gets its own, initially empty, list
    instance._source_dict = {page: [] for page in SourcePages}
    # maps the url of a source to the source itself
    instance.source_url_map = {}
    return instance
def match_source_with_url(self, url: str) -> bool:
    """
    Check whether a source with the given url is already known.

    :param url: the url that is looked up in `source_url_map`
    :return source_with_url_exists: True if the url is a key of
        `source_url_map`, else False
    """
    known_urls = self.source_url_map
    return url in known_urls
def match_source(self, source: Source) -> bool:
    """
    Check whether the given source is already known, judged by its url.

    :param source: the source whose `url` is looked up
    :return: the result of `match_source_with_url` for that url
    """
    source_url = source.url
    return self.match_source_with_url(source_url)
def add_source(self, source: Source):
    """
    Register a new Source.

    Nothing happens if `match_source` reports that the source already exists.
    Otherwise the source is stored in `source_url_map` under its url and
    appended to the list of its page in `_source_dict`.

    :param source: the source to add
    """
    already_known = self.match_source(source)
    if not already_known:
        self.source_url_map[source.url] = source
        self._source_dict[source.page_enum].append(source)
def get_sources_from_page(self, page_enum) -> List[Source]:

View File

@ -200,10 +200,10 @@ class EncyclopaediaMetallum(Page):
)
@classmethod
def add_dicography(cls, artist: Artist, ma_artist_id: str) -> Artist:
def fetch_artist_discography(cls, artist: Artist, ma_artist_id: str) -> Artist:
"""
TODO
I'd guess this funktion has quite some posibility for otimizations
I'd guess this funktion has quite some possibility for optimizations
in form of performance and clean code
"""
discography_url = "https://www.metal-archives.com/band/discography/id/{}/tab/all"
@ -217,16 +217,18 @@ class EncyclopaediaMetallum(Page):
album_by_url[source.url] = album
old_discography = artist.main_albums.copy()
# save the ids of the albums, that are added to this set, so I can
# efficiently add all leftover albums from the discograpy to the new one
# efficiently add all leftover albums from the discography to the new one
used_ids = set()
new_discography: List[Album] = []
# make the request
r = cls.API_SESSION.get(discography_url.format(ma_artist_id))
if r.status_code != 200:
LOGGER.warning(f"code {r.status_code} at {discography_url.format(ma_artist_id)}")
return artist
# parse the html
soup = BeautifulSoup(r.text, 'html.parser')
tbody_soup = soup.find('tbody')
@ -264,11 +266,27 @@ class EncyclopaediaMetallum(Page):
new_discography.append(album_obj)
# add the albums back, which weren't on this page
for old_object in old_discography:
if old_object.id not in used_ids:
new_discography.append(old_object)
artist.main_albums = new_discography
return artist
@classmethod
def fetch_artist_sources(cls, artist: Artist, ma_artist_id: str) -> Artist:
    """
    Request the link list of a band from Metal Archives.

    TODO
    The response is not parsed yet, so the artist is returned unchanged.
    The raw response text is only written to the debug log.

    :param artist: the artist object, which is returned as it was passed in
    :param ma_artist_id: the id that is inserted into the request url
    :return artist: the same artist object
    """
    sources_url = "https://www.metal-archives.com/link/ajax-list/type/band/id/{}"
    # build the url once, it is needed for the request and for the warning
    request_url = sources_url.format(ma_artist_id)

    # make the request
    r = cls.API_SESSION.get(request_url)
    if r.status_code != 200:
        LOGGER.warning(f"code {r.status_code} at {request_url}")
        return artist

    # use the logger instead of print, so the dump doesn't end up on stdout
    LOGGER.debug(r.text)

    return artist
@classmethod
@ -297,6 +315,9 @@ class EncyclopaediaMetallum(Page):
# SIMPLE METADATA
# DISCOGRAPHY
artist = cls.add_dicography(artist, artist_id)
artist = cls.fetch_artist_discography(artist, artist_id)
# External Sources
artist = cls.fetch_artist_sources(artist, artist_id)
return artist

Binary file not shown.