Added fetching of artist sources from Metal Archives (MA)

This commit is contained in:
Hellow2 2023-02-01 09:10:05 +01:00
parent e48285c3b0
commit f1e92a8464
3 changed files with 53 additions and 2 deletions

View File

@ -12,6 +12,11 @@ from music_kraken.pages import (
test_db = Database("test.db")
# test_db.reset()
def print_source(source_obj):
    """Print a header line, then every entry of ``source_obj.source_list``.

    Each entry is written on its own line using its string form.
    """
    print("---source---")
    for entry in source_obj.source_list:
        print(entry)
def print_song(song_: Song):
print(str(song_.metadata))
print("----album--")
@ -31,6 +36,7 @@ def print_artist(artist: Artist):
print("---discography---")
for album in artist.discography:
print(album)
print_source(artist)
# only_smile = EncyclopaediaMetallum.search_by_query("only smile")

View File

@ -66,6 +66,37 @@ class Source(DatabaseObject, SongAttribute, MetadataAttribute):
self.url = url
@classmethod
def match_url(cls, url: str):
    """
    Guess which page a url belongs to and build a source for that page.

    This should only be used when it is not already known which page the
    url comes from, because walking through every rule is less efficient
    than creating the source with the known page directly.

    The rules are tried from top to bottom and the first one that fits
    decides the page. If no rule fits, None is returned.
    """
    if url.startswith("https://www.youtube"):
        page = SourcePages.YOUTUBE
    elif url.startswith("https://www.deezer"):
        page = SourcePages.DEEZER
    elif url.startswith("https://open.spotify.com"):
        page = SourcePages.SPOTIFY
    elif "bandcamp" in url:
        # matched anywhere in the url, not only as a prefix
        page = SourcePages.BANDCAMP
    elif url.startswith("https://www.metal-archives.com/"):
        page = SourcePages.ENCYCLOPAEDIA_METALLUM
    # the less important ones
    elif url.startswith("https://www.facebook"):
        page = SourcePages.FACEBOOK
    elif url.startswith("https://www.instagram"):
        page = SourcePages.INSTAGRAM
    elif url.startswith("https://twitter"):
        page = SourcePages.TWITTER
    else:
        # no known page recognised
        return None

    return cls(page, url)
def get_song_metadata(self) -> MetadataAttribute.Metadata:
return MetadataAttribute.Metadata({
Mapping.FILE_WEBPAGE_URL: [self.url],
@ -87,7 +118,7 @@ class Source(DatabaseObject, SongAttribute, MetadataAttribute):
return super().get_metadata()
def __str__(self):
    """Return the same text as ``__repr__``.

    The diff this block comes from replaced the old
    ``f"{self.page_enum}: {self.url}"`` return with a delegation to
    ``__repr__``. Keeping both returns would leave the second one
    unreachable, so only the delegation remains.
    """
    return self.__repr__()
def __repr__(self) -> str:
    """Return a compact form: ``Src(<value of page_enum>: <url>)``."""
    page_value = self.page_enum.value
    return "Src({}: {})".format(page_value, self.url)

View File

@ -285,7 +285,21 @@ class EncyclopaediaMetallum(Page):
LOGGER.warning(f"code {r.status_code} at {sources_url.format(ma_artist_id)}")
return artist
print(r.text)
soup = BeautifulSoup(r.text, 'html.parser')
artist_source = soup.find("div", {"id": "band_links_Official"})
merchandice_source = soup.find("div", {"id": "band_links_Official_merchandise"})
label_source = soup.find("div", {"id": "band_links_Labels"})
for tr in artist_source.find_all("td"):
a = tr.find("a")
url = a.get("href")
source = Source.match_url(url)
if source is None:
continue
artist.add_source(source)
return artist