Started implementing the fetching of artist sources from Metal Archives (MA)

2023-02-01 00:07:13 +01:00
parent dde8271707
commit e48285c3b0
3 changed files with 58 additions and 6 deletions
--- a/src/music_kraken/database/objects/source.py
+++ b/src/music_kraken/database/objects/source.py
@@ -21,6 +21,14 @@ class SourcePages(Enum):
    GENIUS = "genius"
    MUSICBRAINZ = "musicbrainz"
    ENCYCLOPAEDIA_METALLUM = "encyclopaedia metallum"
    BANDCAMP = "bandcamp"
    DEEZER = "deezer"
    SPOTIFY = "spotify"
    # This has nothing to do with audio, but bands can be here
    INSTAGRAM = "instagram"
    FACEBOOK = "facebook"
    TWITTER = "twitter" # I will use nitter though lol
    @classmethod
    def get_homepage(cls, attribute) -> str:
@@ -29,7 +37,13 @@ class SourcePages(Enum):
            cls.MUSIFY: "https://musify.club/",
            cls.MUSICBRAINZ: "https://musicbrainz.org/",
            cls.ENCYCLOPAEDIA_METALLUM: "https://www.metal-archives.com/",
-            cls.GENIUS: "https://genius.com/"
+            cls.GENIUS: "https://genius.com/",
            cls.BANDCAMP: "https://bandcamp.com/",
            cls.DEEZER: "https://www.deezer.com/",
            cls.INSTAGRAM: "https://www.instagram.com/",
            cls.FACEBOOK: "https://www.facebook.com/",
            cls.SPOTIFY: "https://open.spotify.com/",
            cls.TWITTER: "https://twitter.com/"
        }
        return homepage_map[attribute]
@@ -89,16 +103,33 @@ class SourceAttribute:
    it adds the source_list attribute to a class
    """
    _source_dict: Dict[object, List[Source]]
    source_url_map: Dict[str, Source]
    def __new__(cls, *args, **kwargs):
        """
        creates the instance and attaches empty source containers to it,
        before __init__ runs

        the constructor arguments are accepted but ignored here; positional
        arguments are accepted as well, so that classes inheriting from this
        one can be instantiated with them without raising a TypeError

        :return new: the new instance with empty source containers
        """
        new = object.__new__(cls)
        # one (initially empty) list of sources for every known page
        new._source_dict = {page_enum: [] for page_enum in SourcePages}
        # maps the url of a source to the source itself
        new.source_url_map = {}
        return new
    def match_source_with_url(self, url: str) -> bool:
        """
        checks whether a source with the given url is already known

        :param url: the url to look up in the source url map
        :return source_with_url_exists: True if the url is a key of the map, else False
        """
        known_urls = self.source_url_map
        return url in known_urls
    def match_source(self, source: Source) -> bool:
        """
        checks whether the given source is already known, by its url

        :param source: the source whose url is looked up
        :return source_exists: the result of match_source_with_url for source.url
        """
        url_is_known = self.match_source_with_url
        return url_is_known(source.url)
    def add_source(self, source: Source):
        """
        registers a Source, unless one with the same url is already registered

        :param source: the source to register
        """
        already_known = self.match_source(source)
        if not already_known:
            # index the source by its url and file it under its page
            self.source_url_map[source.url] = source
            page_sources = self._source_dict[source.page_enum]
            page_sources.append(source)
    def get_sources_from_page(self, page_enum) -> List[Source]:
--- a/src/music_kraken/pages/encyclopaedia_metallum.py
+++ b/src/music_kraken/pages/encyclopaedia_metallum.py
@@ -200,10 +200,10 @@ class EncyclopaediaMetallum(Page):
        )
    @classmethod
-    def add_dicography(cls, artist: Artist, ma_artist_id: str) -> Artist:
+    def fetch_artist_discography(cls, artist: Artist, ma_artist_id: str) -> Artist:
        """
        TODO
-        I'd guess this funktion has quite some posibility for otimizations
+        I'd guess this funktion has quite some possibility for optimizations
        in form of performance and clean code
        """
        discography_url = "https://www.metal-archives.com/band/discography/id/{}/tab/all"
@@ -217,16 +217,18 @@ class EncyclopaediaMetallum(Page):
                album_by_url[source.url] = album
        old_discography = artist.main_albums.copy()
        # save the ids of the albums, that are added to this set, so I can
-        # efficiently add all leftover albums from the discograpy to the new one
+        # efficiently add all leftover albums from the discography to the new one
        used_ids = set()
        new_discography: List[Album] = []
        # make the request
        r = cls.API_SESSION.get(discography_url.format(ma_artist_id))
        if r.status_code != 200:
            LOGGER.warning(f"code {r.status_code} at {discography_url.format(ma_artist_id)}")
            return artist
        # parse the html
        soup = BeautifulSoup(r.text, 'html.parser')
        tbody_soup = soup.find('tbody')
@@ -264,11 +266,27 @@ class EncyclopaediaMetallum(Page):
            new_discography.append(album_obj)
        # add the albums back, which weren't on this page
        for old_object in old_discography:
            if old_object.id not in used_ids:
                new_discography.append(old_object)
        artist.main_albums = new_discography
        return artist
    @classmethod
    def fetch_artist_sources(cls, artist: Artist, ma_artist_id: str) -> Artist:
        """
        requests the list of external links of a band

        TODO
        the response is not parsed yet, so no sources are added to the
        artist; the artist is returned unchanged

        :param artist: the artist the sources belong to
        :param ma_artist_id: the id that is inserted into the request url
        :return artist: the artist that was passed in
        """
        sources_url = "https://www.metal-archives.com/link/ajax-list/type/band/id/{}".format(ma_artist_id)

        # make the request
        r = cls.API_SESSION.get(sources_url)
        if r.status_code != 200:
            LOGGER.warning(f"code {r.status_code} at {sources_url}")
            return artist

        # log the raw response instead of printing it to stdout
        LOGGER.debug("%s", r.text)

        return artist
    @classmethod
@@ -297,6 +315,9 @@ class EncyclopaediaMetallum(Page):
        # SIMPLE METADATA
        # DISCOGRAPHY
-        artist = cls.add_dicography(artist, artist_id)
+        artist = cls.fetch_artist_discography(artist, artist_id)
        # External Sources
        artist = cls.fetch_artist_sources(artist, artist_id)
        return artist
--- a/src/test.db
+++ b/src/test.db