music-kraken-core/src/music_kraken/pages/encyclopaedia_metallum.py

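"""
Page implementation for Encyclopaedia Metallum (metal-archives.com).

Searches go through the site's ajax search endpoints; artist details are
scraped from the band, discography and link pages with BeautifulSoup.
"""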
from typing import List

import requests
from bs4 import BeautifulSoup

from ..utils.shared import (
    ENCYCLOPAEDIA_METALLUM_LOGGER as LOGGER
)
from .abstract import Page
from ..database import (
    MusicObject,
    Artist,
    Source,
    SourcePages,
    Song,
    Album,
    ID3Timestamp
)
from ..utils import (
    string_processing
)


class EncyclopaediaMetallum(Page):
    API_SESSION: requests.Session = requests.Session()
    API_SESSION.headers = {
        "Host": "www.metal-archives.com",
        "Connection": "keep-alive"
    }

    SOURCE_TYPE = SourcePages.ENCYCLOPAEDIA_METALLUM

    @classmethod
    def search_by_query(cls, query: str) -> List[MusicObject]:
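        """
        Parses the raw query string into a Page.Query and dispatches either to the
        plain band search (for raw queries) or to the advanced search.
        """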
        query_obj = cls.Query(query)

        if query_obj.is_raw:
            return cls.simple_search(query_obj)
        return cls.advanced_search(query_obj)

    @classmethod
    def advanced_search(cls, query: Page.Query) -> List[MusicObject]:
        if query.song is not None:
            return cls.search_for_song(query=query)
        if query.album is not None:
            return cls.search_for_album(query=query)
        if query.artist is not None:
            return cls.search_for_artist(query=query)

        return []

    @classmethod
    def search_for_song(cls, query: Page.Query) -> List[Song]:
        endpoint = "https://www.metal-archives.com/search/ajax-advanced/searching/songs/?songTitle={song}&bandName={" \
                   "artist}&releaseTitle={album}&lyrics=&genre=&sEcho=1&iColumns=5&sColumns=&iDisplayStart=0" \
                   "&iDisplayLength=200&mDataProp_0=0&mDataProp_1=1&mDataProp_2=2&mDataProp_3=3&mDataProp_4=4&_" \
                   "=1674550595663"

        r = cls.API_SESSION.get(endpoint.format(song=query.song_str, artist=query.artist_str, album=query.album_str))
        if r.status_code != 200:
            LOGGER.warning(
                f"code {r.status_code} at {endpoint.format(song=query.song_str, artist=query.artist_str, album=query.album_str)}")
            return []
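
        # every row in 'aaData' is [band html, album html, release type, song title, lyrics link html]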
        return [cls.get_song_from_json(
            artist_html=raw_song[0],
            album_html=raw_song[1],
            release_type=raw_song[2],
            title=raw_song[3],
            lyrics_html=raw_song[4]
        ) for raw_song in r.json()['aaData']]

    @classmethod
    def search_for_album(cls, query: Page.Query) -> List[Album]:
        endpoint = "https://www.metal-archives.com/search/ajax-advanced/searching/albums/?bandName={" \
                   "artist}&releaseTitle={album}&releaseYearFrom=&releaseMonthFrom=&releaseYearTo=&releaseMonthTo" \
                   "=&country=&location=&releaseLabelName=&releaseCatalogNumber=&releaseIdentifiers" \
                   "=&releaseRecordingInfo=&releaseDescription=&releaseNotes=&genre=&sEcho=1&iColumns=3&sColumns" \
                   "=&iDisplayStart=0&iDisplayLength=200&mDataProp_0=0&mDataProp_1=1&mDataProp_2=2&_=1674563943747"

        r = cls.API_SESSION.get(endpoint.format(artist=query.artist_str, album=query.album_str))
        if r.status_code != 200:
            LOGGER.warning(
                f"code {r.status_code} at {endpoint.format(artist=query.artist_str, album=query.album_str)}")
            return []
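
        # every row in 'aaData' is [band html, album html, release type]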
        return [cls.get_album_from_json(
            artist_html=raw_album[0],
            album_html=raw_album[1],
            release_type=raw_album[2]
        ) for raw_album in r.json()['aaData']]

    @classmethod
    def search_for_artist(cls, query: Page.Query) -> List[Artist]:
        endpoint = "https://www.metal-archives.com/search/ajax-advanced/searching/bands/?bandName={artist}&genre=&country=&yearCreationFrom=&yearCreationTo=&bandNotes=&status=&themes=&location=&bandLabelName=&sEcho=1&iColumns=3&sColumns=&iDisplayStart=0&iDisplayLength=200&mDataProp_0=0&mDataProp_1=1&mDataProp_2=2&_=1674565459976"

        r = cls.API_SESSION.get(endpoint.format(artist=query.artist))
        if r.status_code != 200:
            LOGGER.warning(f"code {r.status_code} at {endpoint.format(artist=query.artist)}")
            return []
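
        # every row in 'aaData' is [band html, genre, country]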
        return [
            cls.get_artist_from_json(artist_html=raw_artist[0], genre=raw_artist[1], country=raw_artist[2])
            for raw_artist in r.json()['aaData']
        ]

    @classmethod
    def simple_search(cls, query: Page.Query) -> List[Artist]:
        """
        Searches the default endpoint from Metal Archives, which in turn only searches
        for bands, but it is the default, thus I am rolling with it.
        """
        endpoint = "https://www.metal-archives.com/search/ajax-band-search/?field=name&query={query}&sEcho=1&iColumns=3&sColumns=&iDisplayStart=0&iDisplayLength=200&mDataProp_0=0&mDataProp_1=1&mDataProp_2=2"

        r = cls.API_SESSION.get(endpoint.format(query=query.query))
        if r.status_code != 200:
            LOGGER.warning(f"code {r.status_code} at {endpoint.format(query=query.query)}")
            return []

        return [
            cls.get_artist_from_json(artist_html=raw_artist[0], genre=raw_artist[1], country=raw_artist[2])
            for raw_artist in r.json()['aaData']
        ]

    @classmethod
    def get_artist_from_json(cls, artist_html=None, genre=None, country=None) -> Artist:
        """
        TODO parse the country to a standard
        """
        # parse the html for the band name and link on metal-archives
        soup = BeautifulSoup(artist_html, 'html.parser')
        anchor = soup.find('a')
        artist_name = anchor.text
        artist_url = anchor.get('href')
        artist_id = artist_url.split("/")[-1]

        notes = f"{artist_name} is a {genre} band from {country}"
        anchor.decompose()
        strong = soup.find('strong')
        if strong is not None:
            strong.decompose()
            akronyms_ = soup.text[2:-2].split(', ')
            notes += f" aka {', '.join(akronyms_)}"
        notes += "."

        return Artist(
            id_=artist_id,
            name=artist_name,
            source_list=[
                Source(SourcePages.ENCYCLOPAEDIA_METALLUM, artist_url)
            ],
            notes=notes
        )

    @classmethod
    def get_album_from_json(cls, album_html=None, release_type=None, artist_html=None) -> Album:
        # parse the html
        # <a href="https://www.metal-archives.com/albums/Ghost_Bath/Self_Loather/970834">Self Loather</a>
        soup = BeautifulSoup(album_html, 'html.parser')
        anchor = soup.find('a')
        album_name = anchor.text
        album_url = anchor.get('href')
        album_id = album_url.split("/")[-1]

        """
        TODO implement release type
        """

        return Album(
            id_=album_id,
            title=album_name,
            source_list=[
                Source(SourcePages.ENCYCLOPAEDIA_METALLUM, album_url)
            ],
            artists=[
                cls.get_artist_from_json(artist_html=artist_html)
            ]
        )

    @classmethod
    def get_song_from_json(cls, artist_html=None, album_html=None, release_type=None, title=None,
                           lyrics_html=None) -> Song:
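        # the song id is only exposed through the lyrics link, whose element id has the form "lyricsLink_<song id>"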
        song_id = None
        if lyrics_html is not None:
            soup = BeautifulSoup(lyrics_html, 'html.parser')
            anchor = soup.find('a')
            raw_song_id = anchor.get('id')
            song_id = raw_song_id.replace("lyricsLink_", "")

        return Song(
            id_=song_id,
            title=title,
            main_artist_list=[
                cls.get_artist_from_json(artist_html=artist_html)
            ],
            album=cls.get_album_from_json(album_html=album_html, release_type=release_type, artist_html=artist_html),
            source_list=[
                Source(SourcePages.ENCYCLOPAEDIA_METALLUM, song_id)
            ]
        )

    @classmethod
    def fetch_artist_discography(cls, artist: Artist, ma_artist_id: str) -> Artist:
        """
        TODO
        I'd guess this function has quite some possibility for optimizations
        in form of performance and clean code
        """
        discography_url = "https://www.metal-archives.com/band/discography/id/{}/tab/all"

        # prepare tracklist
        album_by_url = dict()
        album_by_name = dict()
        for album in artist.main_albums:
            album_by_name[string_processing.unify(album.title)] = album
            for source in album.get_sources_from_page(cls.SOURCE_TYPE):
                album_by_url[source.url] = album

        old_discography = artist.main_albums.copy()

        # save the ids of the albums that are added to this set, so I can
        # efficiently add all leftover albums from the discography to the new one
        used_ids = set()
        new_discography: List[Album] = []

        # make the request
        r = cls.API_SESSION.get(discography_url.format(ma_artist_id))
        if r.status_code != 200:
            LOGGER.warning(f"code {r.status_code} at {discography_url.format(ma_artist_id)}")
            return artist

        # parse the html
        soup = BeautifulSoup(r.text, 'html.parser')
        tbody_soup = soup.find('tbody')
        for tr_soup in tbody_soup.find_all('tr'):
            td_list = tr_soup.findChildren(recursive=False)

            album_soup = td_list[0]
            album_name = album_soup.text
            album_url = album_soup.find('a').get('href')
            album_id = album_url.split('/')[-1]
            album_type = td_list[1].text
            album_year = td_list[2].text

            unified_name = string_processing.unify(album_name)

            album_obj: Album = Album(id_=album_id)
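            # match the scraped album against the existing discography:
            # first by metal-archives url, then by unified name, otherwise treat it as a new album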
            if album_url in album_by_url:
                album_obj = album_by_url[album_url]
                used_ids.add(album_obj.id)
            elif unified_name in album_by_name:
                album_obj = album_by_name[unified_name]
                album_obj.add_source(Source(SourcePages.ENCYCLOPAEDIA_METALLUM, album_url))
                used_ids.add(album_obj.id)
            else:
                album_obj.add_source(Source(SourcePages.ENCYCLOPAEDIA_METALLUM, album_url))

            album_obj.title = album_name
            album_obj.album_type = album_type
            try:
                album_obj.date = ID3Timestamp(year=int(album_year))
            except ValueError:
                pass

            new_discography.append(album_obj)

        # add back the albums that weren't on this page
        for old_object in old_discography:
            if old_object.id not in used_ids:
                new_discography.append(old_object)

        artist.main_albums = new_discography

        return artist

    @classmethod
    def fetch_artist_sources(cls, artist: Artist, ma_artist_id: str) -> Artist:
        sources_url = "https://www.metal-archives.com/link/ajax-list/type/band/id/{}"

        # make the request
        r = cls.API_SESSION.get(sources_url.format(ma_artist_id))
        if r.status_code != 200:
            LOGGER.warning(f"code {r.status_code} at {sources_url.format(ma_artist_id)}")
            return artist

        soup = BeautifulSoup(r.text, 'html.parser')

        artist_source = soup.find("div", {"id": "band_links_Official"})
        """
        TODO
        add a Label object to add the label sources from
        TODO
        maybe do merchandise stuff
        """
        merchandise_source = soup.find("div", {"id": "band_links_Official_merchandise"})
        label_source = soup.find("div", {"id": "band_links_Labels"})
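
        # each <td> in the official links table wraps an anchor; Source.match_url
        # maps the href to a Source object and returns None for urls it can't map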
        for tr in artist_source.find_all("td"):
            a = tr.find("a")
            url = a.get("href")

            source = Source.match_url(url)
            if source is None:
                continue

            artist.add_source(source)

        return artist

    @classmethod
    def fetch_artist_attributes(cls, artist: Artist, url: str) -> Artist:
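        # the band page is fetched, but no attributes are parsed from it yet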
        r = cls.API_SESSION.get(url)
        if r.status_code != 200:
            LOGGER.warning(f"code {r.status_code} at {url}")
            return artist

        return artist

    @classmethod
    def fetch_artist_details(cls, artist: Artist) -> Artist:
        source_list = artist.get_sources_from_page(cls.SOURCE_TYPE)
        if len(source_list) == 0:
            return artist

        # taking the first source, cuz I only need one and multiple sources don't make that much sense
        source = source_list[0]
        artist_id = source.url.split("/")[-1]

        print(source)
        print("id", artist_id)

        """
        [] https://www.metal-archives.com/bands/Ghost_Bath/3540372489
        [x] https://www.metal-archives.com/band/discography/id/3540372489/tab/all
        ---review---
        [] https://www.metal-archives.com/review/ajax-list-band/id/3540372489/json/1?sEcho=1&iColumns=4&sColumns=&iDisplayStart=0&iDisplayLength=200&mDataProp_0=0&mDataProp_1=1&mDataProp_2=2&mDataProp_3=3&iSortCol_0=3&sSortDir_0=desc&iSortingCols=1&bSortable_0=true&bSortable_1=true&bSortable_2=true&bSortable_3=true&_=1675155257133
        ---similar-bands---
        [] https://www.metal-archives.com/band/ajax-recommendations/id/3540372489
        ---external-sources---
        [x] https://www.metal-archives.com/link/ajax-list/type/band/id/3540372489
        """

        # SIMPLE METADATA
        artist = cls.fetch_artist_attributes(artist, source.url)

        # DISCOGRAPHY
        artist = cls.fetch_artist_discography(artist, artist_id)

        # EXTERNAL SOURCES
        artist = cls.fetch_artist_sources(artist, artist_id)

        return artist