From 5545d1190a0ab08932a8f176fc4a5d8d67c971d2 Mon Sep 17 00:00:00 2001 From: Lars Noack Date: Tue, 24 Jan 2023 09:40:01 +0100 Subject: [PATCH] started ma --- src/goof.py | 3 +- src/music_kraken/database/objects/song.py | 5 +- src/music_kraken/pages/__init__.py | 8 +++ .../pages/encyclopaedia_metallum.py | 70 ++++++++++++++++++- src/music_kraken/utils/shared.py | 2 + 5 files changed, 84 insertions(+), 4 deletions(-) diff --git a/src/goof.py b/src/goof.py index 33d740e..83e8324 100644 --- a/src/goof.py +++ b/src/goof.py @@ -2,4 +2,5 @@ from music_kraken.pages import ( EncyclopaediaMetallum ) -EncyclopaediaMetallum.search_by_query("Ghost Bath") +print(EncyclopaediaMetallum.search_by_query("Ghost Bath")) +EncyclopaediaMetallum.search_by_query("#a Ghost Bath #r Self Loather") diff --git a/src/music_kraken/database/objects/song.py b/src/music_kraken/database/objects/song.py index 6eb7ce1..6087497 100644 --- a/src/music_kraken/database/objects/song.py +++ b/src/music_kraken/database/objects/song.py @@ -343,10 +343,13 @@ class Artist(DatabaseObject, ID3Metadata): sources: List[Source] = None, main_songs: List[Song] = None, feature_songs: List[Song] = None, - main_albums: List[Album] = None + main_albums: List[Album] = None, + notes: str = None ): DatabaseObject.__init__(self, id_=id_) + self.notes = notes + if main_albums is None: main_albums = [] if feature_songs is None: diff --git a/src/music_kraken/pages/__init__.py b/src/music_kraken/pages/__init__.py index 406ec88..614efb1 100644 --- a/src/music_kraken/pages/__init__.py +++ b/src/music_kraken/pages/__init__.py @@ -1,3 +1,11 @@ from .encyclopaedia_metallum import EncyclopaediaMetallum EncyclopaediaMetallum = EncyclopaediaMetallum + +MetadataPages = { + EncyclopaediaMetallum +} + +AudioPages = { + +} diff --git a/src/music_kraken/pages/encyclopaedia_metallum.py b/src/music_kraken/pages/encyclopaedia_metallum.py index f3aaac7..8983de3 100644 --- a/src/music_kraken/pages/encyclopaedia_metallum.py +++ b/src/music_kraken/pages/encyclopaedia_metallum.py @@ -1,10 +1,28 @@ from typing import List +import requests +from bs4 import BeautifulSoup + +from ..utils.shared import ( + ENCYCLOPAEDIA_METALLUM_LOGGER as LOGGER +) from .abstract import Page -from ..database import MusicObject +from ..database import ( + MusicObject, + Artist, + Source, + SourcePages +) class EncyclopaediaMetallum(Page): + API_SESSION: requests.Session = requests.Session() + API_SESSION.headers = { + "Host": "www.metal-archives.com", + "Connection": "keep-alive" + } + + @classmethod def search_by_query(cls, query: str) -> List[MusicObject]: query_obj = cls.Query(query) @@ -15,4 +33,52 @@ class EncyclopaediaMetallum(Page): @classmethod def simple_search(cls, query: Page.Query): - pass + """ + Searches the default endpoint from metal archives, which intern searches only + for bands, but it is the default, thus I am rolling with it + """ + endpoint = "https://www.metal-archives.com/search/ajax-band-search/?field=name&query={query}&sEcho=1&iColumns=3&sColumns=&iDisplayStart=0&iDisplayLength=200&mDataProp_0=0&mDataProp_1=1&mDataProp_2=2" + + r = cls.API_SESSION.get(endpoint.format(query=query)) + if r.status_code != 200: + LOGGER.warning(f"code {r.status_code} at {endpoint.format(query=query.query)}") + return [] + + print(r.json()) + return cls.get_many_artists_from_json(r.json()['aaData']) + + @classmethod + def get_artist_from_json(cls, html, genre, country) -> Artist: + """ + TODO parse the country to a standart + """ + # parse the html + # parse the html for the band name and link on metal-archives + soup = BeautifulSoup(html, 'html.parser') + anchor = soup.find('a') + artist_name = anchor.text + artist_url = anchor.get('href') + artist_id = int(artist_url.split("/")[-1]) + + notes = f"{artist_name} is a {genre} band from {country}" + + anchor.decompose() + strong = soup.find('strong') + if strong is not None: + strong.decompose() + akronyms_ = soup.text[2:-2].split(', ') + notes += f"aka {akronyms_}" + notes += "." + + return Artist( + id_=artist_id, + name=artist_name, + sources=[ + Source(SourcePages.ENCYCLOPAEDIA_METALLUM, artist_url) + ], + notes = notes + ) + + @classmethod + def get_many_artists_from_json(cls, raw_artist_list: list) -> List[Artist]: + return [cls.get_artist_from_json(raw_artist) for raw_artist in raw_artist_list] diff --git a/src/music_kraken/utils/shared.py b/src/music_kraken/utils/shared.py index fdd7fa9..7c0b37c 100644 --- a/src/music_kraken/utils/shared.py +++ b/src/music_kraken/utils/shared.py @@ -41,6 +41,8 @@ LYRICS_LOGGER = logging.getLogger("lyrics") GENIUS_LOGGER = logging.getLogger("genius") TAGGING_LOGGER = logging.getLogger("tagging") +ENCYCLOPAEDIA_METALLUM_LOGGER = logging.getLogger("ma") + NOT_A_GENRE = ".", "..", "misc_scripts", "Music", "script", ".git", ".idea" MUSIC_DIR = os.path.join(os.path.expanduser("~"), "Music")