From 70c7d831c9f1bd1ef68e7b0eb76ce64973a2668b Mon Sep 17 00:00:00 2001
From: Hellow <74311245+HeIIow2@users.noreply.github.com>
Date: Tue, 23 May 2023 16:50:54 +0200
Subject: [PATCH] rewritten specific search for me
---
src/music_kraken/pages/abstract.py | 11 +-
.../pages/encyclopaedia_metallum.py | 215 +++++++++---------
2 files changed, 115 insertions(+), 111 deletions(-)
diff --git a/src/music_kraken/pages/abstract.py b/src/music_kraken/pages/abstract.py
index 4e5495f..8398caa 100644
--- a/src/music_kraken/pages/abstract.py
+++ b/src/music_kraken/pages/abstract.py
@@ -44,13 +44,6 @@ class Page(threading.Thread):
# to search stuff
def search(self, query: Query) -> List[DatabaseObject]:
- results = []
-
- if query.is_raw:
- for search_query in query.default_search:
- results.extend(self.general_search(search_query))
- return results
-
music_object = query.music_object
search_functions = {
@@ -67,9 +60,9 @@ class Page(threading.Thread):
r = []
for default_query in query.default_search:
- results.extend(self.general_search(default_query))
+ r.extend(self.general_search(default_query))
- return results
+ return r
def general_search(self, search_query: str) -> List[DatabaseObject]:
return []
diff --git a/src/music_kraken/pages/encyclopaedia_metallum.py b/src/music_kraken/pages/encyclopaedia_metallum.py
index 3675102..a7e79ed 100644
--- a/src/music_kraken/pages/encyclopaedia_metallum.py
+++ b/src/music_kraken/pages/encyclopaedia_metallum.py
@@ -34,14 +34,80 @@ ALBUM_TYPE_MAP: Dict[str, AlbumType] = defaultdict(lambda: AlbumType.OTHER, {
})
-class EncyclopaediaMetallum(Page):
- CONNECTION: Connection = Connection(
- host="https://www.metal-archives.com/",
- logger=ENCYCLOPAEDIA_METALLUM_LOGGER
+def _song_from_json(artist_html=None, album_html=None, release_type=None, title=None,
+ lyrics_html=None) -> Song:
+ song_id = None
+ if lyrics_html is not None:
+ soup = BeautifulSoup(lyrics_html, 'html.parser')
+ anchor = soup.find('a')
+ raw_song_id = anchor.get('id')
+ song_id = raw_song_id.replace("lyricsLink_", "")
+
+ return Song(
+ title=title,
+ main_artist_list=[
+ _artist_from_json(artist_html=artist_html)
+ ],
+ album_list=[
+ _album_from_json(album_html=album_html, release_type=release_type, artist_html=artist_html)
+ ],
+ source_list=[
+ Source(SourcePages.ENCYCLOPAEDIA_METALLUM, song_id)
+ ]
)
+
+def _artist_from_json(artist_html=None, genre=None, country=None) -> Artist:
+ """
+ TODO parse the country to a standard
+ """
+ # parse the html
+ # parse the html for the band name and link on metal-archives
+ soup = BeautifulSoup(artist_html, 'html.parser')
+ anchor = soup.find('a')
+ artist_name = anchor.text
+ artist_url = anchor.get('href')
+ artist_id = artist_url.split("/")[-1]
+
+ anchor.decompose()
+ strong = soup.find('strong')
+ if strong is not None:
+ strong.decompose()
+ akronyms_ = soup.text[2:-2].split(', ')
+
+ return Artist(
+ name=artist_name,
+ source_list=[
+ Source(SourcePages.ENCYCLOPAEDIA_METALLUM, artist_url)
+ ]
+ )
+
+
+def _album_from_json(album_html=None, release_type=None, artist_html=None) -> Album:
+ # parse the html
+ # example album name (presumably the anchor text): 'Self Loather'
+ soup = BeautifulSoup(album_html, 'html.parser')
+ anchor = soup.find('a')
+ album_name = anchor.text
+ album_url = anchor.get('href')
+ album_id = album_url.split("/")[-1]
+
+ album_type = ALBUM_TYPE_MAP[release_type.strip()]
+
+ return Album(
+ title=album_name,
+ album_type=album_type,
+ source_list=[
+ Source(SourcePages.ENCYCLOPAEDIA_METALLUM, album_url)
+ ],
+ artist_list=[
+ _artist_from_json(artist_html=artist_html)
+ ]
+ )
+
+
+class EncyclopaediaMetallum(Page):
SOURCE_TYPE = SourcePages.ENCYCLOPAEDIA_METALLUM
-
LOGGER = ENCYCLOPAEDIA_METALLUM_LOGGER
def __init__(self):
@@ -66,46 +132,62 @@ class EncyclopaediaMetallum(Page):
Is not good.
"""
- r = self.connection.get(
- endpoint.format(song=song.title, artist=query.artist_str, album=query.album_str)
- )
- if r is None:
- return []
+ song_title = song.title
+ album_titles = ["*"] if song.album_collection.empty else [album.title for album in song.album_collection]
+ artist_titles = ["*"] if song.main_artist_collection.empty else [artist.name for artist in song.main_artist_collection]
- return [cls.get_song_from_json(
- artist_html=raw_song[0],
- album_html=raw_song[1],
- release_type=raw_song[2],
- title=raw_song[3],
- lyrics_html=raw_song[4]
- ) for raw_song in r.json()['aaData']]
+ search_results = []
- @classmethod
- def search_for_album(cls, query: Query) -> List[Album]:
+ for artist in artist_titles:
+ for album in album_titles:
+ r = self.connection.get(
+ endpoint.format(song=song_title, artist=artist, album=album)
+ )
+
+ if r is None:
+ return []
+
+ search_results.extend(_song_from_json(
+ artist_html=raw_song[0],
+ album_html=raw_song[1],
+ release_type=raw_song[2],
+ title=raw_song[3],
+ lyrics_html=raw_song[4]
+ ) for raw_song in r.json()['aaData'])
+
+ return search_results
+
+ def album_search(self, album: Album) -> List[Album]:
endpoint = "https://www.metal-archives.com/search/ajax-advanced/searching/albums/?bandName={" \
"artist}&releaseTitle={album}&releaseYearFrom=&releaseMonthFrom=&releaseYearTo=&releaseMonthTo" \
"=&country=&location=&releaseLabelName=&releaseCatalogNumber=&releaseIdentifiers" \
"=&releaseRecordingInfo=&releaseDescription=&releaseNotes=&genre=&sEcho=1&iColumns=3&sColumns" \
"=&iDisplayStart=0&iDisplayLength=200&mDataProp_0=0&mDataProp_1=1&mDataProp_2=2&_=1674563943747"
- r = cls.CONNECTION.get(endpoint.format(artist=query.artist_str, album=query.album_str))
- if r is None:
- return []
- return [cls.get_album_from_json(
- artist_html=raw_album[0],
- album_html=raw_album[1],
- release_type=raw_album[2]
- ) for raw_album in r.json()['aaData']]
+ album_title = album.title
+ artist_titles = ["*"] if album.artist_collection.empty else [artist.name for artist in album.artist_collection]
- @classmethod
- def search_for_artist(cls, query: Query) -> List[Artist]:
+ search_results = []
+
+ for artist in artist_titles:
+ r = self.connection.get(endpoint.format(artist=artist, album=album_title))
+ if r is None:
+ return []
+
+ search_results.extend(_album_from_json(
+ artist_html=raw_album[0],
+ album_html=raw_album[1],
+ release_type=raw_album[2]
+ ) for raw_album in r.json()['aaData'])
+
+ def artist_search(self, artist: Artist) -> List[Artist]:
endpoint = "https://www.metal-archives.com/search/ajax-advanced/searching/bands/?bandName={" \
"artist}&genre=&country=&yearCreationFrom=&yearCreationTo=&bandNotes=&status=&themes=&location" \
"=&bandLabelName=&sEcho=1&iColumns=3&sColumns=&iDisplayStart=0&iDisplayLength=200&mDataProp_0=0" \
"&mDataProp_1=1&mDataProp_2=2&_=1674565459976"
- r = cls.CONNECTION.get(endpoint.format(artist=query.artist))
+ r = self.connection.get(endpoint.format(artist=artist.name))
if r is None:
return []
@@ -116,7 +198,7 @@ class EncyclopaediaMetallum(Page):
return []
return [
- cls.get_artist_from_json(artist_html=raw_artist[0], genre=raw_artist[1], country=raw_artist[2])
+ _artist_from_json(artist_html=raw_artist[0], genre=raw_artist[1], country=raw_artist[2])
for raw_artist in r.json()['aaData']
]
@@ -137,77 +219,6 @@ class EncyclopaediaMetallum(Page):
for raw_artist in r.json()['aaData']
])
- @classmethod
- def get_artist_from_json(cls, artist_html=None, genre=None, country=None) -> Artist:
- """
- TODO parse the country to a standart
- """
- # parse the html
- # parse the html for the band name and link on metal-archives
- soup = BeautifulSoup(artist_html, 'html.parser')
- anchor = soup.find('a')
- artist_name = anchor.text
- artist_url = anchor.get('href')
- artist_id = artist_url.split("/")[-1]
-
- anchor.decompose()
- strong = soup.find('strong')
- if strong is not None:
- strong.decompose()
- akronyms_ = soup.text[2:-2].split(', ')
-
- return Artist(
- name=artist_name,
- source_list=[
- Source(SourcePages.ENCYCLOPAEDIA_METALLUM, artist_url)
- ]
- )
-
- @classmethod
- def get_album_from_json(cls, album_html=None, release_type=None, artist_html=None) -> Album:
- # parse the html
- # Self Loather'
- soup = BeautifulSoup(album_html, 'html.parser')
- anchor = soup.find('a')
- album_name = anchor.text
- album_url = anchor.get('href')
- album_id = album_url.split("/")[-1]
-
- album_type = cls.ALBUM_TYPE_MAP[release_type.strip()]
-
- return Album(
- title=album_name,
- album_type=album_type,
- source_list=[
- Source(SourcePages.ENCYCLOPAEDIA_METALLUM, album_url)
- ],
- artist_list=[
- cls.get_artist_from_json(artist_html=artist_html)
- ]
- )
-
- @classmethod
- def get_song_from_json(cls, artist_html=None, album_html=None, release_type=None, title=None,
- lyrics_html=None) -> Song:
- song_id = None
- if lyrics_html is not None:
- soup = BeautifulSoup(lyrics_html, 'html.parser')
- anchor = soup.find('a')
- raw_song_id = anchor.get('id')
- song_id = raw_song_id.replace("lyricsLink_", "")
-
- return Song(
- title=title,
- main_artist_list=[
- cls.get_artist_from_json(artist_html=artist_html)
- ],
- album_list=[
- cls.get_album_from_json(album_html=album_html, release_type=release_type, artist_html=artist_html)
- ],
- source_list=[
- Source(SourcePages.ENCYCLOPAEDIA_METALLUM, song_id)
- ]
- )
@classmethod
def _fetch_artist_discography(cls, ma_artist_id: str) -> List[Album]: