Rewrote the specific search for Encyclopaedia Metallum

This commit is contained in:
Hellow 2023-05-23 16:50:54 +02:00
parent 4cc36087b6
commit 70c7d831c9
2 changed files with 115 additions and 111 deletions

View File

@ -44,13 +44,6 @@ class Page(threading.Thread):
# to search stuff # to search stuff
def search(self, query: Query) -> List[DatabaseObject]: def search(self, query: Query) -> List[DatabaseObject]:
results = []
if query.is_raw:
for search_query in query.default_search:
results.extend(self.general_search(search_query))
return results
music_object = query.music_object music_object = query.music_object
search_functions = { search_functions = {
@ -67,9 +60,9 @@ class Page(threading.Thread):
r = [] r = []
for default_query in query.default_search: for default_query in query.default_search:
results.extend(self.general_search(default_query)) r.extend(self.general_search(default_query))
return results return r
def general_search(self, search_query: str) -> List[DatabaseObject]: def general_search(self, search_query: str) -> List[DatabaseObject]:
return [] return []

View File

@ -34,14 +34,80 @@ ALBUM_TYPE_MAP: Dict[str, AlbumType] = defaultdict(lambda: AlbumType.OTHER, {
}) })
class EncyclopaediaMetallum(Page): def _song_from_json(artist_html=None, album_html=None, release_type=None, title=None,
CONNECTION: Connection = Connection( lyrics_html=None) -> Song:
host="https://www.metal-archives.com/", song_id = None
logger=ENCYCLOPAEDIA_METALLUM_LOGGER if lyrics_html is not None:
soup = BeautifulSoup(lyrics_html, 'html.parser')
anchor = soup.find('a')
raw_song_id = anchor.get('id')
song_id = raw_song_id.replace("lyricsLink_", "")
return Song(
title=title,
main_artist_list=[
_artist_from_json(artist_html=artist_html)
],
album_list=[
_album_from_json(album_html=album_html, release_type=release_type, artist_html=artist_html)
],
source_list=[
Source(SourcePages.ENCYCLOPAEDIA_METALLUM, song_id)
]
) )
SOURCE_TYPE = SourcePages.ENCYCLOPAEDIA_METALLUM
def _artist_from_json(artist_html=None, genre=None, country=None) -> Artist:
    """
    Build an Artist from the band cell of a search result row.

    The first anchor found in ``artist_html`` is taken as the band link:
    its text becomes the artist name and its ``href`` becomes the url of
    the ENCYCLOPAEDIA_METALLUM source.

    :param artist_html: html snippet that contains the band anchor
    :param genre: accepted so callers can pass the whole row, currently unused
    :param country: accepted so callers can pass the whole row, currently unused
        TODO parse the country to a standard
    :return: the Artist with exactly one source
    :raises AttributeError: if ``artist_html`` contains no anchor
        (``soup.find`` yields None in that case)
    """
    soup = BeautifulSoup(artist_html, 'html.parser')
    anchor = soup.find('a')

    # Only the name and the url end up in the result. The artist id and the
    # acronym list that used to be computed here were never used, so that
    # dead parsing has been dropped.
    return Artist(
        name=anchor.text,
        source_list=[
            Source(SourcePages.ENCYCLOPAEDIA_METALLUM, anchor.get('href'))
        ]
    )
def _album_from_json(album_html=None, release_type=None, artist_html=None) -> Album:
    """
    Build an Album from the cells of a search result row.

    :param album_html: html snippet whose first anchor links to the release, e.g.
        <a href="https://www.metal-archives.com/albums/Ghost_Bath/Self_Loather/970834">Self Loather</a>
    :param release_type: release type label; surrounding whitespace is stripped
        before it is looked up in ALBUM_TYPE_MAP. None maps to AlbumType.OTHER.
    :param artist_html: html snippet of the band cell, handed to _artist_from_json
    :return: the Album with one source and one artist
    :raises AttributeError: if ``album_html`` contains no anchor
    """
    soup = BeautifulSoup(album_html, 'html.parser')
    anchor = soup.find('a')

    # release_type defaults to None, which has no .strip(); fall back to the
    # same type the map hands out for labels it does not know.
    if release_type is None:
        album_type = AlbumType.OTHER
    else:
        album_type = ALBUM_TYPE_MAP[release_type.strip()]

    return Album(
        title=anchor.text,
        album_type=album_type,
        source_list=[
            Source(SourcePages.ENCYCLOPAEDIA_METALLUM, anchor.get('href'))
        ],
        artist_list=[
            _artist_from_json(artist_html=artist_html)
        ]
    )
class EncyclopaediaMetallum(Page):
SOURCE_TYPE = SourcePages.ENCYCLOPAEDIA_METALLUM
LOGGER = ENCYCLOPAEDIA_METALLUM_LOGGER LOGGER = ENCYCLOPAEDIA_METALLUM_LOGGER
def __init__(self): def __init__(self):
@ -66,46 +132,62 @@ class EncyclopaediaMetallum(Page):
Is not good. Is not good.
""" """
r = self.connection.get( song_title = song.title
endpoint.format(song=song.title, artist=query.artist_str, album=query.album_str) album_titles = ["*"] if song.album_collection.empty else [album.title for album in song.album_collection]
) artist_titles = ["*"] if song.main_artist_collection.empty else [artist.name for artist in song.main_artist_collection]
if r is None:
return []
return [cls.get_song_from_json( search_results = []
artist_html=raw_song[0],
album_html=raw_song[1],
release_type=raw_song[2],
title=raw_song[3],
lyrics_html=raw_song[4]
) for raw_song in r.json()['aaData']]
@classmethod for artist in artist_titles:
def search_for_album(cls, query: Query) -> List[Album]: for album in album_titles:
r = self.connection.get(
endpoint.format(song=song_title, artist=artist, album=album)
)
if r is None:
return []
search_results.extend(_song_from_json(
artist_html=raw_song[0],
album_html=raw_song[1],
release_type=raw_song[2],
title=raw_song[3],
lyrics_html=raw_song[4]
) for raw_song in r.json()['aaData'])
return search_results
def album_search(self, album: Album) -> List[Album]:
endpoint = "https://www.metal-archives.com/search/ajax-advanced/searching/albums/?bandName={" \ endpoint = "https://www.metal-archives.com/search/ajax-advanced/searching/albums/?bandName={" \
"artist}&releaseTitle={album}&releaseYearFrom=&releaseMonthFrom=&releaseYearTo=&releaseMonthTo" \ "artist}&releaseTitle={album}&releaseYearFrom=&releaseMonthFrom=&releaseYearTo=&releaseMonthTo" \
"=&country=&location=&releaseLabelName=&releaseCatalogNumber=&releaseIdentifiers" \ "=&country=&location=&releaseLabelName=&releaseCatalogNumber=&releaseIdentifiers" \
"=&releaseRecordingInfo=&releaseDescription=&releaseNotes=&genre=&sEcho=1&iColumns=3&sColumns" \ "=&releaseRecordingInfo=&releaseDescription=&releaseNotes=&genre=&sEcho=1&iColumns=3&sColumns" \
"=&iDisplayStart=0&iDisplayLength=200&mDataProp_0=0&mDataProp_1=1&mDataProp_2=2&_=1674563943747" "=&iDisplayStart=0&iDisplayLength=200&mDataProp_0=0&mDataProp_1=1&mDataProp_2=2&_=1674563943747"
r = cls.CONNECTION.get(endpoint.format(artist=query.artist_str, album=query.album_str))
if r is None:
return []
return [cls.get_album_from_json( album_title = album.title
artist_html=raw_album[0], artist_titles = ["*"] if album.artist_collection.empty else [artist.name for artist in album.artist_collection]
album_html=raw_album[1],
release_type=raw_album[2]
) for raw_album in r.json()['aaData']]
@classmethod search_results = []
def search_for_artist(cls, query: Query) -> List[Artist]:
for artist in artist_titles:
r = self.connection.get(endpoint.format(artist=artist, album=album_title))
if r is None:
return []
search_results.extend(_album_from_json(
artist_html=raw_album[0],
album_html=raw_album[1],
release_type=raw_album[2]
) for raw_album in r.json()['aaData'])
def artist_search(self, artist: Artist) -> List[Artist]:
endpoint = "https://www.metal-archives.com/search/ajax-advanced/searching/bands/?bandName={" \ endpoint = "https://www.metal-archives.com/search/ajax-advanced/searching/bands/?bandName={" \
"artist}&genre=&country=&yearCreationFrom=&yearCreationTo=&bandNotes=&status=&themes=&location" \ "artist}&genre=&country=&yearCreationFrom=&yearCreationTo=&bandNotes=&status=&themes=&location" \
"=&bandLabelName=&sEcho=1&iColumns=3&sColumns=&iDisplayStart=0&iDisplayLength=200&mDataProp_0=0" \ "=&bandLabelName=&sEcho=1&iColumns=3&sColumns=&iDisplayStart=0&iDisplayLength=200&mDataProp_0=0" \
"&mDataProp_1=1&mDataProp_2=2&_=1674565459976" "&mDataProp_1=1&mDataProp_2=2&_=1674565459976"
r = cls.CONNECTION.get(endpoint.format(artist=query.artist)) r = self.connection.get(endpoint.format(artist=artist.name))
if r is None: if r is None:
return [] return []
@ -116,7 +198,7 @@ class EncyclopaediaMetallum(Page):
return [] return []
return [ return [
cls.get_artist_from_json(artist_html=raw_artist[0], genre=raw_artist[1], country=raw_artist[2]) _artist_from_json(artist_html=raw_artist[0], genre=raw_artist[1], country=raw_artist[2])
for raw_artist in r.json()['aaData'] for raw_artist in r.json()['aaData']
] ]
@ -137,77 +219,6 @@ class EncyclopaediaMetallum(Page):
for raw_artist in r.json()['aaData'] for raw_artist in r.json()['aaData']
]) ])
@classmethod
def get_artist_from_json(cls, artist_html=None, genre=None, country=None) -> Artist:
    """
    Build an Artist from the band cell of a search result row.

    The first anchor found in ``artist_html`` is taken as the band link:
    its text becomes the artist name and its ``href`` becomes the url of
    the ENCYCLOPAEDIA_METALLUM source.

    :param artist_html: html snippet that contains the band anchor
    :param genre: accepted so callers can pass the whole row, currently unused
    :param country: accepted so callers can pass the whole row, currently unused
        TODO parse the country to a standard
    :return: the Artist with exactly one source
    :raises AttributeError: if ``artist_html`` contains no anchor
        (``soup.find`` yields None in that case)
    """
    soup = BeautifulSoup(artist_html, 'html.parser')
    anchor = soup.find('a')

    # Only the name and the url end up in the result. The artist id and the
    # acronym list that used to be computed here were never used, so that
    # dead parsing has been dropped.
    return Artist(
        name=anchor.text,
        source_list=[
            Source(SourcePages.ENCYCLOPAEDIA_METALLUM, anchor.get('href'))
        ]
    )
@classmethod
def get_album_from_json(cls, album_html=None, release_type=None, artist_html=None) -> Album:
    """
    Build an Album from the cells of a search result row.

    :param album_html: html snippet whose first anchor links to the release, e.g.
        <a href="https://www.metal-archives.com/albums/Ghost_Bath/Self_Loather/970834">Self Loather</a>
    :param release_type: release type label; surrounding whitespace is stripped
        before it is looked up in cls.ALBUM_TYPE_MAP. None maps to AlbumType.OTHER.
    :param artist_html: html snippet of the band cell, handed to get_artist_from_json
    :return: the Album with one source and one artist
    :raises AttributeError: if ``album_html`` contains no anchor
    """
    soup = BeautifulSoup(album_html, 'html.parser')
    anchor = soup.find('a')

    # release_type defaults to None, which has no .strip(); use the generic
    # album type instead of crashing.
    if release_type is None:
        album_type = AlbumType.OTHER
    else:
        album_type = cls.ALBUM_TYPE_MAP[release_type.strip()]

    return Album(
        title=anchor.text,
        album_type=album_type,
        source_list=[
            Source(SourcePages.ENCYCLOPAEDIA_METALLUM, anchor.get('href'))
        ],
        artist_list=[
            cls.get_artist_from_json(artist_html=artist_html)
        ]
    )
@classmethod
def get_song_from_json(cls, artist_html=None, album_html=None, release_type=None, title=None,
                       lyrics_html=None) -> Song:
    """
    Build a Song from the cells of a search result row.

    :param artist_html: html snippet of the band cell, handed to get_artist_from_json
    :param album_html: html snippet of the release cell, handed to get_album_from_json
    :param release_type: release type label, handed to get_album_from_json
    :param title: title of the song
    :param lyrics_html: html snippet of the lyrics cell; the ``id`` of its first
        anchor, with the "lyricsLink_" prefix removed, is used as the song id
    :return: the Song with one artist, one album and one source
    """
    song_id = None
    if lyrics_html is not None:
        anchor = BeautifulSoup(lyrics_html, 'html.parser').find('a')
        # A lyrics cell without an anchor, or an anchor without an id, used
        # to raise an AttributeError. Treat it like a missing lyrics cell and
        # keep song_id at None.
        raw_song_id = anchor.get('id') if anchor is not None else None
        if raw_song_id is not None:
            song_id = raw_song_id.replace("lyricsLink_", "")

    return Song(
        title=title,
        main_artist_list=[
            cls.get_artist_from_json(artist_html=artist_html)
        ],
        album_list=[
            cls.get_album_from_json(album_html=album_html, release_type=release_type, artist_html=artist_html)
        ],
        source_list=[
            Source(SourcePages.ENCYCLOPAEDIA_METALLUM, song_id)
        ]
    )
@classmethod @classmethod
def _fetch_artist_discography(cls, ma_artist_id: str) -> List[Album]: def _fetch_artist_discography(cls, ma_artist_id: str) -> List[Album]: