rewritten specific search for me
This commit is contained in:
parent
4cc36087b6
commit
70c7d831c9
@ -44,13 +44,6 @@ class Page(threading.Thread):
|
|||||||
|
|
||||||
# to search stuff
|
# to search stuff
|
||||||
def search(self, query: Query) -> List[DatabaseObject]:
|
def search(self, query: Query) -> List[DatabaseObject]:
|
||||||
results = []
|
|
||||||
|
|
||||||
if query.is_raw:
|
|
||||||
for search_query in query.default_search:
|
|
||||||
results.extend(self.general_search(search_query))
|
|
||||||
return results
|
|
||||||
|
|
||||||
music_object = query.music_object
|
music_object = query.music_object
|
||||||
|
|
||||||
search_functions = {
|
search_functions = {
|
||||||
@ -67,9 +60,9 @@ class Page(threading.Thread):
|
|||||||
|
|
||||||
r = []
|
r = []
|
||||||
for default_query in query.default_search:
|
for default_query in query.default_search:
|
||||||
results.extend(self.general_search(default_query))
|
r.extend(self.general_search(default_query))
|
||||||
|
|
||||||
return results
|
return r
|
||||||
|
|
||||||
def general_search(self, search_query: str) -> List[DatabaseObject]:
|
def general_search(self, search_query: str) -> List[DatabaseObject]:
|
||||||
return []
|
return []
|
||||||
|
@ -34,111 +34,30 @@ ALBUM_TYPE_MAP: Dict[str, AlbumType] = defaultdict(lambda: AlbumType.OTHER, {
|
|||||||
})
|
})
|
||||||
|
|
||||||
|
|
||||||
class EncyclopaediaMetallum(Page):
|
def _song_from_json(artist_html=None, album_html=None, release_type=None, title=None,
|
||||||
CONNECTION: Connection = Connection(
|
lyrics_html=None) -> Song:
|
||||||
host="https://www.metal-archives.com/",
|
song_id = None
|
||||||
logger=ENCYCLOPAEDIA_METALLUM_LOGGER
|
if lyrics_html is not None:
|
||||||
)
|
soup = BeautifulSoup(lyrics_html, 'html.parser')
|
||||||
|
anchor = soup.find('a')
|
||||||
|
raw_song_id = anchor.get('id')
|
||||||
|
song_id = raw_song_id.replace("lyricsLink_", "")
|
||||||
|
|
||||||
SOURCE_TYPE = SourcePages.ENCYCLOPAEDIA_METALLUM
|
return Song(
|
||||||
|
title=title,
|
||||||
LOGGER = ENCYCLOPAEDIA_METALLUM_LOGGER
|
main_artist_list=[
|
||||||
|
_artist_from_json(artist_html=artist_html)
|
||||||
def __init__(self):
|
],
|
||||||
self.connection: Connection = Connection(
|
album_list=[
|
||||||
host="https://www.metal-archives.com/",
|
_album_from_json(album_html=album_html, release_type=release_type, artist_html=artist_html)
|
||||||
logger=ENCYCLOPAEDIA_METALLUM_LOGGER
|
],
|
||||||
)
|
source_list=[
|
||||||
|
Source(SourcePages.ENCYCLOPAEDIA_METALLUM, song_id)
|
||||||
super().__init__()
|
|
||||||
|
|
||||||
def song_search(self, song: Song) -> List[Song]:
|
|
||||||
endpoint = "https://www.metal-archives.com/search/ajax-advanced/searching/songs/?songTitle={song}&bandName={" \
|
|
||||||
"artist}&releaseTitle={album}&lyrics=&genre=&sEcho=1&iColumns=5&sColumns=&iDisplayStart=0" \
|
|
||||||
"&iDisplayLength=200&mDataProp_0=0&mDataProp_1=1&mDataProp_2=2&mDataProp_3=3&mDataProp_4=4&_" \
|
|
||||||
"=1674550595663"
|
|
||||||
|
|
||||||
"""
|
|
||||||
The difficult question I am facing is, that if I try every artist, with every song, with every album,
|
|
||||||
I end up with a quadratic runtime complexity O(n^2), where every step means one web request.
|
|
||||||
|
|
||||||
This.
|
|
||||||
Is not good.
|
|
||||||
"""
|
|
||||||
|
|
||||||
r = self.connection.get(
|
|
||||||
endpoint.format(song=song.title, artist=query.artist_str, album=query.album_str)
|
|
||||||
)
|
|
||||||
if r is None:
|
|
||||||
return []
|
|
||||||
|
|
||||||
return [cls.get_song_from_json(
|
|
||||||
artist_html=raw_song[0],
|
|
||||||
album_html=raw_song[1],
|
|
||||||
release_type=raw_song[2],
|
|
||||||
title=raw_song[3],
|
|
||||||
lyrics_html=raw_song[4]
|
|
||||||
) for raw_song in r.json()['aaData']]
|
|
||||||
|
|
||||||
@classmethod
|
|
||||||
def search_for_album(cls, query: Query) -> List[Album]:
|
|
||||||
endpoint = "https://www.metal-archives.com/search/ajax-advanced/searching/albums/?bandName={" \
|
|
||||||
"artist}&releaseTitle={album}&releaseYearFrom=&releaseMonthFrom=&releaseYearTo=&releaseMonthTo" \
|
|
||||||
"=&country=&location=&releaseLabelName=&releaseCatalogNumber=&releaseIdentifiers" \
|
|
||||||
"=&releaseRecordingInfo=&releaseDescription=&releaseNotes=&genre=&sEcho=1&iColumns=3&sColumns" \
|
|
||||||
"=&iDisplayStart=0&iDisplayLength=200&mDataProp_0=0&mDataProp_1=1&mDataProp_2=2&_=1674563943747"
|
|
||||||
|
|
||||||
r = cls.CONNECTION.get(endpoint.format(artist=query.artist_str, album=query.album_str))
|
|
||||||
if r is None:
|
|
||||||
return []
|
|
||||||
|
|
||||||
return [cls.get_album_from_json(
|
|
||||||
artist_html=raw_album[0],
|
|
||||||
album_html=raw_album[1],
|
|
||||||
release_type=raw_album[2]
|
|
||||||
) for raw_album in r.json()['aaData']]
|
|
||||||
|
|
||||||
@classmethod
|
|
||||||
def search_for_artist(cls, query: Query) -> List[Artist]:
|
|
||||||
endpoint = "https://www.metal-archives.com/search/ajax-advanced/searching/bands/?bandName={" \
|
|
||||||
"artist}&genre=&country=&yearCreationFrom=&yearCreationTo=&bandNotes=&status=&themes=&location" \
|
|
||||||
"=&bandLabelName=&sEcho=1&iColumns=3&sColumns=&iDisplayStart=0&iDisplayLength=200&mDataProp_0=0" \
|
|
||||||
"&mDataProp_1=1&mDataProp_2=2&_=1674565459976"
|
|
||||||
|
|
||||||
r = cls.CONNECTION.get(endpoint.format(artist=query.artist))
|
|
||||||
|
|
||||||
if r is None:
|
|
||||||
return []
|
|
||||||
|
|
||||||
data_key = 'aaData'
|
|
||||||
parsed_data = r.json()
|
|
||||||
if data_key not in parsed_data:
|
|
||||||
return []
|
|
||||||
|
|
||||||
return [
|
|
||||||
cls.get_artist_from_json(artist_html=raw_artist[0], genre=raw_artist[1], country=raw_artist[2])
|
|
||||||
for raw_artist in r.json()['aaData']
|
|
||||||
]
|
]
|
||||||
|
)
|
||||||
|
|
||||||
@classmethod
|
|
||||||
def _raw_search(cls, query: str) -> Options:
|
|
||||||
"""
|
|
||||||
Searches the default endpoint from metal archives, which in turn searches only
|
|
||||||
for bands, but it is the default, thus I am rolling with it
|
|
||||||
"""
|
|
||||||
endpoint = "https://www.metal-archives.com/search/ajax-band-search/?field=name&query={query}&sEcho=1&iColumns=3&sColumns=&iDisplayStart=0&iDisplayLength=200&mDataProp_0=0&mDataProp_1=1&mDataProp_2=2"
|
|
||||||
|
|
||||||
r = cls.CONNECTION.get(endpoint.format(query=query))
|
def _artist_from_json(artist_html=None, genre=None, country=None) -> Artist:
|
||||||
if r is None:
|
|
||||||
return Options()
|
|
||||||
|
|
||||||
return Options([
|
|
||||||
cls.get_artist_from_json(artist_html=raw_artist[0], genre=raw_artist[1], country=raw_artist[2])
|
|
||||||
for raw_artist in r.json()['aaData']
|
|
||||||
])
|
|
||||||
|
|
||||||
@classmethod
|
|
||||||
def get_artist_from_json(cls, artist_html=None, genre=None, country=None) -> Artist:
|
|
||||||
"""
|
"""
|
||||||
TODO parse the country to a standard
|
TODO parse the country to a standard
|
||||||
"""
|
"""
|
||||||
@ -163,8 +82,8 @@ class EncyclopaediaMetallum(Page):
|
|||||||
]
|
]
|
||||||
)
|
)
|
||||||
|
|
||||||
@classmethod
|
|
||||||
def get_album_from_json(cls, album_html=None, release_type=None, artist_html=None) -> Album:
|
def _album_from_json(album_html=None, release_type=None, artist_html=None) -> Album:
|
||||||
# parse the html
|
# parse the html
|
||||||
# <a href="https://www.metal-archives.com/albums/Ghost_Bath/Self_Loather/970834">Self Loather</a>'
|
# <a href="https://www.metal-archives.com/albums/Ghost_Bath/Self_Loather/970834">Self Loather</a>'
|
||||||
soup = BeautifulSoup(album_html, 'html.parser')
|
soup = BeautifulSoup(album_html, 'html.parser')
|
||||||
@ -173,7 +92,7 @@ class EncyclopaediaMetallum(Page):
|
|||||||
album_url = anchor.get('href')
|
album_url = anchor.get('href')
|
||||||
album_id = album_url.split("/")[-1]
|
album_id = album_url.split("/")[-1]
|
||||||
|
|
||||||
album_type = cls.ALBUM_TYPE_MAP[release_type.strip()]
|
album_type = ALBUM_TYPE_MAP[release_type.strip()]
|
||||||
|
|
||||||
return Album(
|
return Album(
|
||||||
title=album_name,
|
title=album_name,
|
||||||
@ -182,32 +101,124 @@ class EncyclopaediaMetallum(Page):
|
|||||||
Source(SourcePages.ENCYCLOPAEDIA_METALLUM, album_url)
|
Source(SourcePages.ENCYCLOPAEDIA_METALLUM, album_url)
|
||||||
],
|
],
|
||||||
artist_list=[
|
artist_list=[
|
||||||
cls.get_artist_from_json(artist_html=artist_html)
|
_artist_from_json(artist_html=artist_html)
|
||||||
]
|
]
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
class EncyclopaediaMetallum(Page):
|
||||||
|
SOURCE_TYPE = SourcePages.ENCYCLOPAEDIA_METALLUM
|
||||||
|
LOGGER = ENCYCLOPAEDIA_METALLUM_LOGGER
|
||||||
|
|
||||||
|
def __init__(self):
    """
    Give this page its own connection to metal-archives and then run the
    parent initialiser.
    """
    # Build the connection first, then attach it; the search methods of this
    # class read it back through ``self.connection``.
    metal_archives_connection = Connection(
        logger=ENCYCLOPAEDIA_METALLUM_LOGGER,
        host="https://www.metal-archives.com/",
    )
    self.connection: Connection = metal_archives_connection

    super().__init__()
|
||||||
|
|
||||||
|
def song_search(self, song: Song) -> List[Song]:
    """
    Search metal-archives' advanced song search for the title of ``song``.

    One request is sent for every (artist, album) pair taken from
    ``song.main_artist_collection`` and ``song.album_collection``. An empty
    collection is replaced by the wildcard ``"*"``. Every pair costs one web
    request, so the number of requests is len(artists) * len(albums); keep
    the collections small.

    :param song: the song to look for; its ``title``, main artists and
        albums are read.
    :return: one entry per row of every usable response, built by
        ``_song_from_json``, in request order. Requests that yield no
        response, or a response without ``aaData``, contribute nothing.
    """
    # Imported locally so the method does not depend on the module's import block.
    from urllib.parse import quote

    endpoint = "https://www.metal-archives.com/search/ajax-advanced/searching/songs/?songTitle={song}&bandName={" \
               "artist}&releaseTitle={album}&lyrics=&genre=&sEcho=1&iColumns=5&sColumns=&iDisplayStart=0" \
               "&iDisplayLength=200&mDataProp_0=0&mDataProp_1=1&mDataProp_2=2&mDataProp_3=3&mDataProp_4=4&_" \
               "=1674550595663"
    data_key = 'aaData'

    def _escape(value) -> str:
        # Titles and names are free text: characters such as "&", "#" or "/"
        # would otherwise change the meaning of the query string. "*" is kept
        # literal so the wildcard request stays exactly as before.
        return quote(str(value), safe="*")

    song_title = _escape(song.title)
    album_titles = ["*"] if song.album_collection.empty else [album.title for album in song.album_collection]
    artist_titles = ["*"] if song.main_artist_collection.empty else [artist.name for artist in song.main_artist_collection]

    search_results = []

    for artist in artist_titles:
        for album in album_titles:
            r = self.connection.get(
                endpoint.format(song=song_title, artist=_escape(artist), album=_escape(album))
            )

            # NOTE(review): presumably the connection returns None when the
            # request failed. Skip just this combination instead of throwing
            # away everything collected so far.
            if r is None:
                continue

            parsed_data = r.json()
            if data_key not in parsed_data:
                continue

            search_results.extend(_song_from_json(
                artist_html=raw_song[0],
                album_html=raw_song[1],
                release_type=raw_song[2],
                title=raw_song[3],
                lyrics_html=raw_song[4]
            ) for raw_song in parsed_data[data_key])

    return search_results
|
||||||
|
|
||||||
|
def album_search(self, album: Album) -> List[Album]:
    """
    Search metal-archives' advanced album search for the title of ``album``.

    One request is sent per artist in ``album.artist_collection``; an empty
    collection is replaced by the wildcard ``"*"``.

    :param album: the album to look for; its ``title`` and artists are read.
    :return: one entry per row of every usable response, built by
        ``_album_from_json``, in request order. Requests that yield no
        response, or a response without ``aaData``, contribute nothing.
    """
    # Imported locally so the method does not depend on the module's import block.
    from urllib.parse import quote

    endpoint = "https://www.metal-archives.com/search/ajax-advanced/searching/albums/?bandName={" \
               "artist}&releaseTitle={album}&releaseYearFrom=&releaseMonthFrom=&releaseYearTo=&releaseMonthTo" \
               "=&country=&location=&releaseLabelName=&releaseCatalogNumber=&releaseIdentifiers" \
               "=&releaseRecordingInfo=&releaseDescription=&releaseNotes=&genre=&sEcho=1&iColumns=3&sColumns" \
               "=&iDisplayStart=0&iDisplayLength=200&mDataProp_0=0&mDataProp_1=1&mDataProp_2=2&_=1674563943747"
    data_key = 'aaData'

    def _escape(value) -> str:
        # Titles and names are free text: characters such as "&", "#" or "/"
        # would otherwise change the meaning of the query string. "*" is kept
        # literal so the wildcard request stays exactly as before.
        return quote(str(value), safe="*")

    album_title = _escape(album.title)
    artist_titles = ["*"] if album.artist_collection.empty else [artist.name for artist in album.artist_collection]

    search_results = []

    for artist in artist_titles:
        r = self.connection.get(endpoint.format(artist=_escape(artist), album=album_title))

        # NOTE(review): presumably the connection returns None when the
        # request failed. Skip just this artist instead of throwing away
        # everything collected so far.
        if r is None:
            continue

        parsed_data = r.json()
        if data_key not in parsed_data:
            continue

        search_results.extend(_album_from_json(
            artist_html=raw_album[0],
            album_html=raw_album[1],
            release_type=raw_album[2]
        ) for raw_album in parsed_data[data_key])

    # The collected albums have to be handed back; without this return the
    # method would always yield None.
    return search_results
|
||||||
|
|
||||||
|
def artist_search(self, artist: Artist) -> List[Artist]:
    """
    Search metal-archives' advanced band search for the name of ``artist``.

    :param artist: the artist to look for; only its ``name`` is read.
    :return: one entry per row of the response, built by
        ``_artist_from_json``; an empty list when there is no response or
        the response carries no ``aaData``.
    """
    # Imported locally so the method does not depend on the module's import block.
    from urllib.parse import quote

    endpoint = "https://www.metal-archives.com/search/ajax-advanced/searching/bands/?bandName={" \
               "artist}&genre=&country=&yearCreationFrom=&yearCreationTo=&bandNotes=&status=&themes=&location" \
               "=&bandLabelName=&sEcho=1&iColumns=3&sColumns=&iDisplayStart=0&iDisplayLength=200&mDataProp_0=0" \
               "&mDataProp_1=1&mDataProp_2=2&_=1674565459976"

    # Band names are free text ("AC/DC", "Above & Beyond"); escape them so
    # they cannot break the query string. "*" stays literal for wildcards.
    r = self.connection.get(endpoint.format(artist=quote(str(artist.name), safe="*")))

    if r is None:
        return []

    data_key = 'aaData'
    parsed_data = r.json()
    if data_key not in parsed_data:
        return []

    # Reuse the payload parsed above instead of decoding the body a second time.
    return [
        _artist_from_json(artist_html=raw_artist[0], genre=raw_artist[1], country=raw_artist[2])
        for raw_artist in parsed_data[data_key]
    ]
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
def get_song_from_json(cls, artist_html=None, album_html=None, release_type=None, title=None,
|
def _raw_search(cls, query: str) -> Options:
|
||||||
lyrics_html=None) -> Song:
|
"""
|
||||||
song_id = None
|
Searches the default endpoint from metal archives, which in turn searches only
|
||||||
if lyrics_html is not None:
|
for bands, but it is the default, thus I am rolling with it
|
||||||
soup = BeautifulSoup(lyrics_html, 'html.parser')
|
"""
|
||||||
anchor = soup.find('a')
|
endpoint = "https://www.metal-archives.com/search/ajax-band-search/?field=name&query={query}&sEcho=1&iColumns=3&sColumns=&iDisplayStart=0&iDisplayLength=200&mDataProp_0=0&mDataProp_1=1&mDataProp_2=2"
|
||||||
raw_song_id = anchor.get('id')
|
|
||||||
song_id = raw_song_id.replace("lyricsLink_", "")
|
r = cls.CONNECTION.get(endpoint.format(query=query))
|
||||||
|
if r is None:
|
||||||
|
return Options()
|
||||||
|
|
||||||
|
return Options([
|
||||||
|
cls.get_artist_from_json(artist_html=raw_artist[0], genre=raw_artist[1], country=raw_artist[2])
|
||||||
|
for raw_artist in r.json()['aaData']
|
||||||
|
])
|
||||||
|
|
||||||
return Song(
|
|
||||||
title=title,
|
|
||||||
main_artist_list=[
|
|
||||||
cls.get_artist_from_json(artist_html=artist_html)
|
|
||||||
],
|
|
||||||
album_list=[
|
|
||||||
cls.get_album_from_json(album_html=album_html, release_type=release_type, artist_html=artist_html)
|
|
||||||
],
|
|
||||||
source_list=[
|
|
||||||
Source(SourcePages.ENCYCLOPAEDIA_METALLUM, song_id)
|
|
||||||
]
|
|
||||||
)
|
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
def _fetch_artist_discography(cls, ma_artist_id: str) -> List[Album]:
|
def _fetch_artist_discography(cls, ma_artist_id: str) -> List[Album]:
|
||||||
|
Loading…
Reference in New Issue
Block a user