cleaned up search

This commit is contained in:
Hellow2 2023-05-23 16:21:12 +02:00
parent ab7f414125
commit 67b4a3bc65
3 changed files with 72 additions and 64 deletions

View File

@ -2,6 +2,7 @@ import logging
import random import random
from copy import copy from copy import copy
from typing import Optional, Union, Type, Dict, Set, List from typing import Optional, Union, Type, Dict, Set, List
import threading
import requests import requests
from bs4 import BeautifulSoup from bs4 import BeautifulSoup
@ -25,53 +26,67 @@ from ..utils import shared
from ..utils.support_classes import Query, DownloadResult, DefaultTarget from ..utils.support_classes import Query, DownloadResult, DefaultTarget
class Page: class Page(threading.Thread):
""" """
This is an abstract class, laying out the This is an abstract class, laying out the
functionality for every other class fetching something functionality for every other class fetching something
""" """
CONNECTION: Connection
API_SESSION: requests.Session = requests.Session()
API_SESSION.proxies = shared.proxies
TIMEOUT = 5
POST_TIMEOUT = TIMEOUT
TRIES = 5
LOGGER = logging.getLogger("this shouldn't be used")
SOURCE_TYPE: SourcePages SOURCE_TYPE: SourcePages
LOGGER = logging.getLogger("this shouldn't be used")
def __init__(self):
threading.Thread.__init__(self)
@classmethod @classmethod
def get_soup_from_response(cls, r: requests.Response) -> BeautifulSoup: def get_soup_from_response(cls, r: requests.Response) -> BeautifulSoup:
return BeautifulSoup(r.content, "html.parser") return BeautifulSoup(r.content, "html.parser")
@classmethod # to search stuff
def search(cls, query: Query) -> Options: def search(self, query: Query) -> List[DatabaseObject]:
results = [] results = []
if query.is_raw:
for search_query in query.default_search:
results.extend(self.general_search(search_query))
return results
music_object = query.music_object
search_functions = {
Song: self.song_search,
Album: self.album_search,
Artist: self.artist_search,
Label: self.label_search
}
if type(music_object) in search_functions:
r = search_functions[type(music_object)](music_object)
if len(r) > 0:
return r
r = []
for default_query in query.default_search: for default_query in query.default_search:
results.extend(cls._raw_search(default_query)) results.extend(self.general_search(default_query))
return Options(results) return results
@classmethod
def _raw_search(cls, query: str) -> Options:
"""
# The Query
You can define a new parameter with "#",
the letter behind it defines the *type* of parameter, followed by a space
"#a Psychonaut 4 #r Tired, Numb and #t Drop by Drop"
if no # is in the query it gets treated as "unspecified query"
# Functionality
Returns the best matches from this page for the query, passed in.
:param query:
:return possible_music_objects:
"""
def general_search(self, search_query: str) -> List[DatabaseObject]:
return [] return []
def label_search(self, label: Label) -> List[Label]:
return []
def artist_search(self, artist: Artist) -> List[Artist]:
return []
def album_search(self, album: Album) -> List[Album]:
return []
def song_search(self, song: Song) -> List[Song]:
return []
@classmethod @classmethod
def fetch_details(cls, music_object: Union[Song, Album, Artist, Label], stop_at_level: int = 1) -> DatabaseObject: def fetch_details(cls, music_object: Union[Song, Album, Artist, Label], stop_at_level: int = 1) -> DatabaseObject:
""" """

View File

@ -23,6 +23,17 @@ from ..objects import (
) )
ALBUM_TYPE_MAP: Dict[str, AlbumType] = defaultdict(lambda: AlbumType.OTHER, {
"Full-length": AlbumType.STUDIO_ALBUM,
"Single": AlbumType.SINGLE,
"EP": AlbumType.EP,
"Demo": AlbumType.DEMO,
"Video": AlbumType.OTHER,
"Live album": AlbumType.LIVE_ALBUM,
"Compilation": AlbumType.COMPILATION_ALBUM
})
class EncyclopaediaMetallum(Page): class EncyclopaediaMetallum(Page):
CONNECTION: Connection = Connection( CONNECTION: Connection = Connection(
host="https://www.metal-archives.com/", host="https://www.metal-archives.com/",
@ -31,35 +42,13 @@ class EncyclopaediaMetallum(Page):
SOURCE_TYPE = SourcePages.ENCYCLOPAEDIA_METALLUM SOURCE_TYPE = SourcePages.ENCYCLOPAEDIA_METALLUM
ALBUM_TYPE_MAP: Dict[str, AlbumType] = defaultdict(lambda: AlbumType.OTHER, {
"Full-length": AlbumType.STUDIO_ALBUM,
"Single": AlbumType.SINGLE,
"EP": AlbumType.EP,
"Demo": AlbumType.DEMO,
"Video": AlbumType.OTHER,
"Live album": AlbumType.LIVE_ALBUM,
"Compilation": AlbumType.COMPILATION_ALBUM
})
LOGGER = ENCYCLOPAEDIA_METALLUM_LOGGER LOGGER = ENCYCLOPAEDIA_METALLUM_LOGGER
@classmethod def __init__(self):
def _raw_search(cls, query: str) -> Options: self.connection: Connection = Connection(
query_obj = cls.Query(query) host="https://www.metal-archives.com/",
logger=ENCYCLOPAEDIA_METALLUM_LOGGER
if query_obj.is_raw: )
return cls.simple_search(query_obj)
return cls.advanced_search(query_obj)
@classmethod
def advanced_search(cls, query: Query) -> Options:
if query.song is not None:
return Options(cls.search_for_song(query=query))
if query.album is not None:
return Options(cls.search_for_album(query=query))
if query.artist is not None:
return Options(cls.search_for_artist(query=query))
return Options
@classmethod @classmethod
def search_for_song(cls, query: Query) -> List[Song]: def search_for_song(cls, query: Query) -> List[Song]:
@ -123,7 +112,7 @@ class EncyclopaediaMetallum(Page):
] ]
@classmethod @classmethod
def simple_search(cls, query: Query) -> List[Artist]: def _raw_search(cls, query: str) -> Options:
""" """
Searches the default endpoint from metal archives, which in turn searches only Searches the default endpoint from metal archives, which in turn searches only
for bands, but it is the default, thus I am rolling with it for bands, but it is the default, thus I am rolling with it
@ -132,12 +121,12 @@ class EncyclopaediaMetallum(Page):
r = cls.CONNECTION.get(endpoint.format(query=query)) r = cls.CONNECTION.get(endpoint.format(query=query))
if r is None: if r is None:
return [] return Options()
return [ return Options([
cls.get_artist_from_json(artist_html=raw_artist[0], genre=raw_artist[1], country=raw_artist[2]) cls.get_artist_from_json(artist_html=raw_artist[0], genre=raw_artist[1], country=raw_artist[2])
for raw_artist in r.json()['aaData'] for raw_artist in r.json()['aaData']
] ])
@classmethod @classmethod
def get_artist_from_json(cls, artist_html=None, genre=None, country=None) -> Artist: def get_artist_from_json(cls, artist_html=None, genre=None, country=None) -> Artist:

View File

@ -9,7 +9,11 @@ class Query:
music_object: DatabaseObject = None music_object: DatabaseObject = None
) -> None: ) -> None:
self.raw_query: str = raw_query self.raw_query: str = raw_query
self.music_object: Optional[DatabaseObject] = None self.music_object: Optional[DatabaseObject] = music_object
@property
def is_raw(self) -> bool:
return self.music_object is None
@property @property
def default_search(self) -> List[str]: def default_search(self) -> List[str]: