rewritten specific search for me
This commit is contained in:
		@@ -44,13 +44,6 @@ class Page(threading.Thread):
 | 
			
		||||
 | 
			
		||||
    # to search stuff
 | 
			
		||||
    def search(self, query: Query) -> List[DatabaseObject]:
 | 
			
		||||
        results = []
 | 
			
		||||
        
 | 
			
		||||
        if query.is_raw:
 | 
			
		||||
            for search_query in query.default_search:
 | 
			
		||||
                results.extend(self.general_search(search_query))
 | 
			
		||||
            return results
 | 
			
		||||
        
 | 
			
		||||
        music_object = query.music_object
 | 
			
		||||
        
 | 
			
		||||
        search_functions = {
 | 
			
		||||
@@ -67,9 +60,9 @@ class Page(threading.Thread):
 | 
			
		||||
            
 | 
			
		||||
        r = []
 | 
			
		||||
        for default_query in query.default_search:
 | 
			
		||||
            results.extend(self.general_search(default_query))
 | 
			
		||||
            r.extend(self.general_search(default_query))
 | 
			
		||||
        
 | 
			
		||||
        return results
 | 
			
		||||
        return r
 | 
			
		||||
    
 | 
			
		||||
    def general_search(self, search_query: str) -> List[DatabaseObject]:
        """
        Free-text search hook; this implementation finds nothing.

        NOTE(review): presumably meant to be overridden by concrete pages - confirm.

        :param search_query: the raw search string
        :return: always a new, empty list
        """
        no_results: List[DatabaseObject] = list()
        return no_results
 | 
			
		||||
 
 | 
			
		||||
@@ -34,111 +34,30 @@ ALBUM_TYPE_MAP: Dict[str, AlbumType] = defaultdict(lambda: AlbumType.OTHER, {
 | 
			
		||||
})
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
class EncyclopaediaMetallum(Page):
 | 
			
		||||
    CONNECTION: Connection = Connection(
 | 
			
		||||
        host="https://www.metal-archives.com/",
 | 
			
		||||
        logger=ENCYCLOPAEDIA_METALLUM_LOGGER
 | 
			
		||||
    )
 | 
			
		||||
def _song_from_json(artist_html=None, album_html=None, release_type=None, title=None,
 | 
			
		||||
                    lyrics_html=None) -> Song:
 | 
			
		||||
    song_id = None
 | 
			
		||||
    if lyrics_html is not None:
 | 
			
		||||
        soup = BeautifulSoup(lyrics_html, 'html.parser')
 | 
			
		||||
        anchor = soup.find('a')
 | 
			
		||||
        raw_song_id = anchor.get('id')
 | 
			
		||||
        song_id = raw_song_id.replace("lyricsLink_", "")
 | 
			
		||||
 | 
			
		||||
    SOURCE_TYPE = SourcePages.ENCYCLOPAEDIA_METALLUM
 | 
			
		||||
    
 | 
			
		||||
    LOGGER = ENCYCLOPAEDIA_METALLUM_LOGGER
 | 
			
		||||
    
 | 
			
		||||
    def __init__(self):
 | 
			
		||||
        self.connection: Connection = Connection(
 | 
			
		||||
            host="https://www.metal-archives.com/",
 | 
			
		||||
            logger=ENCYCLOPAEDIA_METALLUM_LOGGER
 | 
			
		||||
        )
 | 
			
		||||
        
 | 
			
		||||
        super().__init__()
 | 
			
		||||
 | 
			
		||||
    def song_search(self, song: Song) -> List[Song]:
 | 
			
		||||
        endpoint = "https://www.metal-archives.com/search/ajax-advanced/searching/songs/?songTitle={song}&bandName={" \
 | 
			
		||||
                   "artist}&releaseTitle={album}&lyrics=&genre=&sEcho=1&iColumns=5&sColumns=&iDisplayStart=0" \
 | 
			
		||||
                   "&iDisplayLength=200&mDataProp_0=0&mDataProp_1=1&mDataProp_2=2&mDataProp_3=3&mDataProp_4=4&_" \
 | 
			
		||||
                   "=1674550595663"
 | 
			
		||||
 | 
			
		||||
        """
 | 
			
		||||
        The difficult question I am facing is, that if I try every artist, with every song, with every album,
 | 
			
		||||
        I end up with a quadratic runtime complecety O(n^2), where every step means one web request.
 | 
			
		||||
        
 | 
			
		||||
        This.
 | 
			
		||||
        Is not good.
 | 
			
		||||
        """
 | 
			
		||||
 | 
			
		||||
        r = self.connection.get(
 | 
			
		||||
            endpoint.format(song=song.title, artist=query.artist_str, album=query.album_str)
 | 
			
		||||
        )
 | 
			
		||||
        if r is None:
 | 
			
		||||
            return []
 | 
			
		||||
 | 
			
		||||
        return [cls.get_song_from_json(
 | 
			
		||||
            artist_html=raw_song[0],
 | 
			
		||||
            album_html=raw_song[1],
 | 
			
		||||
            release_type=raw_song[2],
 | 
			
		||||
            title=raw_song[3],
 | 
			
		||||
            lyrics_html=raw_song[4]
 | 
			
		||||
        ) for raw_song in r.json()['aaData']]
 | 
			
		||||
 | 
			
		||||
    @classmethod
 | 
			
		||||
    def search_for_album(cls, query: Query) -> List[Album]:
 | 
			
		||||
        endpoint = "https://www.metal-archives.com/search/ajax-advanced/searching/albums/?bandName={" \
 | 
			
		||||
                   "artist}&releaseTitle={album}&releaseYearFrom=&releaseMonthFrom=&releaseYearTo=&releaseMonthTo" \
 | 
			
		||||
                   "=&country=&location=&releaseLabelName=&releaseCatalogNumber=&releaseIdentifiers" \
 | 
			
		||||
                   "=&releaseRecordingInfo=&releaseDescription=&releaseNotes=&genre=&sEcho=1&iColumns=3&sColumns" \
 | 
			
		||||
                   "=&iDisplayStart=0&iDisplayLength=200&mDataProp_0=0&mDataProp_1=1&mDataProp_2=2&_=1674563943747"
 | 
			
		||||
 | 
			
		||||
        r = cls.CONNECTION.get(endpoint.format(artist=query.artist_str, album=query.album_str))
 | 
			
		||||
        if r is None:
 | 
			
		||||
            return []
 | 
			
		||||
 | 
			
		||||
        return [cls.get_album_from_json(
 | 
			
		||||
            artist_html=raw_album[0],
 | 
			
		||||
            album_html=raw_album[1],
 | 
			
		||||
            release_type=raw_album[2]
 | 
			
		||||
        ) for raw_album in r.json()['aaData']]
 | 
			
		||||
 | 
			
		||||
    @classmethod
 | 
			
		||||
    def search_for_artist(cls, query: Query) -> List[Artist]:
 | 
			
		||||
        endpoint = "https://www.metal-archives.com/search/ajax-advanced/searching/bands/?bandName={" \
 | 
			
		||||
                   "artist}&genre=&country=&yearCreationFrom=&yearCreationTo=&bandNotes=&status=&themes=&location" \
 | 
			
		||||
                   "=&bandLabelName=&sEcho=1&iColumns=3&sColumns=&iDisplayStart=0&iDisplayLength=200&mDataProp_0=0" \
 | 
			
		||||
                   "&mDataProp_1=1&mDataProp_2=2&_=1674565459976"
 | 
			
		||||
 | 
			
		||||
        r = cls.CONNECTION.get(endpoint.format(artist=query.artist))
 | 
			
		||||
 | 
			
		||||
        if r is None:
 | 
			
		||||
            return []
 | 
			
		||||
 | 
			
		||||
        data_key = 'aaData'
 | 
			
		||||
        parsed_data = r.json()
 | 
			
		||||
        if data_key not in parsed_data:
 | 
			
		||||
            return []
 | 
			
		||||
 | 
			
		||||
        return [
 | 
			
		||||
            cls.get_artist_from_json(artist_html=raw_artist[0], genre=raw_artist[1], country=raw_artist[2])
 | 
			
		||||
            for raw_artist in r.json()['aaData']
 | 
			
		||||
    return Song(
 | 
			
		||||
        title=title,
 | 
			
		||||
        main_artist_list=[
 | 
			
		||||
            _artist_from_json(artist_html=artist_html)
 | 
			
		||||
        ],
 | 
			
		||||
        album_list=[
 | 
			
		||||
            _album_from_json(album_html=album_html, release_type=release_type, artist_html=artist_html)
 | 
			
		||||
        ],
 | 
			
		||||
        source_list=[
 | 
			
		||||
            Source(SourcePages.ENCYCLOPAEDIA_METALLUM, song_id)
 | 
			
		||||
        ]
 | 
			
		||||
    )
 | 
			
		||||
 | 
			
		||||
    @classmethod
 | 
			
		||||
    def _raw_search(cls, query: str) -> Options:
 | 
			
		||||
        """
 | 
			
		||||
        Searches the default endpoint from metal archives, which internally searches only
 | 
			
		||||
        for bands, but it is the default, thus I am rolling with it
 | 
			
		||||
        """
 | 
			
		||||
        endpoint = "https://www.metal-archives.com/search/ajax-band-search/?field=name&query={query}&sEcho=1&iColumns=3&sColumns=&iDisplayStart=0&iDisplayLength=200&mDataProp_0=0&mDataProp_1=1&mDataProp_2=2"
 | 
			
		||||
 | 
			
		||||
        r = cls.CONNECTION.get(endpoint.format(query=query))
 | 
			
		||||
        if r is None:
 | 
			
		||||
            return Options()
 | 
			
		||||
 | 
			
		||||
        return Options([
 | 
			
		||||
            cls.get_artist_from_json(artist_html=raw_artist[0], genre=raw_artist[1], country=raw_artist[2])
 | 
			
		||||
            for raw_artist in r.json()['aaData']
 | 
			
		||||
        ])
 | 
			
		||||
 | 
			
		||||
    @classmethod
 | 
			
		||||
    def get_artist_from_json(cls, artist_html=None, genre=None, country=None) -> Artist:
 | 
			
		||||
def _artist_from_json(artist_html=None, genre=None, country=None) -> Artist:
 | 
			
		||||
    """
 | 
			
		||||
    TODO parse the country to a standard
 | 
			
		||||
    """
 | 
			
		||||
@@ -163,8 +82,8 @@ class EncyclopaediaMetallum(Page):
 | 
			
		||||
        ]
 | 
			
		||||
    )
 | 
			
		||||
 | 
			
		||||
    @classmethod
 | 
			
		||||
    def get_album_from_json(cls, album_html=None, release_type=None, artist_html=None) -> Album:
 | 
			
		||||
 | 
			
		||||
def _album_from_json(album_html=None, release_type=None, artist_html=None) -> Album:
 | 
			
		||||
    # parse the html
 | 
			
		||||
    # <a href="https://www.metal-archives.com/albums/Ghost_Bath/Self_Loather/970834">Self Loather</a>'
 | 
			
		||||
    soup = BeautifulSoup(album_html, 'html.parser')
 | 
			
		||||
@@ -173,7 +92,7 @@ class EncyclopaediaMetallum(Page):
 | 
			
		||||
    album_url = anchor.get('href')
 | 
			
		||||
    album_id = album_url.split("/")[-1]
 | 
			
		||||
 | 
			
		||||
        album_type = cls.ALBUM_TYPE_MAP[release_type.strip()]
 | 
			
		||||
    album_type = ALBUM_TYPE_MAP[release_type.strip()]
 | 
			
		||||
 | 
			
		||||
    return Album(
 | 
			
		||||
        title=album_name,
 | 
			
		||||
@@ -182,32 +101,124 @@ class EncyclopaediaMetallum(Page):
 | 
			
		||||
            Source(SourcePages.ENCYCLOPAEDIA_METALLUM, album_url)
 | 
			
		||||
        ],
 | 
			
		||||
        artist_list=[
 | 
			
		||||
                cls.get_artist_from_json(artist_html=artist_html)
 | 
			
		||||
            _artist_from_json(artist_html=artist_html)
 | 
			
		||||
        ]
 | 
			
		||||
    )
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
class EncyclopaediaMetallum(Page):
 | 
			
		||||
    SOURCE_TYPE = SourcePages.ENCYCLOPAEDIA_METALLUM
 | 
			
		||||
    LOGGER = ENCYCLOPAEDIA_METALLUM_LOGGER
 | 
			
		||||
    
 | 
			
		||||
    def __init__(self):
        """
        Create the connection to metal-archives.com, then run the parent initializer.
        """
        # The connection is assigned before super().__init__() is called,
        # same as before; the order is kept on purpose.
        metal_archives_connection = Connection(
            logger=ENCYCLOPAEDIA_METALLUM_LOGGER,
            host="https://www.metal-archives.com/",
        )
        self.connection: Connection = metal_archives_connection

        super().__init__()
 | 
			
		||||
 | 
			
		||||
    def song_search(self, song: Song) -> List[Song]:
        """
        Search the advanced song search of Metal Archives for `song`.

        One web request is sent for every (artist, album) combination taken
        from `song.main_artist_collection` and `song.album_collection`, so the
        number of requests grows with the product of both collection sizes.
        If a collection is empty, "*" is used in its place.

        :param song: the song whose title, artists and albums are searched for
        :return: all songs parsed from the responses; an empty list if nothing was found
        """
        endpoint = "https://www.metal-archives.com/search/ajax-advanced/searching/songs/?songTitle={song}&bandName={" \
                   "artist}&releaseTitle={album}&lyrics=&genre=&sEcho=1&iColumns=5&sColumns=&iDisplayStart=0" \
                   "&iDisplayLength=200&mDataProp_0=0&mDataProp_1=1&mDataProp_2=2&mDataProp_3=3&mDataProp_4=4&_" \
                   "=1674550595663"

        song_title = song.title
        album_titles = ["*"] if song.album_collection.empty else [album.title for album in song.album_collection]
        artist_titles = ["*"] if song.main_artist_collection.empty else [artist.name for artist in song.main_artist_collection]

        search_results = []

        # NOTE(review): the titles are inserted into the URL unescaped - confirm
        # that Connection.get takes care of characters like "&" or "#".
        for artist in artist_titles:
            for album in album_titles:
                r = self.connection.get(
                    endpoint.format(song=song_title, artist=artist, album=album)
                )

                if r is None:
                    # A single failed request must not throw away the results
                    # that the other combinations already produced.
                    continue

                # Same guard as in artist_search: a response without 'aaData'
                # simply contributes no results instead of raising a KeyError.
                search_results.extend(
                    _song_from_json(
                        artist_html=raw_song[0],
                        album_html=raw_song[1],
                        release_type=raw_song[2],
                        title=raw_song[3],
                        lyrics_html=raw_song[4]
                    )
                    for raw_song in r.json().get('aaData', [])
                )

        return search_results
 | 
			
		||||
 | 
			
		||||
    def album_search(self, album: Album) -> List[Album]:
        """
        Search the advanced album search of Metal Archives for `album`.

        One web request is sent per artist in `album.artist_collection`.
        If that collection is empty, a single request with "*" as the band
        name is sent instead.

        :param album: the album whose title and artists are searched for
        :return: all albums parsed from the responses; an empty list if nothing was found
        """
        endpoint = "https://www.metal-archives.com/search/ajax-advanced/searching/albums/?bandName={" \
                   "artist}&releaseTitle={album}&releaseYearFrom=&releaseMonthFrom=&releaseYearTo=&releaseMonthTo" \
                   "=&country=&location=&releaseLabelName=&releaseCatalogNumber=&releaseIdentifiers" \
                   "=&releaseRecordingInfo=&releaseDescription=&releaseNotes=&genre=&sEcho=1&iColumns=3&sColumns" \
                   "=&iDisplayStart=0&iDisplayLength=200&mDataProp_0=0&mDataProp_1=1&mDataProp_2=2&_=1674563943747"

        album_title = album.title
        artist_titles = ["*"] if album.artist_collection.empty else [artist.name for artist in album.artist_collection]

        search_results = []

        for artist in artist_titles:
            r = self.connection.get(endpoint.format(artist=artist, album=album_title))
            if r is None:
                # A single failed request must not throw away the results
                # that the other artists already produced.
                continue

            # Same guard as in artist_search: a response without 'aaData'
            # simply contributes no results instead of raising a KeyError.
            search_results.extend(
                _album_from_json(
                    artist_html=raw_album[0],
                    album_html=raw_album[1],
                    release_type=raw_album[2]
                )
                for raw_album in r.json().get('aaData', [])
            )

        # The original fell off the end of the function here and therefore
        # always returned None instead of the collected albums.
        return search_results
 | 
			
		||||
 | 
			
		||||
    def artist_search(self, artist: Artist) -> List[Artist]:
        """
        Search the advanced band search of Metal Archives for `artist`.

        A single web request is sent, using `artist.name` as the band name.

        :param artist: the artist whose name is searched for
        :return: the artists parsed from the response; an empty list if the
            request failed or the response contains no 'aaData' entry
        """
        endpoint = "https://www.metal-archives.com/search/ajax-advanced/searching/bands/?bandName={" \
                   "artist}&genre=&country=&yearCreationFrom=&yearCreationTo=&bandNotes=&status=&themes=&location" \
                   "=&bandLabelName=&sEcho=1&iColumns=3&sColumns=&iDisplayStart=0&iDisplayLength=200&mDataProp_0=0" \
                   "&mDataProp_1=1&mDataProp_2=2&_=1674565459976"

        r = self.connection.get(endpoint.format(artist=artist.name))

        if r is None:
            return []

        data_key = 'aaData'
        parsed_data = r.json()
        if data_key not in parsed_data:
            return []

        # Reuse the already parsed response instead of decoding the body a second time.
        return [
            _artist_from_json(artist_html=raw_artist[0], genre=raw_artist[1], country=raw_artist[2])
            for raw_artist in parsed_data[data_key]
        ]
 | 
			
		||||
 | 
			
		||||
    @classmethod
 | 
			
		||||
    def get_song_from_json(cls, artist_html=None, album_html=None, release_type=None, title=None,
 | 
			
		||||
                           lyrics_html=None) -> Song:
 | 
			
		||||
        song_id = None
 | 
			
		||||
        if lyrics_html is not None:
 | 
			
		||||
            soup = BeautifulSoup(lyrics_html, 'html.parser')
 | 
			
		||||
            anchor = soup.find('a')
 | 
			
		||||
            raw_song_id = anchor.get('id')
 | 
			
		||||
            song_id = raw_song_id.replace("lyricsLink_", "")
 | 
			
		||||
    def _raw_search(cls, query: str) -> Options:
 | 
			
		||||
        """
 | 
			
		||||
        Searches the default endpoint from metal archives, which internally searches only
 | 
			
		||||
        for bands, but it is the default, thus I am rolling with it
 | 
			
		||||
        """
 | 
			
		||||
        endpoint = "https://www.metal-archives.com/search/ajax-band-search/?field=name&query={query}&sEcho=1&iColumns=3&sColumns=&iDisplayStart=0&iDisplayLength=200&mDataProp_0=0&mDataProp_1=1&mDataProp_2=2"
 | 
			
		||||
 | 
			
		||||
        r = cls.CONNECTION.get(endpoint.format(query=query))
 | 
			
		||||
        if r is None:
 | 
			
		||||
            return Options()
 | 
			
		||||
 | 
			
		||||
        return Options([
 | 
			
		||||
            cls.get_artist_from_json(artist_html=raw_artist[0], genre=raw_artist[1], country=raw_artist[2])
 | 
			
		||||
            for raw_artist in r.json()['aaData']
 | 
			
		||||
        ])
 | 
			
		||||
 | 
			
		||||
        return Song(
 | 
			
		||||
            title=title,
 | 
			
		||||
            main_artist_list=[
 | 
			
		||||
                cls.get_artist_from_json(artist_html=artist_html)
 | 
			
		||||
            ],
 | 
			
		||||
            album_list=[
 | 
			
		||||
                cls.get_album_from_json(album_html=album_html, release_type=release_type, artist_html=artist_html)
 | 
			
		||||
            ],
 | 
			
		||||
            source_list=[
 | 
			
		||||
                Source(SourcePages.ENCYCLOPAEDIA_METALLUM, song_id)
 | 
			
		||||
            ]
 | 
			
		||||
        )
 | 
			
		||||
 | 
			
		||||
    @classmethod
 | 
			
		||||
    def _fetch_artist_discography(cls, ma_artist_id: str) -> List[Album]:
 | 
			
		||||
 
 | 
			
		||||
		Reference in New Issue
	
	Block a user