Update musify.py
parent 4a199547de
commit f3d9025d0b
@@ -1,7 +1,8 @@
-from typing import List
+from typing import List, Optional
 import requests
 from bs4 import BeautifulSoup
 import pycountry
+import time
 
 from ..utils.shared import (
     ENCYCLOPAEDIA_METALLUM_LOGGER as LOGGER
@@ -24,6 +25,9 @@ from ..utils import (
     string_processing,
     shared
 )
+from ..utils.shared import (
+    MUSIFY_LOGGER as LOGGER
+)
 
 
 class EncyclopaediaMetallum(Page):
@@ -42,33 +46,80 @@ class EncyclopaediaMetallum(Page):
         query_obj = cls.Query(query)
 
         if query_obj.is_raw:
-            return cls.simple_search(query_obj)
-        return cls.advanced_search(query_obj)
+            return cls.plaintext_search(query_obj.query)
+        return cls.plaintext_search(cls.get_plaintext_query(query_obj))
 
     @classmethod
-    def advanced_search(cls, query: Page.Query) -> Options:
-        if query.song is not None:
-            return Options(cls.search_for_song(query=query))
-        if query.album is not None:
-            return Options(cls.search_for_album(query=query))
-        if query.artist is not None:
-            return Options(cls.search_for_artist(query=query))
-        return Options
+    def get_plaintext_query(cls, query: Page.Query) -> str:
+        if query.album is None:
+            return f"{query.artist or '*'} - {query.song or '*'}"
+        return f"{query.artist or '*'} - {query.album or '*'} - {query.song or '*'}"
 
     @classmethod
-    def search_for_song(cls, query: Page.Query) -> List[Song]:
-        return []
+    def get_soup_of_search(cls, query: str, trie=0) -> Optional[BeautifulSoup]:
+        url = f"https://musify.club/search?searchText={query}"
+        LOGGER.debug(f"Trying to get soup from {url}")
+        try:
+            r = cls.API_SESSION.get(url, timeout=15)
+        except requests.exceptions.Timeout:
+            return None
+        if r.status_code != 200:
+            if r.status_code in [503] and trie < cls.TRIES:
+                LOGGER.warning(f"youtube blocked downloading. ({trie}-{cls.TRIES})")
+                LOGGER.warning(f"retrying in {cls.TIMEOUT} seconds again")
+                time.sleep(cls.TIMEOUT)
+                return cls.get_soup_of_search(query, trie=trie + 1)
+
+            LOGGER.warning("too many tries, returning")
+            return None
+        return BeautifulSoup(r.content, features="html.parser")
 
     @classmethod
-    def search_for_album(cls, query: Page.Query) -> List[Album]:
-        return []
-
-    @classmethod
-    def search_for_artist(cls, query: Page.Query) -> List[Artist]:
-        return []
+    def plaintext_search(cls, query: Page.Query) -> List[MusicObject]:
+        search_soup = cls.get_soup_of_search(query=query)
+        if search_soup is None:
+            return None
+
+        # album and songs
+        # child of div class: contacts row
+        for contact_container_soup in search_soup.find_all("div", {"class": ["contacts", "row"]}):
+            pass
+
+        # song
+        # div class: playlist__item
+        for playlist_soup in search_soup.find_all("div", {"class": "playlist"}):
+            pass
+
+        # get the soup of the container with all track results
+        tracklist_container_soup = search_soup.find_all("div", {"class": "playlist"})
+        if len(tracklist_container_soup) == 0:
+            return []
+        if len(tracklist_container_soup) != 1:
+            LOGGER.warning("HTML Layout of https://musify.club/ changed. (or bug)")
+        tracklist_container_soup = tracklist_container_soup[0]
+
+        tracklist_soup = tracklist_container_soup.find_all("div", {"class": "playlist__details"})
+
+        def parse_track_soup(_track_soup):
+            anchor_soups = _track_soup.find_all("a")
+            artist_ = anchor_soups[0].text.strip()
+            track_ = anchor_soups[1].text.strip()
+            url_ = anchor_soups[1]['href']
+            return artist_, track_, url_
+
+        # check each track in the container, if they match
+        for track_soup in tracklist_soup:
+            artist_option, title_option, track_url = parse_track_soup(track_soup)
+
+            title_match, title_distance = phonetic_compares.match_titles(title, title_option)
+            artist_match, artist_distance = phonetic_compares.match_artists(artist, artist_option)
+
+            logging.debug(f"{(title, title_option, title_match, title_distance)}")
+            logging.debug(f"{(artist, artist_option, artist_match, artist_distance)}")
+
+            if not title_match and not artist_match:
+                return cls.get_download_link(track_url)
 
-    @classmethod
-    def simple_search(cls, query: Page.Query) -> List[Artist]:
         return []
 
     @classmethod
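The new get_plaintext_query helper collapses a structured query into musify.club's plaintext search format, falling back to the '*' wildcard for any missing field. A minimal sketch of the same idea, using a hypothetical stand-in for the project's Page.Query class (the field names artist/album/song come from the diff, the NamedTuple itself is an assumption):

    from typing import Optional, NamedTuple

    class DummyQuery(NamedTuple):
        # stand-in for Page.Query; defaults are assumptions
        artist: Optional[str] = None
        album: Optional[str] = None
        song: Optional[str] = None

    def get_plaintext_query(query: DummyQuery) -> str:
        # missing fields are replaced by the '*' wildcard
        if query.album is None:
            return f"{query.artist or '*'} - {query.song or '*'}"
        return f"{query.artist or '*'} - {query.album or '*'} - {query.song or '*'}"

    print(get_plaintext_query(DummyQuery(artist="Ghost Bath", song="Golden Number")))
    # Ghost Bath - Golden Number
    print(get_plaintext_query(DummyQuery(album="Moonlover")))
    # * - Moonlover - *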
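The added get_soup_of_search retries the search request when musify.club answers 503, sleeping between attempts and giving up after a fixed number of tries. A standalone sketch of that retry pattern, with module-level constants and a plain requests.Session standing in for the class attributes (cls.API_SESSION, cls.TRIES, cls.TIMEOUT) that are not shown in this diff:

    import time
    from typing import Optional

    import requests
    from bs4 import BeautifulSoup

    TRIES = 5      # assumed retry budget
    TIMEOUT = 10   # assumed wait between retries, in seconds
    SESSION = requests.Session()

    def get_soup_of_search(query: str, trie: int = 0) -> Optional[BeautifulSoup]:
        url = f"https://musify.club/search?searchText={query}"
        try:
            r = SESSION.get(url, timeout=15)
        except requests.exceptions.Timeout:
            return None

        if r.status_code != 200:
            # 503 usually means throttling; back off and retry recursively
            if r.status_code == 503 and trie < TRIES:
                time.sleep(TIMEOUT)
                return get_soup_of_search(query, trie=trie + 1)
            return None

        return BeautifulSoup(r.content, features="html.parser")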
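plaintext_search then pulls artist, title, and track URL out of each div.playlist__details block via its first two anchor tags, the way parse_track_soup does above. A small sketch against a hand-written HTML snippet (the markup is a guess at musify.club's structure, not a captured page):

    from bs4 import BeautifulSoup

    # minimal stand-in for one search result row; the real markup may differ
    html = """
    <div class="playlist__details">
        <a href="/artist/ghost-bath">Ghost Bath</a>
        <a href="/track/golden-number">Golden Number</a>
    </div>
    """

    soup = BeautifulSoup(html, features="html.parser")

    for track_soup in soup.find_all("div", {"class": "playlist__details"}):
        anchor_soups = track_soup.find_all("a")
        artist_ = anchor_soups[0].text.strip()   # first anchor: artist name
        track_ = anchor_soups[1].text.strip()    # second anchor: track title
        url_ = anchor_soups[1]['href']           # relative track link
        print(artist_, track_, url_)
    # Ghost Bath Golden Number /track/golden-number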