2023-03-14 13:48:03 +00:00
|
|
|
from typing import List, Optional
|
2023-03-13 14:47:38 +00:00
|
|
|
import requests
|
|
|
|
from bs4 import BeautifulSoup
|
|
|
|
import pycountry
|
2023-03-14 13:48:03 +00:00
|
|
|
import time
|
2023-03-13 14:47:38 +00:00
|
|
|
|
|
|
|
from ..utils.shared import (
|
|
|
|
ENCYCLOPAEDIA_METALLUM_LOGGER as LOGGER
|
|
|
|
)
|
|
|
|
|
|
|
|
from .abstract import Page
|
|
|
|
from ..objects import (
|
|
|
|
MusicObject,
|
|
|
|
Artist,
|
|
|
|
Source,
|
|
|
|
SourcePages,
|
|
|
|
Song,
|
|
|
|
Album,
|
|
|
|
ID3Timestamp,
|
|
|
|
FormattedText,
|
|
|
|
Label,
|
|
|
|
Options
|
|
|
|
)
|
|
|
|
from ..utils import (
|
|
|
|
string_processing,
|
|
|
|
shared
|
|
|
|
)
|
2023-03-14 13:48:03 +00:00
|
|
|
from ..utils.shared import (
|
|
|
|
MUSIFY_LOGGER as LOGGER
|
|
|
|
)
|
2023-03-13 14:47:38 +00:00
|
|
|
|
|
|
|
|
|
|
|
class EncyclopaediaMetallum(Page):
    """Search/metadata page for https://musify.club/.

    NOTE(review): the class is named EncyclopaediaMetallum, but every URL,
    the Referer header and SOURCE_TYPE target musify.club — this looks like
    a copy-paste origin. Renaming would break callers, so the name is kept.
    """

    # One session shared by all class methods so keep-alive and cookies
    # are reused across requests.
    API_SESSION: requests.Session = requests.Session()
    API_SESSION.headers = {
        "User-Agent": "Mozilla/5.0 (X11; Linux x86_64; rv:106.0) Gecko/20100101 Firefox/106.0",
        "Connection": "keep-alive",
        "Referer": "https://musify.club/"
    }
    API_SESSION.proxies = shared.proxies

    SOURCE_TYPE = SourcePages.MUSIFY

    @classmethod
    def search_by_query(cls, query: str) -> Options:
        """Parse the raw user query and run a plaintext search.

        :param query: user-supplied search string; may be a raw free-text
            query or a structured one that cls.Query can decompose.
        :return: the result of plaintext_search for the (rendered) query.
        """
        query_obj = cls.Query(query)

        if query_obj.is_raw:
            return cls.plaintext_search(query_obj.query)

        return cls.plaintext_search(cls.get_plaintext_query(query_obj))

    @classmethod
    def get_plaintext_query(cls, query: Page.Query) -> str:
        """Render a structured query as ``artist - [album - ]song``.

        Missing fields fall back to the wildcard ``"*"``.

        :param query: structured query object with artist/album/song fields.
        :return: plaintext search string for musify.club.
        """
        if query.album is None:
            return f"{query.artist or '*'} - {query.song or '*'}"

        # FIX: original read `query.album * '*'` (a TypeError at runtime);
        # `or` was clearly intended, matching the artist/song fallbacks.
        return f"{query.artist or '*'} - {query.album or '*'} - {query.song or '*'}"

    @classmethod
    def get_soup_of_search(cls, query: str, trie=0) -> Optional[BeautifulSoup]:
        """Fetch the musify.club search page for *query* and parse it.

        Retries recursively on HTTP 503 up to ``cls.TRIES`` times, sleeping
        ``cls.TIMEOUT`` seconds between attempts (both presumably inherited
        from Page — not visible here; TODO confirm).

        :param query: plaintext search string (interpolated into the URL
            as-is; musify appears to tolerate unescaped queries).
        :param trie: current retry count — internal recursion parameter.
        :return: parsed page, or None on timeout / unrecoverable status.
        """
        url = f"https://musify.club/search?searchText={query}"
        LOGGER.debug(f"Trying to get soup from {url}")

        try:
            r = cls.API_SESSION.get(url, timeout=15)
        except requests.exceptions.Timeout:
            return None

        if r.status_code != 200:
            if r.status_code in [503] and trie < cls.TRIES:
                # FIX: the message said "youtube", but this request targets
                # musify.club.
                LOGGER.warning(f"musify blocked downloading. ({trie}-{cls.TRIES})")
                LOGGER.warning(f"retrying in {cls.TIMEOUT} seconds again")
                time.sleep(cls.TIMEOUT)
                return cls.get_soup_of_search(query, trie=trie + 1)

            # reached for non-503 errors as well as exhausted retries
            LOGGER.warning("too many tries, returning")
            return None

        return BeautifulSoup(r.content, features="html.parser")

    @classmethod
    def plaintext_search(cls, query: str) -> List[MusicObject]:
        """Search musify.club with a plaintext query and collect matches.

        NOTE(review): the track-matching half of this method references
        names that are undefined in this module (``phonetic_compares``,
        ``logging``, ``title``, ``artist``, ``cls.get_download_link``) —
        apparently leftovers of an older implementation; it will raise
        NameError if a track result is found. Kept so the intended logic
        is not lost, but it needs a rewrite.

        :param query: plaintext search string (annotation fixed: callers
            pass str, not Page.Query).
        :return: matched objects; empty list when nothing parsable is found.
        """
        search_soup = cls.get_soup_of_search(query=query)
        if search_soup is None:
            # FIX: previously returned None, violating the declared
            # List[MusicObject] contract.
            return []

        # album and songs
        # child of div class: contacts row
        for contact_container_soup in search_soup.find_all("div", {"class": ["contacts", "row"]}):
            pass  # TODO: album/song result-card parsing not implemented yet

        # song
        # div class: playlist__item
        for playlist_soup in search_soup.find_all("div", {"class": "playlist"}):
            pass  # TODO: playlist-item parsing not implemented yet

        # get the soup of the container with all track results
        tracklist_container_soup = search_soup.find_all("div", {"class": "playlist"})
        if len(tracklist_container_soup) == 0:
            return []
        if len(tracklist_container_soup) != 1:
            LOGGER.warning("HTML Layout of https://musify.club/ changed. (or bug)")
        tracklist_container_soup = tracklist_container_soup[0]

        tracklist_soup = tracklist_container_soup.find_all("div", {"class": "playlist__details"})

        def parse_track_soup(_track_soup):
            # anchors: [0] links to the artist page, [1] to the track page
            anchor_soups = _track_soup.find_all("a")
            artist_ = anchor_soups[0].text.strip()
            track_ = anchor_soups[1].text.strip()
            url_ = anchor_soups[1]['href']
            return artist_, track_, url_

        # check each track in the container, if they match
        for track_soup in tracklist_soup:
            artist_option, title_option, track_url = parse_track_soup(track_soup)

            # NOTE(review): everything below uses undefined names and will
            # raise NameError at runtime — dead merge residue, see docstring.
            title_match, title_distance = phonetic_compares.match_titles(title, title_option)
            artist_match, artist_distance = phonetic_compares.match_artists(artist, artist_option)

            logging.debug(f"{(title, title_option, title_match, title_distance)}")
            logging.debug(f"{(artist, artist_option, artist_match, artist_distance)}")

            if not title_match and not artist_match:
                return cls.get_download_link(track_url)

        return []

    @classmethod
    def fetch_album_details(cls, album: Album, flat: bool = False) -> Album:
        """Enrich *album* with musify.club data (not implemented yet).

        :param album: album to enrich; returned unchanged for now.
        :param flat: unused placeholder kept for interface symmetry.
        """
        return album

    @classmethod
    def fetch_song_details(cls, song: Song, flat: bool = False) -> Song:
        """Enrich *song* with musify.club data.

        Currently only bails out when the song has no musify source;
        actual fetching (lyrics) is still TODO.

        :param song: song to enrich; returned unchanged for now.
        :param flat: unused placeholder kept for interface symmetry.
        """
        source_list = song.source_collection.get_sources_from_page(cls.SOURCE_TYPE)
        if len(source_list) == 0:
            return song

        """
        TODO
        lyrics
        """

        return song