made the fetching from musify more consistent
This commit is contained in:
parent
a830e4eb61
commit
1be7f6e337
@ -12,10 +12,10 @@ session.headers = {
|
|||||||
|
|
||||||
|
|
||||||
def get_musify_url(row):
|
def get_musify_url(row):
|
||||||
title = row['title']
|
title = row.title
|
||||||
artists = row['artist']
|
artists = row.artist
|
||||||
|
|
||||||
url = f"https://musify.club/search/suggestions?term={title}"
|
url = f"https://musify.club/search/suggestions?term={artists[0]} - {title}"
|
||||||
|
|
||||||
r = session.get(url=url)
|
r = session.get(url=url)
|
||||||
if r.status_code == 200:
|
if r.status_code == 200:
|
||||||
@ -59,15 +59,19 @@ def download(row):
|
|||||||
return download_from_musify(file_, url)
|
return download_from_musify(file_, url)
|
||||||
|
|
||||||
|
|
||||||
|
def get_soup_of_search(query: str):
|
||||||
|
url = f"https://musify.club/search?searchText={query}"
|
||||||
|
print(url)
|
||||||
|
r = session.get(url)
|
||||||
|
if r.status_code != 200:
|
||||||
|
raise ConnectionError(f"{r.url} returned {r.status_code}:\n{r.content}")
|
||||||
|
return bs4.BeautifulSoup(r.content, features="html.parser")
|
||||||
|
|
||||||
def search_for_track(row):
|
def search_for_track(row):
|
||||||
track = row.title
|
track = row.title
|
||||||
artist = row.artist
|
artist = row.artist
|
||||||
|
|
||||||
url = f"https://musify.club/search?searchText={track}"
|
soup = get_soup_of_search(f"{artist[0]} - {track}")
|
||||||
r = session.get(url)
|
|
||||||
if r.status_code != 200:
|
|
||||||
raise ConnectionError(f"{r.url} returned {r.status_code}:\n{r.content}")
|
|
||||||
soup = bs4.BeautifulSoup(r.content, features="html.parser")
|
|
||||||
tracklist_container_soup = soup.find_all("div", {"class": "playlist"})
|
tracklist_container_soup = soup.find_all("div", {"class": "playlist"})
|
||||||
if len(tracklist_container_soup) != 1:
|
if len(tracklist_container_soup) != 1:
|
||||||
raise Exception("Connfusion Error. HTML Layout of https://musify.club changed.")
|
raise Exception("Connfusion Error. HTML Layout of https://musify.club changed.")
|
||||||
@ -98,7 +102,6 @@ def search_for_track(row):
|
|||||||
|
|
||||||
|
|
||||||
def get_musify_url_slow(row):
|
def get_musify_url_slow(row):
|
||||||
print(row)
|
|
||||||
result = search_for_track(row)
|
result = search_for_track(row)
|
||||||
if result is not None:
|
if result is not None:
|
||||||
return result
|
return result
|
||||||
@ -109,9 +112,11 @@ if __name__ == "__main__":
|
|||||||
import json
|
import json
|
||||||
|
|
||||||
df = pd.read_csv("../temp/.cache1.csv")
|
df = pd.read_csv("../temp/.cache1.csv")
|
||||||
print(df)
|
|
||||||
|
|
||||||
for idx, row in df.iterrows():
|
for idx, row in df.iterrows():
|
||||||
row['artist'] = json.loads(row['artist'].replace("'", '"'))
|
row['artist'] = json.loads(row['artist'].replace("'", '"'))
|
||||||
print("-" * 200)
|
print("-" * 200)
|
||||||
|
print("fast")
|
||||||
|
print(get_musify_url(row))
|
||||||
|
print("slow")
|
||||||
print(get_musify_url_slow(row))
|
print(get_musify_url_slow(row))
|
||||||
|
@ -5,7 +5,7 @@ TITLE_THRESHOLD_LEVENSHTEIN = 2
|
|||||||
|
|
||||||
def match_titles(title_1: str, title_2: str) -> (bool, int):
|
def match_titles(title_1: str, title_2: str) -> (bool, int):
|
||||||
distance = jellyfish.levenshtein_distance(title_1, title_2)
|
distance = jellyfish.levenshtein_distance(title_1, title_2)
|
||||||
return distance > 1, distance
|
return distance > TITLE_THRESHOLD_LEVENSHTEIN, distance
|
||||||
|
|
||||||
|
|
||||||
def match_artists(artist_1, artist_2: str) -> (bool, int):
|
def match_artists(artist_1, artist_2: str) -> (bool, int):
|
||||||
|
Loading…
Reference in New Issue
Block a user