implemented the file and folder paths in the db
This commit is contained in:
parent
f16033b584
commit
9f05d43022
@ -1,57 +1,45 @@
|
||||
import json
|
||||
import os.path
|
||||
import requests
|
||||
import logging
|
||||
|
||||
import musify
|
||||
import youtube_music
|
||||
from metadata import database
|
||||
|
||||
|
||||
class Download:
|
||||
def __init__(self, metadata_csv: str = ".cache1.csv", proxies: dict = None,
|
||||
file: str = ".cache2.csv", temp: str = "temp") -> None:
|
||||
def __init__(self, metadata_csv: str = ".cache1.csv", proxies: dict = None) -> None:
|
||||
if proxies is not None:
|
||||
musify.set_proxy(proxies)
|
||||
|
||||
self.temp = temp
|
||||
self.metadata = pd.read_csv(os.path.join(self.temp, metadata_csv), index_col=0)
|
||||
|
||||
self.urls = []
|
||||
|
||||
for idx, row in self.metadata.iterrows():
|
||||
row['artist'] = json.loads(row['artist'].replace("'", '"'))
|
||||
for row in database.get_tracks_to_download():
|
||||
row['artists'] = [artist['name'] for artist in row['artists']]
|
||||
|
||||
id_ = row['id']
|
||||
|
||||
# check musify
|
||||
musify_url = musify.get_musify_url(row)
|
||||
if musify_url is not None:
|
||||
self.add_url(musify_url, 'musify', dict(row))
|
||||
self.add_url(musify_url, 'musify', id_)
|
||||
continue
|
||||
|
||||
# check YouTube
|
||||
youtube_url = youtube_music.get_youtube_url(row)
|
||||
if youtube_url is not None:
|
||||
self.add_url(youtube_url, 'youtube', dict(row))
|
||||
self.add_url(youtube_url, 'youtube', id_)
|
||||
continue
|
||||
|
||||
# check musify again, but with a different methode that takes longer
|
||||
musify_url = musify.get_musify_url_slow(row)
|
||||
if musify_url is not None:
|
||||
self.add_url(musify_url, 'musify', dict(row))
|
||||
self.add_url(musify_url, 'musify', id_)
|
||||
continue
|
||||
|
||||
logging.warning(f"Didn't find any sources for {row['title']}")
|
||||
|
||||
self.dump_urls(file)
|
||||
|
||||
def add_url(self, url: str, src: str, row: dict):
|
||||
row['url'] = url
|
||||
row['src'] = src
|
||||
|
||||
self.urls.append(row)
|
||||
|
||||
def dump_urls(self, file: str = ".cache2.csv"):
|
||||
df = pd.DataFrame(self.urls)
|
||||
df.to_csv(os.path.join(self.temp, file))
|
||||
def add_url(self, url: str, src: str, id_: str):
|
||||
database.set_download_data(id_, url, src)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
@ -62,4 +50,4 @@ if __name__ == "__main__":
|
||||
|
||||
s = requests.Session()
|
||||
s.proxies = proxies
|
||||
download = Download(session=s)
|
||||
download = Download()
|
||||
|
@ -198,6 +198,9 @@ def get_tracks_to_download():
|
||||
return get_custom_track(["track.downloaded == 0"])
|
||||
|
||||
|
||||
def get_tracks_without_src():
|
||||
return get_custom_track(["(track.url IS NULL OR track.src IS NULL)"])
|
||||
|
||||
def get_tracks_without_isrc():
|
||||
return get_custom_track(["track.isrc IS NULL"])
|
||||
|
||||
|
@ -16,8 +16,8 @@ def set_proxy(proxies):
|
||||
|
||||
|
||||
def get_musify_url(row):
|
||||
title = row.title
|
||||
artists = row.artist
|
||||
title = row['title']
|
||||
artists = row['artists']
|
||||
|
||||
url = f"https://musify.club/search/suggestions?term={artists[0]} - {title}"
|
||||
|
||||
@ -74,8 +74,8 @@ def get_soup_of_search(query: str):
|
||||
|
||||
|
||||
def search_for_track(row):
|
||||
track = row.title
|
||||
artist = row.artist
|
||||
track = row['title']
|
||||
artist = row['artists']
|
||||
|
||||
soup = get_soup_of_search(f"{artist[0]} - {track}")
|
||||
tracklist_container_soup = soup.find_all("div", {"class": "playlist"})
|
||||
|
Loading…
Reference in New Issue
Block a user