implemented the file and folder paths in the db

This commit is contained in:
lars 2022-10-31 00:56:32 +01:00
parent f16033b584
commit 9f05d43022
3 changed files with 19 additions and 28 deletions

View File

@ -1,57 +1,45 @@
import json
import os.path
import requests
import logging
import musify
import youtube_music
from metadata import database
class Download:
def __init__(self, metadata_csv: str = ".cache1.csv", proxies: dict = None,
file: str = ".cache2.csv", temp: str = "temp") -> None:
def __init__(self, metadata_csv: str = ".cache1.csv", proxies: dict = None) -> None:
if proxies is not None:
musify.set_proxy(proxies)
self.temp = temp
self.metadata = pd.read_csv(os.path.join(self.temp, metadata_csv), index_col=0)
self.urls = []
for idx, row in self.metadata.iterrows():
row['artist'] = json.loads(row['artist'].replace("'", '"'))
for row in database.get_tracks_to_download():
row['artists'] = [artist['name'] for artist in row['artists']]
id_ = row['id']
# check musify
musify_url = musify.get_musify_url(row)
if musify_url is not None:
self.add_url(musify_url, 'musify', dict(row))
self.add_url(musify_url, 'musify', id_)
continue
# check YouTube
youtube_url = youtube_music.get_youtube_url(row)
if youtube_url is not None:
self.add_url(youtube_url, 'youtube', dict(row))
self.add_url(youtube_url, 'youtube', id_)
continue
# check musify again, but with a different method that takes longer
musify_url = musify.get_musify_url_slow(row)
if musify_url is not None:
self.add_url(musify_url, 'musify', dict(row))
self.add_url(musify_url, 'musify', id_)
continue
logging.warning(f"Didn't find any sources for {row['title']}")
self.dump_urls(file)
def add_url(self, url: str, src: str, row: dict):
row['url'] = url
row['src'] = src
self.urls.append(row)
def dump_urls(self, file: str = ".cache2.csv"):
df = pd.DataFrame(self.urls)
df.to_csv(os.path.join(self.temp, file))
# Record the download source found for a track: forwards the track id, the
# URL and the source name to database.set_download_data. The call sites in
# this class pass 'musify' or 'youtube' as src. Nothing is returned.
def add_url(self, url: str, src: str, id_: str):
database.set_download_data(id_, url, src)
if __name__ == "__main__":
@ -62,4 +50,4 @@ if __name__ == "__main__":
s = requests.Session()
s.proxies = proxies
download = Download(session=s)
download = Download()

View File

@ -198,6 +198,9 @@ def get_tracks_to_download():
return get_custom_track(["track.downloaded == 0"])
# Return whatever get_custom_track yields for the single condition
# "(track.url IS NULL OR track.src IS NULL)", i.e. tracks missing a url or a src.
# NOTE(review): presumably the condition string is used as an SQL WHERE
# filter -- confirm in get_custom_track.
def get_tracks_without_src():
return get_custom_track(["(track.url IS NULL OR track.src IS NULL)"])
# Return whatever get_custom_track yields for the single condition
# "track.isrc IS NULL", i.e. tracks whose isrc value is not set.
# NOTE(review): presumably the condition string is used as an SQL WHERE
# filter -- confirm in get_custom_track.
def get_tracks_without_isrc():
return get_custom_track(["track.isrc IS NULL"])

View File

@ -16,8 +16,8 @@ def set_proxy(proxies):
def get_musify_url(row):
title = row.title
artists = row.artist
title = row['title']
artists = row['artists']
url = f"https://musify.club/search/suggestions?term={artists[0]} - {title}"
@ -74,8 +74,8 @@ def get_soup_of_search(query: str):
def search_for_track(row):
track = row.title
artist = row.artist
track = row['title']
artist = row['artists']
soup = get_soup_of_search(f"{artist[0]} - {track}")
tracklist_container_soup = soup.find_all("div", {"class": "playlist"})