diff --git a/src/download_links.py b/src/download_links.py index 1e196d2..d9fceb8 100644 --- a/src/download_links.py +++ b/src/download_links.py @@ -1,57 +1,45 @@ -import json -import os.path import requests import logging import musify import youtube_music +from metadata import database class Download: - def __init__(self, metadata_csv: str = ".cache1.csv", proxies: dict = None, - file: str = ".cache2.csv", temp: str = "temp") -> None: + def __init__(self, metadata_csv: str = ".cache1.csv", proxies: dict = None) -> None: if proxies is not None: musify.set_proxy(proxies) - self.temp = temp - self.metadata = pd.read_csv(os.path.join(self.temp, metadata_csv), index_col=0) - self.urls = [] - for idx, row in self.metadata.iterrows(): - row['artist'] = json.loads(row['artist'].replace("'", '"')) + for row in database.get_tracks_to_download(): + row['artists'] = [artist['name'] for artist in row['artists']] + + id_ = row['id'] # check musify musify_url = musify.get_musify_url(row) if musify_url is not None: - self.add_url(musify_url, 'musify', dict(row)) + self.add_url(musify_url, 'musify', id_) continue # check YouTube youtube_url = youtube_music.get_youtube_url(row) if youtube_url is not None: - self.add_url(youtube_url, 'youtube', dict(row)) + self.add_url(youtube_url, 'youtube', id_) continue # check musify again, but with a different methode that takes longer musify_url = musify.get_musify_url_slow(row) if musify_url is not None: - self.add_url(musify_url, 'musify', dict(row)) + self.add_url(musify_url, 'musify', id_) continue logging.warning(f"Didn't find any sources for {row['title']}") - self.dump_urls(file) - - def add_url(self, url: str, src: str, row: dict): - row['url'] = url - row['src'] = src - - self.urls.append(row) - - def dump_urls(self, file: str = ".cache2.csv"): - df = pd.DataFrame(self.urls) - df.to_csv(os.path.join(self.temp, file)) + def add_url(self, url: str, src: str, id_: str): + database.set_download_data(id_, url, src) if __name__ == "__main__": @@ -62,4 +50,4 @@ if __name__ == "__main__": s = requests.Session() s.proxies = proxies - download = Download(session=s) + download = Download() diff --git a/src/metadata/database.py b/src/metadata/database.py index 9c42439..2063ffa 100644 --- a/src/metadata/database.py +++ b/src/metadata/database.py @@ -198,6 +198,9 @@ def get_tracks_to_download(): return get_custom_track(["track.downloaded == 0"]) +def get_tracks_without_src(): + return get_custom_track(["(track.url IS NULL OR track.src IS NULL)"]) + def get_tracks_without_isrc(): return get_custom_track(["track.isrc IS NULL"]) diff --git a/src/musify.py b/src/musify.py index 5a652e1..69d534a 100644 --- a/src/musify.py +++ b/src/musify.py @@ -16,8 +16,8 @@ def set_proxy(proxies): def get_musify_url(row): - title = row.title - artists = row.artist + title = row['title'] + artists = row['artists'] url = f"https://musify.club/search/suggestions?term={artists[0]} - {title}" @@ -74,8 +74,8 @@ def get_soup_of_search(query: str): def search_for_track(row): - track = row.title - artist = row.artist + track = row['title'] + artist = row['artists'] soup = get_soup_of_search(f"{artist[0]} - {track}") tracklist_container_soup = soup.find_all("div", {"class": "playlist"})