implemented the file and folder paths in the db
This commit is contained in:
parent
f16033b584
commit
9f05d43022
@ -1,57 +1,45 @@
|
|||||||
import json
|
|
||||||
import os.path
|
|
||||||
import requests
|
import requests
|
||||||
import logging
|
import logging
|
||||||
|
|
||||||
import musify
|
import musify
|
||||||
import youtube_music
|
import youtube_music
|
||||||
|
from metadata import database
|
||||||
|
|
||||||
|
|
||||||
class Download:
|
class Download:
|
||||||
def __init__(self, metadata_csv: str = ".cache1.csv", proxies: dict = None,
|
def __init__(self, metadata_csv: str = ".cache1.csv", proxies: dict = None) -> None:
|
||||||
file: str = ".cache2.csv", temp: str = "temp") -> None:
|
|
||||||
if proxies is not None:
|
if proxies is not None:
|
||||||
musify.set_proxy(proxies)
|
musify.set_proxy(proxies)
|
||||||
|
|
||||||
self.temp = temp
|
|
||||||
self.metadata = pd.read_csv(os.path.join(self.temp, metadata_csv), index_col=0)
|
|
||||||
|
|
||||||
self.urls = []
|
self.urls = []
|
||||||
|
|
||||||
for idx, row in self.metadata.iterrows():
|
for row in database.get_tracks_to_download():
|
||||||
row['artist'] = json.loads(row['artist'].replace("'", '"'))
|
row['artists'] = [artist['name'] for artist in row['artists']]
|
||||||
|
|
||||||
|
id_ = row['id']
|
||||||
|
|
||||||
# check musify
|
# check musify
|
||||||
musify_url = musify.get_musify_url(row)
|
musify_url = musify.get_musify_url(row)
|
||||||
if musify_url is not None:
|
if musify_url is not None:
|
||||||
self.add_url(musify_url, 'musify', dict(row))
|
self.add_url(musify_url, 'musify', id_)
|
||||||
continue
|
continue
|
||||||
|
|
||||||
# check YouTube
|
# check YouTube
|
||||||
youtube_url = youtube_music.get_youtube_url(row)
|
youtube_url = youtube_music.get_youtube_url(row)
|
||||||
if youtube_url is not None:
|
if youtube_url is not None:
|
||||||
self.add_url(youtube_url, 'youtube', dict(row))
|
self.add_url(youtube_url, 'youtube', id_)
|
||||||
continue
|
continue
|
||||||
|
|
||||||
# check musify again, but with a different method that takes longer
|
# check musify again, but with a different method that takes longer
|
||||||
musify_url = musify.get_musify_url_slow(row)
|
musify_url = musify.get_musify_url_slow(row)
|
||||||
if musify_url is not None:
|
if musify_url is not None:
|
||||||
self.add_url(musify_url, 'musify', dict(row))
|
self.add_url(musify_url, 'musify', id_)
|
||||||
continue
|
continue
|
||||||
|
|
||||||
logging.warning(f"Didn't find any sources for {row['title']}")
|
logging.warning(f"Didn't find any sources for {row['title']}")
|
||||||
|
|
||||||
self.dump_urls(file)
|
def add_url(self, url: str, src: str, id_: str):
|
||||||
|
database.set_download_data(id_, url, src)
|
||||||
def add_url(self, url: str, src: str, row: dict):
|
|
||||||
row['url'] = url
|
|
||||||
row['src'] = src
|
|
||||||
|
|
||||||
self.urls.append(row)
|
|
||||||
|
|
||||||
def dump_urls(self, file: str = ".cache2.csv"):
|
|
||||||
df = pd.DataFrame(self.urls)
|
|
||||||
df.to_csv(os.path.join(self.temp, file))
|
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
@ -62,4 +50,4 @@ if __name__ == "__main__":
|
|||||||
|
|
||||||
s = requests.Session()
|
s = requests.Session()
|
||||||
s.proxies = proxies
|
s.proxies = proxies
|
||||||
download = Download(session=s)
|
download = Download()
|
||||||
|
@ -198,6 +198,9 @@ def get_tracks_to_download():
|
|||||||
return get_custom_track(["track.downloaded == 0"])
|
return get_custom_track(["track.downloaded == 0"])
|
||||||
|
|
||||||
|
|
||||||
|
def get_tracks_without_src():
|
||||||
|
return get_custom_track(["(track.url IS NULL OR track.src IS NULL)"])
|
||||||
|
|
||||||
def get_tracks_without_isrc():
|
def get_tracks_without_isrc():
|
||||||
return get_custom_track(["track.isrc IS NULL"])
|
return get_custom_track(["track.isrc IS NULL"])
|
||||||
|
|
||||||
|
@ -16,8 +16,8 @@ def set_proxy(proxies):
|
|||||||
|
|
||||||
|
|
||||||
def get_musify_url(row):
|
def get_musify_url(row):
|
||||||
title = row.title
|
title = row['title']
|
||||||
artists = row.artist
|
artists = row['artists']
|
||||||
|
|
||||||
url = f"https://musify.club/search/suggestions?term={artists[0]} - {title}"
|
url = f"https://musify.club/search/suggestions?term={artists[0]} - {title}"
|
||||||
|
|
||||||
@ -74,8 +74,8 @@ def get_soup_of_search(query: str):
|
|||||||
|
|
||||||
|
|
||||||
def search_for_track(row):
|
def search_for_track(row):
|
||||||
track = row.title
|
track = row['title']
|
||||||
artist = row.artist
|
artist = row['artists']
|
||||||
|
|
||||||
soup = get_soup_of_search(f"{artist[0]} - {track}")
|
soup = get_soup_of_search(f"{artist[0]} - {track}")
|
||||||
tracklist_container_soup = soup.find_all("div", {"class": "playlist"})
|
tracklist_container_soup = soup.find_all("div", {"class": "playlist"})
|
||||||
|
Loading…
Reference in New Issue
Block a user