added youtube as music src
This commit is contained in:
BIN
src/__pycache__/musify.cpython-310.pyc
Normal file
BIN
src/__pycache__/musify.cpython-310.pyc
Normal file
Binary file not shown.
BIN
src/__pycache__/youtube_music.cpython-310.pyc
Normal file
BIN
src/__pycache__/youtube_music.cpython-310.pyc
Normal file
Binary file not shown.
@@ -3,9 +3,11 @@ import os.path
|
||||
import pandas as pd
|
||||
from mutagen.easyid3 import EasyID3
|
||||
import json
|
||||
|
||||
import logging
|
||||
|
||||
import musify
|
||||
import youtube_music
|
||||
|
||||
"""
|
||||
https://en.wikipedia.org/wiki/ID3
|
||||
https://mutagen.readthedocs.io/en/latest/user/id3.html
|
||||
@@ -87,15 +89,25 @@ class Download:
|
||||
|
||||
for idx, row in self.dataframe.iterrows():
|
||||
row['artist'] = json.loads(row['artist'].replace("'", '"'))
|
||||
self.download(row['path'], row['file'], row['url'])
|
||||
if self.path_stuff(row['path'], row['file']):
|
||||
continue
|
||||
|
||||
src = row['src']
|
||||
if src == 'musify':
|
||||
self.download_from_musify(row['path'], row['file'], row['url'])
|
||||
elif src == 'youtube':
|
||||
youtube_music.download(row)
|
||||
self.write_metadata(row, row['file'])
|
||||
|
||||
def download(self, path, file, url):
|
||||
if os.path.exists(file):
|
||||
logging.info(f"'{file}' does already exist, thus not downloading.")
|
||||
return
|
||||
def path_stuff(self, path: str, file_: str):
    """Decide whether a download is still needed and prepare its directory.

    :param path: directory the file is going to live in; created (including
        parents) when the file is not present yet.
    :param file_: full path of the target file.
    :return: True when ``file_`` already exists, i.e. the download should be
        skipped; False when the directory was prepared and the download
        should go ahead.
    """
    already_present = os.path.exists(file_)

    if not already_present:
        # only touch the file system when something will actually be written
        os.makedirs(path, exist_ok=True)
    else:
        logging.info(f"'{file_}' does already exist, thus not downloading.")

    return already_present
|
||||
|
||||
def download_from_musify(self, path, file, url):
|
||||
logging.info(f"downloading: '{url}'")
|
||||
r = self.session.get(url)
|
||||
if r.status_code != 200:
|
||||
@@ -118,16 +130,6 @@ class Download:
|
||||
row[key] = str(row[key])
|
||||
audiofile[key] = row[key]
|
||||
|
||||
"""
|
||||
audiofile["artist"] = row['artist']
|
||||
audiofile["albumartist"] = row['album_artist']
|
||||
audiofile["date"] = str(row['year'])
|
||||
audiofile["genre"] = row['genre']
|
||||
audiofile["title"] = row['title']
|
||||
audiofile["album"] = row['album']
|
||||
audiofile["tracknumber"] = str(row['track'])
|
||||
"""
|
||||
|
||||
audiofile.save()
|
||||
|
||||
|
||||
|
@@ -1,69 +1,47 @@
|
||||
import json
|
||||
import os.path
|
||||
|
||||
import pandas as pd
|
||||
import requests
|
||||
import logging
|
||||
|
||||
import musify
|
||||
import youtube_music
|
||||
|
||||
|
||||
class Download:
|
||||
def __init__(self, metadata_csv: str = ".cache1.csv", session: requests.Session = requests.Session(),
|
||||
file: str = ".cache2.csv", temp: str = "temp") -> None:
|
||||
self.temp = temp
|
||||
|
||||
self.session = session
|
||||
self.session.headers = {
|
||||
"Connection": "keep-alive",
|
||||
"Referer": "https://musify.club/"
|
||||
}
|
||||
|
||||
self.metadata = pd.read_csv(os.path.join(self.temp, metadata_csv), index_col=0)
|
||||
|
||||
self.urls = []
|
||||
missing_urls, self.urls = self.check_musify()
|
||||
|
||||
self.dump_urls(file)
|
||||
|
||||
def check_musify_track(self, row):
|
||||
artist = json.loads(row['artist'].replace("'", '"'))
|
||||
track = row['title']
|
||||
|
||||
url = f"https://musify.club/search/suggestions?term={track}"
|
||||
|
||||
r = self.session.get(url=url)
|
||||
if r.status_code == 200:
|
||||
autocomplete = r.json()
|
||||
for row in autocomplete:
|
||||
if any(a in row['label'] for a in artist) and "/track" in row['url']:
|
||||
return row
|
||||
|
||||
return None
|
||||
|
||||
def check_musify(self, urls: list = []):
|
||||
missing_urls = []
|
||||
|
||||
def get_download_link(default_url):
|
||||
# https://musify.club/track/dl/18567672/rauw-alejandro-te-felicito-feat-shakira.mp3
|
||||
# /track/sundenklang-wenn-mein-herz-schreit-3883217'
|
||||
|
||||
file_ = default_url.split("/")[-1]
|
||||
musify_id = file_.split("-")[-1]
|
||||
musify_name = "-".join(file_.split("-")[:-1])
|
||||
|
||||
logging.info(f"https://musify.club/track/dl/{musify_id}/{musify_name}.mp3")
|
||||
|
||||
return f"https://musify.club/track/dl/{musify_id}/{musify_name}.mp3"
|
||||
|
||||
for idx, row in self.metadata.iterrows():
|
||||
url = self.check_musify_track(row)
|
||||
if url is None:
|
||||
missing_urls.append(row['id'])
|
||||
row['artist'] = json.loads(row['artist'].replace("'", '"'))
|
||||
|
||||
# check musify
|
||||
musify_url = musify.get_musify_url(row)
|
||||
if musify_url is not None:
|
||||
self.add_url(musify_url, 'musify', dict(row))
|
||||
continue
|
||||
data = dict(row)
|
||||
data['url'] = get_download_link(url['url'])
|
||||
urls.append(data)
|
||||
|
||||
return missing_urls, urls
|
||||
# check youtube
|
||||
youtube_url = youtube_music.get_youtube_url(row)
|
||||
if youtube_url is not None:
|
||||
self.add_url(youtube_url, 'youtube', dict(row))
|
||||
continue
|
||||
|
||||
logging.warning(f"Didn't find any sources for {row['title']}")
|
||||
|
||||
self.dump_urls(file)
|
||||
|
||||
|
||||
def add_url(self, url: str, src: str, row: dict):
    """Tag a metadata row with its download source and queue it.

    :param url: location the track can be fetched from.
    :param src: name of the source that produced ``url``.
    :param row: track metadata; it is modified in place (the keys 'url' and
        'src' are set) and the very same object is appended to ``self.urls``.
    """
    row['url'], row['src'] = url, src
    self.urls.append(row)
|
||||
|
||||
|
||||
def dump_urls(self, file: str = ".cache2.csv"):
|
||||
df = pd.DataFrame(self.urls)
|
||||
|
35
src/musify.py
Normal file
35
src/musify.py
Normal file
@@ -0,0 +1,35 @@
|
||||
import logging
|
||||
import requests
|
||||
|
||||
# HTTP session created once at import time and used by get_musify_url below.
session = requests.Session()
# NOTE(review): assigning a plain dict replaces the headers requests sets on a
# new Session (e.g. its default User-Agent) instead of extending them —
# confirm that this is intended.
session.headers = {
    "Connection": "keep-alive",
    "Referer": "https://musify.club/"
}
|
||||
|
||||
def get_musify_url(row):
    """Look up a direct musify.club download link for a track.

    Queries the musify search-suggestion endpoint with the track title and
    returns the download link of the first suggestion that is a track (its
    url contains "/track") and whose label contains at least one artist.

    :param row: mapping with a 'title' entry (used as search term) and an
        'artist' entry (iterable of artist names).
    :return: the download url as a string, or None when the endpoint does not
        answer with status 200 or no suggestion matches.
    """
    title = row['title']
    artists = row['artist']

    # Hand the title over via `params` so that it gets percent-encoded;
    # interpolating it into the url breaks on titles containing characters
    # such as '&', '#', '?' or '+'.
    r = session.get(
        url="https://musify.club/search/suggestions",
        params={"term": title},
    )
    if r.status_code != 200:
        return None

    # A dedicated loop name keeps the `row` argument from being shadowed.
    for suggestion in r.json():
        is_track = "/track" in suggestion['url']
        if is_track and any(a in suggestion['label'] for a in artists):
            return get_download_link(suggestion['url'])

    return None
|
||||
|
||||
def get_download_link(default_url):
    """Build the direct mp3 download url from a musify track page url.

    The last path segment of a track page looks like "<name>-<id>", e.g.
    "/track/sundenklang-wenn-mein-herz-schreit-3883217"; the download url
    has the shape "https://musify.club/track/dl/<id>/<name>.mp3", e.g.
    "https://musify.club/track/dl/18567672/rauw-alejandro-te-felicito-feat-shakira.mp3".

    :param default_url: absolute or relative url of the track page.
    :return: the download url; it is also written to the log at info level.
    """
    slug = default_url.rpartition("/")[2]
    # everything after the last dash is the id, everything before it the name
    track_name, _, track_id = slug.rpartition("-")

    download_link = f"https://musify.club/track/dl/{track_id}/{track_name}.mp3"
    logging.info(download_link)

    return download_link
|
56
src/youtube_music.py
Normal file
56
src/youtube_music.py
Normal file
@@ -0,0 +1,56 @@
|
||||
import youtube_dl
|
||||
import pandas as pd
|
||||
import jellyfish
|
||||
import logging
|
||||
|
||||
|
||||
# Options for the YoutubeDL instance that is only used for searching.
# 'noplaylist' is a boolean flag; the former string 'True' only worked
# because any non-empty string is truthy.
YDL_OPTIONS = {'format': 'bestaudio', 'noplaylist': True}
# Key of a search result entry under which the video url is read.
YOUTUBE_URL_KEY = 'webpage_url'
|
||||
|
||||
def get_youtube_from_isrc(isrc: str):
    """Search YouTube for ``isrc`` and describe the first hit.

    :param isrc: ISRC code that is handed to youtube-dl as a "ytsearch:"
        query.
    :return: dict with the keys 'url' and 'title' taken from the first search
        result, or None when the search yields no entries or the first entry
        carries no url.
    """
    # https://stackoverflow.com/questions/63388364/searching-youtube-videos-using-youtube-dl
    with youtube_dl.YoutubeDL(YDL_OPTIONS) as ydl:
        info = ydl.extract_info(f"ytsearch:{isrc}", download=False)

    # An unknown ISRC produces an empty result list; indexing it blindly
    # would raise IndexError instead of reporting "nothing found".
    entries = list((info or {}).get('entries') or [])
    if not entries:
        logging.debug(f"no youtube search results for '{isrc}'")
        return None

    video = entries[0]
    if not video or YOUTUBE_URL_KEY not in video:
        return None

    return {
        'url': video[YOUTUBE_URL_KEY],
        'title': video['title']
    }
|
||||
|
||||
def get_youtube_url(row, max_distance: int = 1):
    """Find a YouTube url for a track via its ISRC and sanity-check the title.

    The first search hit for the ISRC is only accepted when the Levenshtein
    distance between the lower-cased track title and the lower-cased video
    title does not exceed ``max_distance``.

    :param row: mapping with the entries 'isrc' and 'title'.
    :param max_distance: highest edit distance between both titles that is
        still accepted; defaults to 1.
    :return: the video url, or None when the row has no ISRC, the search
        finds nothing, or the titles differ too much.
    """
    if pd.isna(row['isrc']):
        return None

    result = get_youtube_from_isrc(row['isrc'])
    if result is None:
        # the lookup reports "nothing found" as None; subscripting it would
        # raise TypeError
        logging.debug(f"no youtube result for isrc '{row['isrc']}'")
        return None

    real_title = row['title'].lower()
    video_title = result['title'].lower()

    # plain edit distance on the titles (the log text calls it "phonetic")
    distance = jellyfish.levenshtein_distance(real_title, video_title)
    logging.debug(f"{real_title} {video_title} {distance}")

    if distance > max_distance:
        logging.warning(f"dont downloading {result['url']} cuz the phonetic distance ({distance}) between {real_title} and {video_title} is to high.")
        return None

    return result['url']
|
||||
|
||||
def download(row):
    """Download the audio of a YouTube video to the path stored in the row.

    :param row: mapping with the entries 'url' (video url) and 'file'
        (path the download is written to).
    """
    url = row['url']
    file_ = row['file']

    options = {
        'format': 'bestaudio/best',
        'keepvideo': False,
        # youtube-dl treats 'outtmpl' as a %-style template, so a literal
        # '%' in the file name has to be doubled to be taken verbatim.
        'outtmpl': str(file_).replace('%', '%%')
    }
    # NOTE(review): no postprocessor is configured, so the stream is
    # presumably stored in its native container even when `file_` ends in
    # ".mp3" — confirm that later tagging can cope with that.

    with youtube_dl.YoutubeDL(options) as ydl:
        ydl.download([url])
|
||||
|
||||
if __name__ == "__main__":
    # Manual smoke test: look up two queries and print what comes back.
    # example isrc that exists on youtube music
    ISRC = "DEUM71500715"

    # the second query is a made-up string
    for query in (ISRC, "aslhfklasdhfjklasdfjkhasdjlfhlasdjfkuuiueiw"):
        print(get_youtube_from_isrc(query))
|
Reference in New Issue
Block a user