Added the option to choose the active audio sources and change their priority

This commit is contained in:
Lars Noack 2022-11-16 13:21:30 +01:00
parent 5240f3eb34
commit 0cb5cd32e9
7 changed files with 156 additions and 13 deletions

View File

@ -1,5 +1,9 @@
import music_kraken
# from .audio_source.sources.musify import Musify
from .audio_source.sources.youtube import Youtube
# Entry point: start the music_kraken command line interface when this
# file is executed as a script.
if __name__ == "__main__":
music_kraken.cli(start_at=0, only_lyrics=False)
music_kraken.cli()
# Youtube.fetch_audio({'title': 'dfas', '': '', 'isrc': ''})
# Youtube.fetch_audio({'title': 'dfas', 'url': '', 'file': 'dasf', 'isrc': ''})

View File

@ -13,6 +13,12 @@ from .sources import (
logger = DOWNLOAD_LOGGER
# maps each source name to the class that is used to get data from that source
sources = {
'Youtube': youtube.Youtube,
'Musify': musify.Musify
}
"""
https://en.wikipedia.org/wiki/ID3
https://mutagen.readthedocs.io/en/latest/user/id3.html
@ -35,12 +41,16 @@ class Download:
self.write_metadata(row, row['file'])
continue
download_success = Download.download_from_src(row['src'], row)
"""
download_success = None
src = row['src']
if src == 'musify':
download_success = musify.download(row)
elif src == 'youtube':
download_success = youtube.download(row)
"""
if download_success == -1:
logger.warning(f"couldn't download {row['url']} from {row['src']}")
@ -48,6 +58,14 @@ class Download:
self.write_metadata(row, row['file'])
@staticmethod
def download_from_src(src, row):
    """Download the audio for ``row`` with the source registered as ``src``.

    Args:
        src: Name of the audio source; has to be a key of ``sources``.
        row: Song data that is handed to the source unchanged.

    Returns:
        Whatever ``fetch_audio`` of the selected source class returns.

    Raises:
        ValueError: If ``src`` is not a key of ``sources``.
    """
    if src in sources:
        # the mapping stores classes, fetch_audio is called on the class itself
        return sources[src].fetch_audio(row)

    raise ValueError(f"source {src} seems to not exist")
@staticmethod
def write_metadata(row, file_path):
if not os.path.exists(file_path):

View File

@ -1,4 +1,5 @@
from ..utils.shared import *
from .sources import (
youtube,
musify,
@ -7,6 +8,12 @@ from .sources import (
logger = URL_DOWNLOAD_LOGGER
# maps each source name to the class that is used to get data from that source
sources = {
'Youtube': youtube.Youtube,
'Musify': musify.Musify
}
class Download:
def __init__(self) -> None:
@ -28,15 +35,15 @@ class Download:
self.add_url(file_path, 'file', id_)
continue
"""
"""
# check YouTube
youtube_url = youtube.get_youtube_url(row)
youtube_url = youtube.Youtube.fetch_source(row)
if youtube_url is not None:
self.add_url(youtube_url, 'youtube', id_)
continue
# check musify
musify_url = musify.get_musify_url(row)
musify_url = musify.Musify.fetch_source(row)
if musify_url is not None:
self.add_url(musify_url, 'musify', id_)
continue
@ -46,9 +53,23 @@ class Download:
if musify_url is not None:
self.add_url(musify_url, 'musify', id_)
continue
"""
for src in AUDIO_SOURCES:
res = Download.fetch_from_src(row, src)
if res is not None:
Download.add_url(res, src, id_)
break
logger.warning(f"Didn't find any sources for {row['title']}")
@staticmethod
def fetch_from_src(row: dict, src: str):
    """Ask the source registered as ``src`` for a download source of ``row``.

    Args:
        row: Song data that is handed to the source unchanged.
        src: Name of the audio source; has to be a key of ``sources``.

    Returns:
        Whatever ``fetch_source`` of the selected source class returns.

    Raises:
        ValueError: If ``src`` is not a key of ``sources``.
    """
    if src in sources:
        # the mapping stores classes, fetch_source is called on the class itself
        return sources[src].fetch_source(row)

    raise ValueError(f"source {src} seems to not exist")
# Hands the url and the name of the source it came from to
# database.set_download_data for the song identified by `id_`.
# NOTE(review): the visible hunk ends here, the body may continue - confirm.
@staticmethod
def add_url(url: str, src: str, id_: str):
database.set_download_data(id_, url, src)

View File

@ -1,4 +1,3 @@
import logging
import time
import requests
@ -7,6 +6,8 @@ import bs4
from ...utils.shared import *
from ...utils import phonetic_compares
from .source import AudioSource
TRIES = 5
TIMEOUT = 10
@ -19,6 +20,38 @@ session.headers = {
session.proxies = proxies
class Musify(AudioSource):
    """Audio source that searches and downloads from musify.club."""

    @classmethod
    def fetch_source(cls, row: dict):
        """Search the musify.club suggestions for the song described by ``row``.

        Args:
            row: Song data; ``row['title']`` and ``row['artists']`` (a
                sequence of artist names) are read.

        Returns:
            The result of ``get_download_link`` for the first suggestion
            whose label contains one of the artists and whose url contains
            ``"/track"``. ``None`` if there are no artists, the connection
            fails, the response status is not 200 or nothing matches.
        """
        super().fetch_source(row)

        title = row['title']
        artists = row['artists']
        if not artists:
            # without an artist the search term cannot be built
            return None

        try:
            # Pass the term via params so that requests encodes it; characters
            # such as "&" or "#" in a title would otherwise cut the query short.
            r = session.get(
                url="https://musify.club/search/suggestions",
                params={"term": f"{artists[0]} - {title}"},
            )
        except requests.exceptions.ConnectionError:
            return None

        if r.status_code != 200:
            return None

        # a separate loop name keeps the ``row`` parameter from being shadowed
        for suggestion in r.json():
            if any(a in suggestion['label'] for a in artists) and "/track" in suggestion['url']:
                return get_download_link(suggestion['url'])

        return None

    @classmethod
    def fetch_audio(cls, row: dict):
        """Download the audio of ``row`` from musify.

        Args:
            row: Song data; ``row['url']`` and ``row['file']`` are read.

        Returns:
            Whatever ``download_from_musify`` returns for the file and url.
        """
        super().fetch_audio(row)

        url = row['url']
        file_ = row['file']
        return download_from_musify(file_, url)
def get_musify_url(row):
title = row['title']
artists = row['artists']

View File

@ -1,5 +1,8 @@
from ...utils.shared import *
from typing import Tuple
logger = URL_DOWNLOAD_LOGGER
"""
The class "Source" is the superclass every class for specific audio
sources inherits from. This gives the advantage of a consistent
@ -7,9 +10,11 @@ calling of the functions do search for a song and to download it.
"""
class Source:
    """Empty base class; its constructor takes no arguments and does nothing."""

    def __init__(self):
        return None
class AudioSource:
    """Base class of the audio sources.

    The classmethods only log what is about to happen; they return nothing
    themselves.
    """

    @classmethod
    def fetch_source(cls, row: dict):
        """Log that a source for ``row['title']`` is looked up by this class."""
        message = f"try getting source {row['title']} from {cls.__name__}"
        logger.info(message)

    def get_source(self, row) -> Tuple[str, str]:
        """Return a pair of empty strings; ``row`` is not used."""
        nothing = ""
        return nothing, nothing

    @classmethod
    def fetch_audio(cls, row: dict):
        """Log that ``row['url']`` is downloaded to ``row['file']`` by this class."""
        message = f"downloading audio from {row['url']} from {cls.__name__} to {row['file']}"
        logger.info(message)

View File

@ -1,10 +1,13 @@
from typing import List
import youtube_dl
import logging
import time
from ...utils.shared import *
from ...utils import phonetic_compares
from .source import AudioSource
logger = YOUTUBE_LOGGER
YDL_OPTIONS = {'format': 'bestaudio', 'noplaylist': 'True'}
YOUTUBE_URL_KEY = 'webpage_url'
@ -13,6 +16,61 @@ WAIT_BETWEEN_BLOCK = 10
MAX_TRIES = 3
class Youtube(AudioSource):
    """Audio source that searches and downloads from YouTube via youtube_dl."""

    @classmethod
    def fetch_source(cls, row: dict):
        """Find a YouTube url for the song described by ``row``.

        Args:
            row: Song data; ``row['isrc']`` and ``row['title']`` are read.

        Returns:
            The ``'url'`` of the last result of ``get_youtube_from_isrc``
            that was not rejected by the title comparison, or ``None`` if
            the isrc is ``None`` or no result was accepted.
        """
        super().fetch_source(row)

        if row['isrc'] is None:
            return None

        real_title = row['title'].lower()

        final_result = None
        results = get_youtube_from_isrc(row['isrc'])
        for result in results:
            video_title = result['title'].lower()
            # NOTE(review): a truthy first value is treated as a rejection;
            # judging by the log message it signals a too large distance -
            # confirm against phonetic_compares.match_titles.
            match, distance = phonetic_compares.match_titles(video_title, real_title)

            if match:
                logger.warning(
                    f"dont downloading {result['url']} cuz the phonetic distance ({distance}) between {real_title} and {video_title} is to high.")
                continue

            final_result = result

        if final_result is None:
            return None
        return final_result['url']

    @classmethod
    def fetch_audio(cls, row: dict, trie: int=0):
        """Download ``row['url']`` as mp3 to ``row['file']``.

        A ``youtube_dl.utils.DownloadError`` triggers a retry after
        ``WAIT_BETWEEN_BLOCK`` seconds until ``trie`` reaches ``MAX_TRIES``.

        Args:
            row: Song data; ``row['url']`` and ``row['file']`` are read.
            trie: Number of attempts that already failed.

        Returns:
            ``None`` if the download went through, ``-1`` once the
            attempts are used up.
        """
        super().fetch_audio(row)

        url = row['url']
        file_ = row['file']
        options = {
            'format': 'bestaudio/best',
            'postprocessors': [{
                'key': 'FFmpegExtractAudio',
                'preferredcodec': 'mp3',
                'preferredquality': '192',
            }],
            'keepvideo': False,
            'outtmpl': file_
        }

        try:
            with youtube_dl.YoutubeDL(options) as ydl:
                ydl.download([url])
        except youtube_dl.utils.DownloadError:
            logger.warning(f"youtube blocked downloading. ({trie}-{MAX_TRIES})")
            if trie >= MAX_TRIES:
                logger.warning("too many tries, returning")
                # Give up here; without this return the method would keep
                # sleeping and retrying forever.
                return -1

            logger.warning(f"retrying in {WAIT_BETWEEN_BLOCK} seconds again")
            time.sleep(WAIT_BETWEEN_BLOCK)
            return cls.fetch_audio(row, trie=trie + 1)
def get_youtube_from_isrc(isrc: str) -> List[dict]:
# https://stackoverflow.com/questions/63388364/searching-youtube-videos-using-youtube-dl
with youtube_dl.YoutubeDL(YDL_OPTIONS) as ydl:

View File

@ -14,7 +14,8 @@ DATABASE_STRUCTURE_FALLBACK = "https://raw.githubusercontent.com/HeIIow2/music-d
SEARCH_LOGGER = logging.getLogger("mb-cli")
DATABASE_LOGGER = logging.getLogger("database")
METADATA_DOWNLOAD_LOGGER = logging.getLogger("metadata-download")
URL_DOWNLOAD_LOGGER = logging.getLogger("ling-download")
URL_DOWNLOAD_LOGGER = logging.getLogger("AudioSource")
YOUTUBE_LOGGER = logging.getLogger("Youtube")
PATH_LOGGER = logging.getLogger("create-paths")
DOWNLOAD_LOGGER = logging.getLogger("download")
LYRICS_LOGGER = logging.getLogger("lyrics")
@ -35,7 +36,7 @@ database = Database(os.path.join(temp_dir, DATABASE_FILE),
os.path.join(temp_dir, DATABASE_STRUCTURE_FILE),
DATABASE_STRUCTURE_FALLBACK,
DATABASE_LOGGER,
reset_anyways=False)
reset_anyways=True)
TOR = False
@ -43,3 +44,6 @@ proxies = {
'http': 'socks5h://127.0.0.1:9150',
'https': 'socks5h://127.0.0.1:9150'
} if TOR else {}
# only the sources listed here are used for downloading; they are tried in the order of this list
AUDIO_SOURCES = ["Youtube", "Musify"]