Added the option to choose which audio sources are active and to change their priority

This commit is contained in:
Lars Noack 2022-11-16 13:21:30 +01:00
parent 5240f3eb34
commit 0cb5cd32e9
7 changed files with 156 additions and 13 deletions

View File

@ -1,5 +1,9 @@
import music_kraken import music_kraken
# from .audio_source.sources.musify import Musify
from .audio_source.sources.youtube import Youtube
if __name__ == "__main__": if __name__ == "__main__":
music_kraken.cli(start_at=0, only_lyrics=False) music_kraken.cli()
# Youtube.fetch_audio({'title': 'dfas', '': '', 'isrc': ''})
# Youtube.fetch_audio({'title': 'dfas', 'url': '', 'file': 'dasf', 'isrc': ''})

View File

@ -13,6 +13,12 @@ from .sources import (
logger = DOWNLOAD_LOGGER logger = DOWNLOAD_LOGGER
# maps each source name to the class used to get data from that source
sources = {
'Youtube': youtube.Youtube,
'Musify': musify.Musify
}
""" """
https://en.wikipedia.org/wiki/ID3 https://en.wikipedia.org/wiki/ID3
https://mutagen.readthedocs.io/en/latest/user/id3.html https://mutagen.readthedocs.io/en/latest/user/id3.html
@ -35,12 +41,16 @@ class Download:
self.write_metadata(row, row['file']) self.write_metadata(row, row['file'])
continue continue
download_success = Download.download_from_src(row['src'], row)
"""
download_success = None download_success = None
src = row['src'] src = row['src']
if src == 'musify': if src == 'musify':
download_success = musify.download(row) download_success = musify.download(row)
elif src == 'youtube': elif src == 'youtube':
download_success = youtube.download(row) download_success = youtube.download(row)
"""
if download_success == -1: if download_success == -1:
logger.warning(f"couldn't download {row['url']} from {row['src']}") logger.warning(f"couldn't download {row['url']} from {row['src']}")
@ -48,6 +58,14 @@ class Download:
self.write_metadata(row, row['file']) self.write_metadata(row, row['file'])
@staticmethod
def download_from_src(src, row):
    """Download the audio for ``row`` using the source registered as ``src``.

    ``src`` is looked up in the module-level ``sources`` mapping and the
    resulting class's ``fetch_audio`` is called with ``row``; its return
    value is passed through unchanged.

    Raises:
        ValueError: if ``src`` is not a key of ``sources``.
    """
    audio_source = sources.get(src)
    if audio_source is None:
        # unknown source name: fail loudly instead of skipping the row
        raise ValueError(f"source {src} seems to not exist")

    return audio_source.fetch_audio(row)
@staticmethod @staticmethod
def write_metadata(row, file_path): def write_metadata(row, file_path):
if not os.path.exists(file_path): if not os.path.exists(file_path):

View File

@ -1,4 +1,5 @@
from ..utils.shared import * from ..utils.shared import *
from .sources import ( from .sources import (
youtube, youtube,
musify, musify,
@ -7,6 +8,12 @@ from .sources import (
logger = URL_DOWNLOAD_LOGGER logger = URL_DOWNLOAD_LOGGER
# maps each source name to the class used to get data from that source
sources = {
'Youtube': youtube.Youtube,
'Musify': musify.Musify
}
class Download: class Download:
def __init__(self) -> None: def __init__(self) -> None:
@ -28,15 +35,15 @@ class Download:
self.add_url(file_path, 'file', id_) self.add_url(file_path, 'file', id_)
continue continue
""" """
"""
# check YouTube # check YouTube
youtube_url = youtube.get_youtube_url(row) youtube_url = youtube.Youtube.fetch_source(row)
if youtube_url is not None: if youtube_url is not None:
self.add_url(youtube_url, 'youtube', id_) self.add_url(youtube_url, 'youtube', id_)
continue continue
# check musify # check musify
musify_url = musify.get_musify_url(row) musify_url = musify.Musify.fetch_source(row)
if musify_url is not None: if musify_url is not None:
self.add_url(musify_url, 'musify', id_) self.add_url(musify_url, 'musify', id_)
continue continue
@ -46,9 +53,23 @@ class Download:
if musify_url is not None: if musify_url is not None:
self.add_url(musify_url, 'musify', id_) self.add_url(musify_url, 'musify', id_)
continue continue
"""
for src in AUDIO_SOURCES:
res = Download.fetch_from_src(row, src)
if res is not None:
Download.add_url(res, src, id_)
break
logger.warning(f"Didn't find any sources for {row['title']}") logger.warning(f"Didn't find any sources for {row['title']}")
@staticmethod
def fetch_from_src(row: dict, src: str):
    """Ask the source registered as ``src`` for a download url for ``row``.

    ``src`` is looked up in the module-level ``sources`` mapping and the
    resulting class's ``fetch_source`` is called with ``row``; its return
    value is passed through unchanged.

    Raises:
        ValueError: if ``src`` is not a key of ``sources``.
    """
    audio_source = sources.get(src)
    if audio_source is None:
        # unknown source name: fail loudly instead of silently finding nothing
        raise ValueError(f"source {src} seems to not exist")

    return audio_source.fetch_source(row)
@staticmethod @staticmethod
def add_url(url: str, src: str, id_: str): def add_url(url: str, src: str, id_: str):
database.set_download_data(id_, url, src) database.set_download_data(id_, url, src)

View File

@ -1,4 +1,3 @@
import logging
import time import time
import requests import requests
@ -7,6 +6,8 @@ import bs4
from ...utils.shared import * from ...utils.shared import *
from ...utils import phonetic_compares from ...utils import phonetic_compares
from .source import AudioSource
TRIES = 5 TRIES = 5
TIMEOUT = 10 TIMEOUT = 10
@ -19,6 +20,38 @@ session.headers = {
session.proxies = proxies session.proxies = proxies
class Musify(AudioSource):
    """Audio source that searches musify.club and downloads tracks from it."""

    @classmethod
    def fetch_source(cls, row: dict):
        """Search musify.club for the song described by ``row``.

        Reads ``row['title']`` and ``row['artists']`` and queries the site's
        search-suggestion endpoint with the term "<first artist> - <title>".

        Returns:
            The result of ``get_download_link`` for the first suggestion
            whose label contains one of the artists and whose url contains
            "/track". ``None`` if there are no artists, the connection
            fails, the response status is not 200, or no suggestion fits.
        """
        # base-class call comes first, as in every AudioSource subclass here
        super().fetch_source(row)

        title = row['title']
        artists = row['artists']
        if not artists:
            # without an artist no search term can be built
            return None

        try:
            # pass the term via ``params`` so characters like '&' or '#'
            # in a title are escaped instead of corrupting the query string
            r = session.get(
                url="https://musify.club/search/suggestions",
                params={'term': f"{artists[0]} - {title}"},
            )
        except requests.exceptions.ConnectionError:
            return None

        if r.status_code != 200:
            return None

        # a separate loop name keeps the ``row`` argument from being shadowed
        for suggestion in r.json():
            if any(a in suggestion['label'] for a in artists) and "/track" in suggestion['url']:
                return get_download_link(suggestion['url'])

        return None

    @classmethod
    def fetch_audio(cls, row: dict):
        """Download ``row['url']`` to ``row['file']``.

        Returns:
            Whatever ``download_from_musify`` returns for that file and url.
        """
        super().fetch_audio(row)

        url = row['url']
        file_ = row['file']
        return download_from_musify(file_, url)
def get_musify_url(row): def get_musify_url(row):
title = row['title'] title = row['title']
artists = row['artists'] artists = row['artists']

View File

@ -1,5 +1,8 @@
from ...utils.shared import *
from typing import Tuple from typing import Tuple
logger = URL_DOWNLOAD_LOGGER
""" """
The class "Source" is the superclass every class for specific audio The class "Source" is the superclass every class for specific audio
sources inherits from. This gives the advantage of a consistent sources inherits from. This gives the advantage of a consistent
@ -7,9 +10,11 @@ calling of the functions do search for a song and to download it.
""" """
class Source: class AudioSource:
def __init__(self): @classmethod
pass def fetch_source(cls, row: dict):
logger.info(f"try getting source {row['title']} from {cls.__name__}")
def get_source(self, row) -> Tuple[str, str]: @classmethod
return "", "" def fetch_audio(cls, row: dict):
logger.info(f"downloading audio from {row['url']} from {cls.__name__} to {row['file']}")

View File

@ -1,10 +1,13 @@
from typing import List from typing import List
import youtube_dl import youtube_dl
import logging
import time import time
from ...utils.shared import *
from ...utils import phonetic_compares from ...utils import phonetic_compares
from .source import AudioSource
logger = YOUTUBE_LOGGER
YDL_OPTIONS = {'format': 'bestaudio', 'noplaylist': 'True'} YDL_OPTIONS = {'format': 'bestaudio', 'noplaylist': 'True'}
YOUTUBE_URL_KEY = 'webpage_url' YOUTUBE_URL_KEY = 'webpage_url'
@ -13,6 +16,61 @@ WAIT_BETWEEN_BLOCK = 10
MAX_TRIES = 3 MAX_TRIES = 3
class Youtube(AudioSource):
    """Audio source that finds songs on YouTube by ISRC and downloads them."""

    @classmethod
    def fetch_source(cls, row: dict):
        """Find a YouTube url for the song described by ``row``.

        Reads ``row['isrc']`` and ``row['title']``. Every result of
        ``get_youtube_from_isrc`` is compared against the title with
        ``phonetic_compares.match_titles``; rejected results are logged
        and skipped.

        Returns:
            The ``'url'`` of the last result that was not rejected, or
            ``None`` if the row has no ISRC or every result was rejected.
        """
        super().fetch_source(row)

        if row['isrc'] is None:
            return None

        real_title = row['title'].lower()

        final_result = None
        results = get_youtube_from_isrc(row['isrc'])
        for result in results:
            video_title = result['title'].lower()
            # NOTE(review): a truthy flag is treated here as "titles differ
            # too much" -- confirm against phonetic_compares.match_titles
            match, distance = phonetic_compares.match_titles(video_title, real_title)

            if match:
                logger.warning(
                    f"dont downloading {result['url']} cuz the phonetic distance ({distance}) between {real_title} and {video_title} is to high.")
                continue

            final_result = result

        if final_result is None:
            return None
        return final_result['url']

    @classmethod
    def fetch_audio(cls, row: dict, trie: int = 0):
        """Download ``row['url']`` as mp3 to ``row['file']`` using youtube_dl.

        On a ``DownloadError`` the download is retried after waiting
        ``WAIT_BETWEEN_BLOCK`` seconds, until ``trie`` reaches ``MAX_TRIES``.

        Args:
            row: needs the keys ``'url'`` and ``'file'``.
            trie: number of the current attempt, starting at 0.

        Returns:
            ``None`` after a download that raised no error, ``-1`` once the
            retries are used up (the value the download loop checks for).
        """
        super().fetch_audio(row)

        url = row['url']
        file_ = row['file']
        options = {
            'format': 'bestaudio/best',
            'postprocessors': [{
                'key': 'FFmpegExtractAudio',
                'preferredcodec': 'mp3',
                'preferredquality': '192',
            }],
            'keepvideo': False,
            'outtmpl': file_
        }

        try:
            with youtube_dl.YoutubeDL(options) as ydl:
                ydl.download([url])
        except youtube_dl.utils.DownloadError:
            logger.warning(f"youtube blocked downloading. ({trie}-{MAX_TRIES})")
            if trie >= MAX_TRIES:
                logger.warning("too many tries, returning")
                # actually stop here; without this return the method would
                # keep sleeping and recursing forever
                return -1

            logger.warning(f"retrying in {WAIT_BETWEEN_BLOCK} seconds again")
            time.sleep(WAIT_BETWEEN_BLOCK)
            return cls.fetch_audio(row, trie=trie + 1)
def get_youtube_from_isrc(isrc: str) -> List[dict]: def get_youtube_from_isrc(isrc: str) -> List[dict]:
# https://stackoverflow.com/questions/63388364/searching-youtube-videos-using-youtube-dl # https://stackoverflow.com/questions/63388364/searching-youtube-videos-using-youtube-dl
with youtube_dl.YoutubeDL(YDL_OPTIONS) as ydl: with youtube_dl.YoutubeDL(YDL_OPTIONS) as ydl:

View File

@ -14,7 +14,8 @@ DATABASE_STRUCTURE_FALLBACK = "https://raw.githubusercontent.com/HeIIow2/music-d
SEARCH_LOGGER = logging.getLogger("mb-cli") SEARCH_LOGGER = logging.getLogger("mb-cli")
DATABASE_LOGGER = logging.getLogger("database") DATABASE_LOGGER = logging.getLogger("database")
METADATA_DOWNLOAD_LOGGER = logging.getLogger("metadata-download") METADATA_DOWNLOAD_LOGGER = logging.getLogger("metadata-download")
URL_DOWNLOAD_LOGGER = logging.getLogger("ling-download") URL_DOWNLOAD_LOGGER = logging.getLogger("AudioSource")
YOUTUBE_LOGGER = logging.getLogger("Youtube")
PATH_LOGGER = logging.getLogger("create-paths") PATH_LOGGER = logging.getLogger("create-paths")
DOWNLOAD_LOGGER = logging.getLogger("download") DOWNLOAD_LOGGER = logging.getLogger("download")
LYRICS_LOGGER = logging.getLogger("lyrics") LYRICS_LOGGER = logging.getLogger("lyrics")
@ -35,7 +36,7 @@ database = Database(os.path.join(temp_dir, DATABASE_FILE),
os.path.join(temp_dir, DATABASE_STRUCTURE_FILE), os.path.join(temp_dir, DATABASE_STRUCTURE_FILE),
DATABASE_STRUCTURE_FALLBACK, DATABASE_STRUCTURE_FALLBACK,
DATABASE_LOGGER, DATABASE_LOGGER,
reset_anyways=False) reset_anyways=True)
TOR = False TOR = False
@ -43,3 +44,6 @@ proxies = {
'http': 'socks5h://127.0.0.1:9150', 'http': 'socks5h://127.0.0.1:9150',
'https': 'socks5h://127.0.0.1:9150' 'https': 'socks5h://127.0.0.1:9150'
} if TOR else {} } if TOR else {}
# only the sources here will get downloaded, in the order the list is ordered
AUDIO_SOURCES = ["Youtube", "Musify"]