From 0cb5cd32e94680e7d2839d28975e309692ab91c4 Mon Sep 17 00:00:00 2001 From: Lars Noack Date: Wed, 16 Nov 2022 13:21:30 +0100 Subject: [PATCH] added the opportunity to choose the activated audio sources and change theire priority --- src/music_kraken/__main__.py | 6 +- src/music_kraken/audio_source/fetch_audio.py | 18 ++++++ src/music_kraken/audio_source/fetch_source.py | 27 ++++++++- .../audio_source/sources/musify.py | 35 ++++++++++- .../audio_source/sources/source.py | 15 +++-- .../audio_source/sources/youtube.py | 60 ++++++++++++++++++- src/music_kraken/utils/shared.py | 8 ++- 7 files changed, 156 insertions(+), 13 deletions(-) diff --git a/src/music_kraken/__main__.py b/src/music_kraken/__main__.py index 14c1772..913e44d 100644 --- a/src/music_kraken/__main__.py +++ b/src/music_kraken/__main__.py @@ -1,5 +1,9 @@ import music_kraken +# from .audio_source.sources.musify import Musify +from .audio_source.sources.youtube import Youtube if __name__ == "__main__": - music_kraken.cli(start_at=0, only_lyrics=False) + music_kraken.cli() + # Youtube.fetch_audio({'title': 'dfas', '': '', 'isrc': ''}) + # Youtube.fetch_audio({'title': 'dfas', 'url': '', 'file': 'dasf', 'isrc': ''}) diff --git a/src/music_kraken/audio_source/fetch_audio.py b/src/music_kraken/audio_source/fetch_audio.py index 337860d..c372e87 100644 --- a/src/music_kraken/audio_source/fetch_audio.py +++ b/src/music_kraken/audio_source/fetch_audio.py @@ -13,6 +13,12 @@ from .sources import ( logger = DOWNLOAD_LOGGER +# maps the classes to get data from to the source name +sources = { + 'Youtube': youtube.Youtube, + 'Musify': musify.Musify +} + """ https://en.wikipedia.org/wiki/ID3 https://mutagen.readthedocs.io/en/latest/user/id3.html @@ -35,12 +41,16 @@ class Download: self.write_metadata(row, row['file']) continue + download_success = Download.download_from_src(row['src'], row) + + """ download_success = None src = row['src'] if src == 'musify': download_success = musify.download(row) elif src == 'youtube': download_success = youtube.download(row) + """ if download_success == -1: logger.warning(f"couldn't download {row['url']} from {row['src']}") @@ -48,6 +58,14 @@ class Download: self.write_metadata(row, row['file']) + @staticmethod + def download_from_src(src, row): + if src not in sources: + raise ValueError(f"source {src} seems to not exist") + source_subclass = sources[src] + + return source_subclass.fetch_audio(row) + @staticmethod def write_metadata(row, file_path): if not os.path.exists(file_path): diff --git a/src/music_kraken/audio_source/fetch_source.py b/src/music_kraken/audio_source/fetch_source.py index 5c57bee..e241df0 100644 --- a/src/music_kraken/audio_source/fetch_source.py +++ b/src/music_kraken/audio_source/fetch_source.py @@ -1,4 +1,5 @@ from ..utils.shared import * + from .sources import ( youtube, musify, @@ -7,6 +8,12 @@ from .sources import ( logger = URL_DOWNLOAD_LOGGER +# maps the classes to get data from to the source name +sources = { + 'Youtube': youtube.Youtube, + 'Musify': musify.Musify +} + class Download: def __init__(self) -> None: @@ -28,15 +35,15 @@ class Download: self.add_url(file_path, 'file', id_) continue """ - + """ # check YouTube - youtube_url = youtube.get_youtube_url(row) + youtube_url = youtube.Youtube.fetch_source(row) if youtube_url is not None: self.add_url(youtube_url, 'youtube', id_) continue # check musify - musify_url = musify.get_musify_url(row) + musify_url = musify.Musify.fetch_source(row) if musify_url is not None: self.add_url(musify_url, 'musify', id_) continue @@ -46,9 +53,23 @@ class Download: if musify_url is not None: self.add_url(musify_url, 'musify', id_) continue + """ + for src in AUDIO_SOURCES: + res = Download.fetch_from_src(row, src) + if res is not None: + Download.add_url(res, src, id_) + break logger.warning(f"Didn't find any sources for {row['title']}") + @staticmethod + def fetch_from_src(row: dict, src: str): + if src not in sources: + raise ValueError(f"source {src} seems to not exist") + + source_subclass = sources[src] + return source_subclass.fetch_source(row) + @staticmethod def add_url(url: str, src: str, id_: str): database.set_download_data(id_, url, src) diff --git a/src/music_kraken/audio_source/sources/musify.py b/src/music_kraken/audio_source/sources/musify.py index 3d424a2..5811638 100644 --- a/src/music_kraken/audio_source/sources/musify.py +++ b/src/music_kraken/audio_source/sources/musify.py @@ -1,4 +1,3 @@ -import logging import time import requests @@ -7,6 +6,8 @@ import bs4 from ...utils.shared import * from ...utils import phonetic_compares +from .source import AudioSource + TRIES = 5 TIMEOUT = 10 @@ -19,6 +20,38 @@ session.headers = { session.proxies = proxies +class Musify(AudioSource): + @classmethod + def fetch_source(cls, row: dict): + super().fetch_source(row) + + title = row['title'] + artists = row['artists'] + + url = f"https://musify.club/search/suggestions?term={artists[0]} - {title}" + + try: + r = session.get(url=url) + except requests.exceptions.ConnectionError: + return None + if r.status_code == 200: + autocomplete = r.json() + for row in autocomplete: + if any(a in row['label'] for a in artists) and "/track" in row['url']: + return get_download_link(row['url']) + + return None + + @classmethod + def fetch_audio(cls, row: dict): + super().fetch_audio(row) + + url = row['url'] + file_ = row['file'] + return download_from_musify(file_, url) + + + def get_musify_url(row): title = row['title'] artists = row['artists'] diff --git a/src/music_kraken/audio_source/sources/source.py b/src/music_kraken/audio_source/sources/source.py index d92860d..71ffaac 100644 --- a/src/music_kraken/audio_source/sources/source.py +++ b/src/music_kraken/audio_source/sources/source.py @@ -1,5 +1,8 @@ +from ...utils.shared import * from typing import Tuple +logger = URL_DOWNLOAD_LOGGER + """ The class "Source" is the superclass every class for specific audio sources inherits from. This gives the advantage of a consistent @@ -7,9 +10,11 @@ calling of the functions do search for a song and to download it. """ -class Source: - def __init__(self): - pass +class AudioSource: + @classmethod + def fetch_source(cls, row: dict): + logger.info(f"try getting source {row['title']} from {cls.__name__}") - def get_source(self, row) -> Tuple[str, str]: - return "", "" + @classmethod + def fetch_audio(cls, row: dict): + logger.info(f"downloading audio from {row['url']} from {cls.__name__} to {row['file']}") diff --git a/src/music_kraken/audio_source/sources/youtube.py b/src/music_kraken/audio_source/sources/youtube.py index 51d03c5..44803ca 100644 --- a/src/music_kraken/audio_source/sources/youtube.py +++ b/src/music_kraken/audio_source/sources/youtube.py @@ -1,10 +1,13 @@ from typing import List import youtube_dl -import logging import time +from ...utils.shared import * from ...utils import phonetic_compares +from .source import AudioSource + +logger = YOUTUBE_LOGGER YDL_OPTIONS = {'format': 'bestaudio', 'noplaylist': 'True'} YOUTUBE_URL_KEY = 'webpage_url' @@ -13,6 +16,61 @@ WAIT_BETWEEN_BLOCK = 10 MAX_TRIES = 3 +class Youtube(AudioSource): + @classmethod + def fetch_source(cls, row: dict): + super().fetch_source(row) + + if row['isrc'] is None: + return None + + real_title = row['title'].lower() + + final_result = None + results = get_youtube_from_isrc(row['isrc']) + for result in results: + video_title = result['title'].lower() + match, distance = phonetic_compares.match_titles(video_title, real_title) + + if match: + logger.warning( + f"dont downloading {result['url']} cuz the phonetic distance ({distance}) between {real_title} and {video_title} is to high.") + continue + + final_result = result + + if final_result is None: + return None + return final_result['url'] + + @classmethod + def fetch_audio(cls, row: dict, trie: int=0): + super().fetch_audio(row) + + url = row['url'] + file_ = row['file'] + options = { + 'format': 'bestaudio/best', + 'postprocessors': [{ + 'key': 'FFmpegExtractAudio', + 'preferredcodec': 'mp3', + 'preferredquality': '192', + }], + 'keepvideo': False, + 'outtmpl': file_ + } + + try: + with youtube_dl.YoutubeDL(options) as ydl: + ydl.download([url]) + except youtube_dl.utils.DownloadError: + logging.warning(f"youtube blocked downloading. ({trie}-{MAX_TRIES})") + if trie >= MAX_TRIES: + logging.warning("too many tries, returning") + logging.warning(f"retrying in {WAIT_BETWEEN_BLOCK} seconds again") + time.sleep(WAIT_BETWEEN_BLOCK) + return Youtube.fetch_audio(row, trie=trie + 1) + def get_youtube_from_isrc(isrc: str) -> List[dict]: # https://stackoverflow.com/questions/63388364/searching-youtube-videos-using-youtube-dl with youtube_dl.YoutubeDL(YDL_OPTIONS) as ydl: diff --git a/src/music_kraken/utils/shared.py b/src/music_kraken/utils/shared.py index 55ed317..c0469b3 100644 --- a/src/music_kraken/utils/shared.py +++ b/src/music_kraken/utils/shared.py @@ -14,7 +14,8 @@ DATABASE_STRUCTURE_FALLBACK = "https://raw.githubusercontent.com/HeIIow2/music-d SEARCH_LOGGER = logging.getLogger("mb-cli") DATABASE_LOGGER = logging.getLogger("database") METADATA_DOWNLOAD_LOGGER = logging.getLogger("metadata-download") -URL_DOWNLOAD_LOGGER = logging.getLogger("ling-download") +URL_DOWNLOAD_LOGGER = logging.getLogger("AudioSource") +YOUTUBE_LOGGER = logging.getLogger("Youtube") PATH_LOGGER = logging.getLogger("create-paths") DOWNLOAD_LOGGER = logging.getLogger("download") LYRICS_LOGGER = logging.getLogger("lyrics") @@ -35,7 +36,7 @@ database = Database(os.path.join(temp_dir, DATABASE_FILE), os.path.join(temp_dir, DATABASE_STRUCTURE_FILE), DATABASE_STRUCTURE_FALLBACK, DATABASE_LOGGER, - reset_anyways=False) + reset_anyways=True) TOR = False @@ -43,3 +44,6 @@ proxies = { 'http': 'socks5h://127.0.0.1:9150', 'https': 'socks5h://127.0.0.1:9150' } if TOR else {} + +# only the sources here will get downloaded, in the order the list is ordered +AUDIO_SOURCES = ["Youtube", "Musify"]