added the opportunity to choose the activated audio sources and change their priority

2022-11-16 13:21:30 +01:00 · 2022-11-16 13:21:30 +01:00 · 0cb5cd32e9
commit 0cb5cd32e9
parent 5240f3eb34
7 changed files with 156 additions and 13 deletions
--- a/src/music_kraken/main.py
+++ b/src/music_kraken/main.py
@ -1,5 +1,9 @@
 import music_kraken
 # from .audio_source.sources.musify import Musify
 from .audio_source.sources.youtube import Youtube
 if __name__ == "__main__":
-    music_kraken.cli(start_at=0, only_lyrics=False)
+    music_kraken.cli()
    # Youtube.fetch_audio({'title': 'dfas', '': '', 'isrc': ''})
    # Youtube.fetch_audio({'title': 'dfas', 'url': '', 'file': 'dasf', 'isrc': ''})
--- a/src/music_kraken/audio_source/fetch_audio.py
+++ b/src/music_kraken/audio_source/fetch_audio.py
@ -13,6 +13,12 @@ from .sources import (
 logger = DOWNLOAD_LOGGER
 # maps each source name to the class that fetches data from that source
 sources = {
    'Youtube': youtube.Youtube,
    'Musify': musify.Musify
 }
 """
 https://en.wikipedia.org/wiki/ID3
 https://mutagen.readthedocs.io/en/latest/user/id3.html
@ -35,12 +41,16 @@ class Download:
                self.write_metadata(row, row['file'])
                continue
            download_success = Download.download_from_src(row['src'], row)
            """
            download_success = None
            src = row['src']
            if src == 'musify':
                download_success = musify.download(row)
            elif src == 'youtube':
                download_success = youtube.download(row)
            """
            if download_success == -1:
                logger.warning(f"couldn't download {row['url']} from {row['src']}")
@ -48,6 +58,14 @@ class Download:
            self.write_metadata(row, row['file'])
    @staticmethod
    def download_from_src(src, row):
        """
        Download the audio of one song from the source named by src.

        :param src: key into the module-level `sources` mapping
        :param row: song data that gets handed to the `fetch_audio` of the source class
        :return: whatever `fetch_audio` of the selected source class returns
        :raises ValueError: if src is not a key of `sources`
        """
        source_subclass = sources.get(src)
        if source_subclass is None:
            raise ValueError(f"source {src} seems to not exist")

        return source_subclass.fetch_audio(row)
    @staticmethod
    def write_metadata(row, file_path):
        if not os.path.exists(file_path):
--- a/src/music_kraken/audio_source/fetch_source.py
+++ b/src/music_kraken/audio_source/fetch_source.py
@ -1,4 +1,5 @@
 from ..utils.shared import *
 from .sources import (
    youtube,
    musify,
@ -7,6 +8,12 @@ from .sources import (
 logger = URL_DOWNLOAD_LOGGER
 # maps each source name to the class that fetches data from that source
 sources = {
    'Youtube': youtube.Youtube,
    'Musify': musify.Musify
 }
 class Download:
    def __init__(self) -> None:
@ -28,15 +35,15 @@ class Download:
                self.add_url(file_path, 'file', id_)
                continue
            """
-
+            """
            # check YouTube
-            youtube_url = youtube.get_youtube_url(row)
+            youtube_url = youtube.Youtube.fetch_source(row)
            if youtube_url is not None:
                self.add_url(youtube_url, 'youtube', id_)
                continue
            # check musify
-            musify_url = musify.get_musify_url(row)
+            musify_url = musify.Musify.fetch_source(row)
            if musify_url is not None:
                self.add_url(musify_url, 'musify', id_)
                continue
@ -46,9 +53,23 @@ class Download:
            if musify_url is not None:
                self.add_url(musify_url, 'musify', id_)
                continue
            """
            # try the enabled sources in their configured order and stop at the first hit
            for src in AUDIO_SOURCES:
                res = Download.fetch_from_src(row, src)
                if res is not None:
                    Download.add_url(res, src, id_)
                    break
            else:
                # the else branch of a for loop only runs if the loop was not left
                # with break, thus only if none of the sources returned something
                logger.warning(f"Didn't find any sources for {row['title']}")
    @staticmethod
    def fetch_from_src(row: dict, src: str):
        """
        Ask the source named by src for the given song.

        :param row: song data that gets handed to the `fetch_source` of the source class
        :param src: key into the module-level `sources` mapping
        :return: whatever `fetch_source` of the selected source class returns
        :raises ValueError: if src is not a key of `sources`
        """
        source_subclass = sources.get(src)
        if source_subclass is None:
            raise ValueError(f"source {src} seems to not exist")

        return source_subclass.fetch_source(row)
    @staticmethod
    def add_url(url: str, src: str, id_: str):
        """
        Hand the found url and the name of its source to the database.

        :param url: the url that was found for the song
        :param src: name of the source the url came from
        :param id_: id that is passed to the database as first argument
        """
        database.set_download_data(id_, url, src)
--- a/src/music_kraken/audio_source/sources/musify.py
+++ b/src/music_kraken/audio_source/sources/musify.py
@ -1,4 +1,3 @@
 import logging
 import time
 import requests
@ -7,6 +6,8 @@ import bs4
 from ...utils.shared import *
 from ...utils import phonetic_compares
 from .source import AudioSource
 TRIES = 5
 TIMEOUT = 10
@ -19,6 +20,38 @@ session.headers = {
 session.proxies = proxies
 class Musify(AudioSource):
    """
    Audio source that searches the suggestion endpoint of musify.club
    and downloads the audio from there.
    """

    @classmethod
    def fetch_source(cls, row: dict):
        """
        Search musify.club for the song described by row.

        :param row: song data, row['title'] and row['artists'] are read
        :return: the result of `get_download_link` for the first suggestion
            which is a track and whose label contains one of the artists.
            None if there are no artists, the request fails, the response
            is not valid json or no suggestion matches.
        """
        super().fetch_source(row)

        title = row['title']
        artists = row['artists']
        if not artists:
            # the search term needs at least one artist
            return None

        # The term is passed through params so requests escapes it. If it is
        # put into the url by hand, characters like '&' or '#' in a title
        # cut the term off.
        try:
            r = session.get(
                url="https://musify.club/search/suggestions",
                params={'term': f"{artists[0]} - {title}"}
            )
        except requests.exceptions.ConnectionError:
            return None

        if r.status_code != 200:
            return None

        try:
            autocomplete = r.json()
        except ValueError:
            # the body was no valid json, treat it like "nothing found"
            return None

        # not named row, to not overwrite the parameter
        for suggestion in autocomplete:
            if any(a in suggestion['label'] for a in artists) and "/track" in suggestion['url']:
                return get_download_link(suggestion['url'])

        return None

    @classmethod
    def fetch_audio(cls, row: dict):
        """
        Download the audio of the song described by row.

        :param row: song data, row['url'] and row['file'] are read
        :return: whatever `download_from_musify` returns
        """
        super().fetch_audio(row)

        url = row['url']
        file_ = row['file']
        return download_from_musify(file_, url)
 def get_musify_url(row):
    title = row['title']
    artists = row['artists']
--- a/src/music_kraken/audio_source/sources/source.py
+++ b/src/music_kraken/audio_source/sources/source.py
@ -1,5 +1,8 @@
 from ...utils.shared import *
 from typing import Tuple
 logger = URL_DOWNLOAD_LOGGER
 """
 The class "AudioSource" is the superclass every class for specific audio
 sources inherits from. This gives the advantage of a consistent
@ -7,9 +10,11 @@ calling of the functions do search for a song and to download it.
 """
-class Source:
+class AudioSource:
-    def __init__(self):
+    @classmethod
-        pass
+    def fetch_source(cls, row: dict):
        logger.info(f"try getting source {row['title']} from {cls.__name__}")
-    def get_source(self, row) -> Tuple[str, str]:
+    @classmethod
-        return "", ""
+    def fetch_audio(cls, row: dict):
        logger.info(f"downloading audio from {row['url']} from {cls.__name__} to {row['file']}")
--- a/src/music_kraken/audio_source/sources/youtube.py
+++ b/src/music_kraken/audio_source/sources/youtube.py
@ -1,10 +1,13 @@
 from typing import List
 import youtube_dl
 import logging
 import time
 from ...utils.shared import *
 from ...utils import phonetic_compares
 from .source import AudioSource
 logger = YOUTUBE_LOGGER
 YDL_OPTIONS = {'format': 'bestaudio', 'noplaylist': 'True'}
 YOUTUBE_URL_KEY = 'webpage_url'
@ -13,6 +16,61 @@ WAIT_BETWEEN_BLOCK = 10
 MAX_TRIES = 3
 class Youtube(AudioSource):
    """
    Audio source that searches YouTube by the isrc of a song and
    downloads the audio with youtube_dl.
    """

    @classmethod
    def fetch_source(cls, row: dict):
        """
        Search for a video matching the song described by row.

        :param row: song data, row['isrc'] and row['title'] are read
        :return: the url of the last result that wasn't rejected by the
            title comparison, None if there is no isrc or no result is left
        """
        super().fetch_source(row)

        if row['isrc'] is None:
            return None

        real_title = row['title'].lower()

        final_result = None
        results = get_youtube_from_isrc(row['isrc'])
        for result in results:
            video_title = result['title'].lower()
            match, distance = phonetic_compares.match_titles(video_title, real_title)

            # NOTE(review): a truthy first value rejects the result here, so
            # presumably it means "distance too high" - confirm in match_titles
            if match:
                logger.warning(
                    f"dont downloading {result['url']} cuz the phonetic distance ({distance}) between {real_title} and {video_title} is to high.")
                continue

            # there is no break, so a later accepted result replaces an earlier one
            final_result = result

        if final_result is None:
            return None
        return final_result['url']

    @classmethod
    def fetch_audio(cls, row: dict, trie: int=0):
        """
        Download the audio of row['url'] as mp3 to row['file'].

        If youtube_dl raises a DownloadError it waits WAIT_BETWEEN_BLOCK
        seconds and tries again, until trie reaches MAX_TRIES.

        :param row: song data, row['url'] and row['file'] are read
        :param trie: number of retries that already happened
        :return: -1 if the download still fails after the last retry,
            else None
        """
        super().fetch_audio(row)

        url = row['url']
        file_ = row['file']
        options = {
            'format': 'bestaudio/best',
            'postprocessors': [{
                'key': 'FFmpegExtractAudio',
                'preferredcodec': 'mp3',
                'preferredquality': '192',
            }],
            'keepvideo': False,
            'outtmpl': file_
        }

        try:
            with youtube_dl.YoutubeDL(options) as ydl:
                ydl.download([url])
        except youtube_dl.utils.DownloadError:
            logger.warning(f"youtube blocked downloading. ({trie}-{MAX_TRIES})")
            if trie >= MAX_TRIES:
                logger.warning("too many tries, returning")
                # Without this return the retries would never stop.
                # -1 is the value the caller compares the result with.
                return -1

            logger.warning(f"retrying in {WAIT_BETWEEN_BLOCK} seconds again")
            time.sleep(WAIT_BETWEEN_BLOCK)
            return cls.fetch_audio(row, trie=trie + 1)
 def get_youtube_from_isrc(isrc: str) -> List[dict]:
    # https://stackoverflow.com/questions/63388364/searching-youtube-videos-using-youtube-dl
    with youtube_dl.YoutubeDL(YDL_OPTIONS) as ydl:
--- a/src/music_kraken/utils/shared.py
+++ b/src/music_kraken/utils/shared.py
@ -14,7 +14,8 @@ DATABASE_STRUCTURE_FALLBACK = "https://raw.githubusercontent.com/HeIIow2/music-d
 SEARCH_LOGGER = logging.getLogger("mb-cli")
 DATABASE_LOGGER = logging.getLogger("database")
 METADATA_DOWNLOAD_LOGGER = logging.getLogger("metadata-download")
-URL_DOWNLOAD_LOGGER = logging.getLogger("ling-download")
+URL_DOWNLOAD_LOGGER = logging.getLogger("AudioSource")
 YOUTUBE_LOGGER = logging.getLogger("Youtube")
 PATH_LOGGER = logging.getLogger("create-paths")
 DOWNLOAD_LOGGER = logging.getLogger("download")
 LYRICS_LOGGER = logging.getLogger("lyrics")
@ -35,7 +36,7 @@ database = Database(os.path.join(temp_dir, DATABASE_FILE),
                    os.path.join(temp_dir, DATABASE_STRUCTURE_FILE),
                    DATABASE_STRUCTURE_FALLBACK,
                    DATABASE_LOGGER,
-                    reset_anyways=False)
+                    reset_anyways=True)
 TOR = False
@ -43,3 +44,6 @@ proxies = {
    'http': 'socks5h://127.0.0.1:9150',
    'https': 'socks5h://127.0.0.1:9150'
 } if TOR else {}
 # only the sources listed here are used; they are tried in the order of the list
 AUDIO_SOURCES = ["Youtube", "Musify"]