added the option to choose the activated audio sources and change their priority

2022-11-16 13:21:30 +01:00
parent 5240f3eb34
commit 0cb5cd32e9
7 changed files with 156 additions and 13 deletions
--- a/src/music_kraken/main.py
+++ b/src/music_kraken/main.py
@@ -1,5 +1,9 @@
 import music_kraken
+# from .audio_source.sources.musify import Musify
+from .audio_source.sources.youtube import Youtube


 if __name__ == "__main__":
-    music_kraken.cli(start_at=0, only_lyrics=False)
+    music_kraken.cli()
+    # Youtube.fetch_audio({'title': 'dfas', '': '', 'isrc': ''})
+    # Youtube.fetch_audio({'title': 'dfas', 'url': '', 'file': 'dasf', 'isrc': ''})
--- a/src/music_kraken/audio_source/fetch_audio.py
+++ b/src/music_kraken/audio_source/fetch_audio.py
@@ -13,6 +13,12 @@ from .sources import (

 logger = DOWNLOAD_LOGGER

+# maps each source name to the class used to get data from that source
+sources = {
+    'Youtube': youtube.Youtube,
+    'Musify': musify.Musify
+}
+
 """
 https://en.wikipedia.org/wiki/ID3
 https://mutagen.readthedocs.io/en/latest/user/id3.html
@@ -35,12 +41,16 @@ class Download:
                self.write_metadata(row, row['file'])
                continue

+            download_success = Download.download_from_src(row['src'], row)
+
+            """
            download_success = None
            src = row['src']
            if src == 'musify':
                download_success = musify.download(row)
            elif src == 'youtube':
                download_success = youtube.download(row)
+            """

            if download_success == -1:
                logger.warning(f"couldn't download {row['url']} from {row['src']}")
@@ -48,6 +58,14 @@ class Download:

            self.write_metadata(row, row['file'])

+    @staticmethod
+    def download_from_src(src, row):
+        """
+        Download the audio for one row from the audio source named src.
+
+        :param src: key into the module-level ``sources`` mapping
+        :param row: row dict, passed unchanged to the source's ``fetch_audio``
+        :return: whatever the source's ``fetch_audio`` returns
+        :raises ValueError: if src is not a key of ``sources``
+        """
+        source_subclass = sources.get(src)
+        if source_subclass is None:
+            raise ValueError(f"source {src} seems to not exist")
+
+        return source_subclass.fetch_audio(row)
+
    @staticmethod
    def write_metadata(row, file_path):
        if not os.path.exists(file_path):
--- a/src/music_kraken/audio_source/fetch_source.py
+++ b/src/music_kraken/audio_source/fetch_source.py
@@ -1,4 +1,5 @@
 from ..utils.shared import *
+
 from .sources import (
    youtube,
    musify,
@@ -7,6 +8,12 @@ from .sources import (

 logger = URL_DOWNLOAD_LOGGER

+# maps each source name to the class used to get data from that source
+sources = {
+    'Youtube': youtube.Youtube,
+    'Musify': musify.Musify
+}
+

 class Download:
    def __init__(self) -> None:
@@ -28,15 +35,15 @@ class Download:
                self.add_url(file_path, 'file', id_)
                continue
            """
-
+            """
            # check YouTube
-            youtube_url = youtube.get_youtube_url(row)
+            youtube_url = youtube.Youtube.fetch_source(row)
            if youtube_url is not None:
                self.add_url(youtube_url, 'youtube', id_)
                continue

            # check musify
-            musify_url = musify.get_musify_url(row)
+            musify_url = musify.Musify.fetch_source(row)
            if musify_url is not None:
                self.add_url(musify_url, 'musify', id_)
                continue
@@ -46,9 +53,23 @@ class Download:
            if musify_url is not None:
                self.add_url(musify_url, 'musify', id_)
                continue
+            """
+            for src in AUDIO_SOURCES:
+                res = Download.fetch_from_src(row, src)
+                if res is not None:
+                    Download.add_url(res, src, id_)
+                    break

            logger.warning(f"Didn't find any sources for {row['title']}")

+    @staticmethod
+    def fetch_from_src(row: dict, src: str):
+        """
+        Ask the audio source named src for a source of the song in row.
+
+        :param row: row dict, passed unchanged to the source's ``fetch_source``
+        :param src: key into the module-level ``sources`` mapping
+        :return: whatever the source's ``fetch_source`` returns
+        :raises ValueError: if src is not a key of ``sources``
+        """
+        source_subclass = sources.get(src)
+        if source_subclass is None:
+            raise ValueError(f"source {src} seems to not exist")
+
+        return source_subclass.fetch_source(row)
+
    @staticmethod
    def add_url(url: str, src: str, id_: str):
        database.set_download_data(id_, url, src)
--- a/src/music_kraken/audio_source/sources/musify.py
+++ b/src/music_kraken/audio_source/sources/musify.py
@@ -1,4 +1,3 @@
-import logging
 import time

 import requests
@@ -7,6 +6,8 @@ import bs4
 from ...utils.shared import *
 from ...utils import phonetic_compares

+from .source import AudioSource
+
 TRIES = 5
 TIMEOUT = 10

@@ -19,6 +20,38 @@ session.headers = {
 session.proxies = proxies


+class Musify(AudioSource):
+    """
+    Audio source backed by musify.club.
+
+    ``fetch_source`` looks a song up through the search suggestion endpoint,
+    ``fetch_audio`` downloads an already resolved url.
+    """
+
+    @classmethod
+    def fetch_source(cls, row: dict):
+        """
+        Search musify for the song described by row.
+
+        :param row: dict with at least the keys 'title' and 'artists'
+        :return: the result of ``get_download_link`` for the first matching
+            track suggestion, or None if there is no artist, nothing matched
+            or the request failed
+        """
+        super().fetch_source(row)
+
+        title = row['title']
+        artists = row['artists']
+        if not artists:
+            # without an artist there is no search term to build
+            return None
+
+        # let requests encode the term, so characters like '&' or '#' in a
+        # title don't cut the query off
+        try:
+            r = session.get(
+                url="https://musify.club/search/suggestions",
+                params={'term': f"{artists[0]} - {title}"}
+            )
+        except requests.exceptions.ConnectionError:
+            return None
+        if r.status_code != 200:
+            return None
+
+        # separate loop name, so the row parameter isn't overwritten
+        for suggestion in r.json():
+            if any(a in suggestion['label'] for a in artists) and "/track" in suggestion['url']:
+                return get_download_link(suggestion['url'])
+
+        return None
+
+    @classmethod
+    def fetch_audio(cls, row: dict):
+        """
+        Download the audio of row from musify.
+
+        :param row: dict with at least the keys 'url' and 'file'
+        :return: the return value of ``download_from_musify``
+        """
+        super().fetch_audio(row)
+
+        url = row['url']
+        file_ = row['file']
+        return download_from_musify(file_, url)
+
+
+
 def get_musify_url(row):
    title = row['title']
    artists = row['artists']
--- a/src/music_kraken/audio_source/sources/source.py
+++ b/src/music_kraken/audio_source/sources/source.py
@@ -1,5 +1,8 @@
+from ...utils.shared import *
 from typing import Tuple

+logger = URL_DOWNLOAD_LOGGER
+
 """
 The class "Source" is the superclass every class for specific audio
 sources inherits from. This gives the advantage of a consistent
@@ -7,9 +10,11 @@ calling of the functions do search for a song and to download it.
 """


-class Source:
-    def __init__(self):
-        pass
+class AudioSource:
+    """
+    Base class of the audio sources. Both methods only write a log entry,
+    subclasses call them through super() before doing the actual work.
+    """
+
+    @classmethod
+    def fetch_source(cls, row: dict):
+        """
+        Log that a source for row['title'] is looked up by this class.
+
+        :param row: dict with at least the key 'title'
+        """
+        logger.info(f"try getting source {row['title']} from {cls.__name__}")

-    def get_source(self, row) -> Tuple[str, str]:
-        return "", ""
+    @classmethod
+    def fetch_audio(cls, row: dict):
+        """
+        Log that row['url'] is downloaded by this class to row['file'].
+
+        :param row: dict with at least the keys 'url' and 'file'
+        """
+        logger.info(f"downloading audio from {row['url']} from {cls.__name__} to {row['file']}")
--- a/src/music_kraken/audio_source/sources/youtube.py
+++ b/src/music_kraken/audio_source/sources/youtube.py
@@ -1,10 +1,13 @@
 from typing import List

 import youtube_dl
-import logging
 import time

+from ...utils.shared import *
 from ...utils import phonetic_compares
+from .source import AudioSource
+
+logger = YOUTUBE_LOGGER

 YDL_OPTIONS = {'format': 'bestaudio', 'noplaylist': 'True'}
 YOUTUBE_URL_KEY = 'webpage_url'
@@ -13,6 +16,61 @@ WAIT_BETWEEN_BLOCK = 10
 MAX_TRIES = 3


+class Youtube(AudioSource):
+    """
+    Audio source backed by YouTube, accessed through youtube_dl.
+    """
+
+    @classmethod
+    def fetch_source(cls, row: dict):
+        """
+        Look up a YouTube url for the song described by row via its ISRC.
+
+        :param row: dict with at least the keys 'title' and 'isrc'
+        :return: the 'url' of the last search result that was not skipped,
+            or None if row has no ISRC or every result was skipped
+        """
+        super().fetch_source(row)
+
+        if row['isrc'] is None:
+            return None
+
+        real_title = row['title'].lower()
+
+        final_result = None
+        results = get_youtube_from_isrc(row['isrc'])
+        for result in results:
+            video_title = result['title'].lower()
+            match, distance = phonetic_compares.match_titles(video_title, real_title)
+
+            # NOTE(review): a truthy first value skips the result, so it
+            # presumably means "distance too high" - confirm in match_titles
+            if match:
+                logger.warning(
+                    f"dont downloading {result['url']} cuz the phonetic distance ({distance}) between {real_title} and {video_title} is to high.")
+                continue
+
+            final_result = result
+
+        if final_result is None:
+            return None
+        return final_result['url']
+
+    @classmethod
+    def fetch_audio(cls, row: dict, trie: int=0):
+        """
+        Download row['url'] with youtube_dl to row['file'], extracted as mp3.
+
+        A youtube_dl DownloadError triggers a retry after WAIT_BETWEEN_BLOCK
+        seconds, until MAX_TRIES retries were made.
+
+        :param row: dict with at least the keys 'url' and 'file'
+        :param trie: number of retries already done, callers leave it at 0
+        :return: None after a download without DownloadError, -1 once the
+            retries are used up
+        """
+        super().fetch_audio(row)
+
+        url = row['url']
+        file_ = row['file']
+        options = {
+            'format': 'bestaudio/best',
+            'postprocessors': [{
+                'key': 'FFmpegExtractAudio',
+                'preferredcodec': 'mp3',
+                'preferredquality': '192',
+            }],
+            'keepvideo': False,
+            'outtmpl': file_
+        }
+
+        try:
+            with youtube_dl.YoutubeDL(options) as ydl:
+                ydl.download([url])
+        except youtube_dl.utils.DownloadError:
+            logger.warning(f"youtube blocked downloading. ({trie}-{MAX_TRIES})")
+            if trie >= MAX_TRIES:
+                # give up instead of retrying forever; fetch_audio.py treats
+                # -1 as a failed download
+                logger.warning("too many tries, returning")
+                return -1
+            logger.warning(f"retrying in {WAIT_BETWEEN_BLOCK} seconds again")
+            time.sleep(WAIT_BETWEEN_BLOCK)
+            return cls.fetch_audio(row, trie=trie + 1)
+
 def get_youtube_from_isrc(isrc: str) -> List[dict]:
    # https://stackoverflow.com/questions/63388364/searching-youtube-videos-using-youtube-dl
    with youtube_dl.YoutubeDL(YDL_OPTIONS) as ydl:
--- a/src/music_kraken/utils/shared.py
+++ b/src/music_kraken/utils/shared.py
@@ -14,7 +14,8 @@ DATABASE_STRUCTURE_FALLBACK = "https://raw.githubusercontent.com/HeIIow2/music-d
 SEARCH_LOGGER = logging.getLogger("mb-cli")
 DATABASE_LOGGER = logging.getLogger("database")
 METADATA_DOWNLOAD_LOGGER = logging.getLogger("metadata-download")
-URL_DOWNLOAD_LOGGER = logging.getLogger("ling-download")
+URL_DOWNLOAD_LOGGER = logging.getLogger("AudioSource")
+YOUTUBE_LOGGER = logging.getLogger("Youtube")
 PATH_LOGGER = logging.getLogger("create-paths")
 DOWNLOAD_LOGGER = logging.getLogger("download")
 LYRICS_LOGGER = logging.getLogger("lyrics")
@@ -35,7 +36,7 @@ database = Database(os.path.join(temp_dir, DATABASE_FILE),
                    os.path.join(temp_dir, DATABASE_STRUCTURE_FILE),
                    DATABASE_STRUCTURE_FALLBACK,
                    DATABASE_LOGGER,
-                    reset_anyways=False)
+                    reset_anyways=True)


 TOR = False
@@ -43,3 +44,6 @@ proxies = {
    'http': 'socks5h://127.0.0.1:9150',
    'https': 'socks5h://127.0.0.1:9150'
 } if TOR else {}
+
+# only the sources listed here are used; they are tried in list order
+AUDIO_SOURCES = ["Youtube", "Musify"]