Merge branch 'youtube' into experimental

This commit is contained in:
Hellow2
2023-06-21 08:23:16 +02:00
123 changed files with 5679 additions and 4748 deletions

View File

@@ -1,17 +1,11 @@
import logging
import re
from pathlib import Path
from typing import List
import gc
import musicbrainzngs
from . import objects, pages
from .utils import exception, shared, path_manager
from .utils.config import config, read, write, PATHS_SECTION
from .utils.shared import MUSIC_DIR, MODIFY_GC, NOT_A_GENRE_REGEX, get_random_message
from .utils.string_processing import fit_to_file_system
from .utils.config import read_config
from .utils.shared import MODIFY_GC
from . import cli
if MODIFY_GC:
"""
@@ -30,241 +24,3 @@ if MODIFY_GC:
# third-party loggers are noisy on INFO; keep only warnings and above
logging.getLogger("musicbrainzngs").setLevel(logging.WARNING)
musicbrainzngs.set_useragent("metadata receiver", "0.1", "https://github.com/HeIIow2/music-downloader")

# raw string: the pattern contains regex escapes (\w, \d) which would
# trigger invalid-escape-sequence warnings in a normal string literal
URL_REGEX = r'https?://(?:[-\w.]|(?:%[\da-fA-F]{2}))+'

# commands that trigger a download of the chosen options
# NOTE(review): "\d" is the literal two-character string, not a regex —
# it only matches if the user literally types a backslash and a "d"
DOWNLOAD_COMMANDS = {
    "ok",
    "download",
    "\\d",
    "hs"
}

# commands that leave the interactive loop
EXIT_COMMANDS = {
    "exit",
    "quit"
}
def print_cute_message():
    """
    Print a random cute message, falling back to its printable-ASCII
    characters when the terminal encoding can't represent it.
    """
    message = get_random_message()
    try:
        print(message)
    except UnicodeEncodeError:
        # BUGFIX: str() on a generator produced its repr
        # ("<generator object ...>") instead of the filtered text
        message = "".join(c for c in message if 0 < ord(c) < 127)
        print(message)
def exit_message():
    """Print a blank line, a random cute message and the goodbye line."""
    print()
    print_cute_message()
    print("See you soon! :3")
def paths():
    """Print the locations of all relevant files/directories, then a cute message."""
    locations = (
        ("Temp dir", shared.TEMP_DIR),
        ("Music dir", shared.MUSIC_DIR),
        ("Log file", shared.LOG_PATH),
        ("Config file", shared.CONFIG_FILE),
    )
    print("\n".join(f"{label}:\t{location}" for label, location in locations))

    print()
    print_cute_message()
    print()
def settings(
        name: str = None,
        value: str = None,
):
    """
    Change configuration values.

    When both name and value are given the setting is changed directly;
    otherwise an interactive chooser loops until one setting was changed.
    """

    def modify_setting(_name: str, _value: str, invalid_ok: bool = True) -> bool:
        """Apply one setting and persist the config; False on an invalid value."""
        try:
            config.set_name_to_value(_name, _value)
        except exception.config.SettingException as e:
            if invalid_ok:
                print(e)
                return False
            else:
                raise e

        write()
        return True

    def print_settings():
        # list every attribute as "<id>: <name>=<value>"
        for i, attribute in enumerate(config):
            print(f"{i:0>2}: {attribute.name}={attribute.value}")

    def modify_setting_by_index(index: int) -> bool:
        # prompt for a new value, recursing until a valid one was entered
        attribute = list(config)[index]

        print()
        print(attribute)

        input__ = input(f"{attribute.name}=")
        if not modify_setting(attribute.name, input__.strip()):
            return modify_setting_by_index(index)

        return True

    # non-interactive path: apply directly and leave
    if name is not None and value is not None:
        modify_setting(name, value, invalid_ok=True)

        print()
        print_cute_message()
        print()
        return

    # interactive path
    while True:
        print_settings()

        input_ = input("Id of setting to modify: ")
        print()

        if input_.isdigit() and int(input_) < len(config):
            if modify_setting_by_index(int(input_)):
                print()
                print_cute_message()
                print()
                return
        else:
            print("Please input a valid ID.")
            print()
def cli(
        genre: str = None,
        download_all: bool = False,
        direct_download_url: str = None,
        command_list: List[str] = None
):
    """
    The interactive command line: ask for a genre (unless given), then
    either download a direct url, replay a list of commands, or run the
    read-eval loop until an exit command is entered.
    """

    def get_existing_genre() -> List[str]:
        """
        gets the name of all subdirectories of shared.MUSIC_DIR,
        but filters out all directories, where the name matches with any pattern
        from shared.NOT_A_GENRE_REGEX.
        """
        existing_genres: List[str] = []

        # get all subdirectories of MUSIC_DIR, not the files in the dir.
        existing_subdirectories: List[Path] = [f for f in MUSIC_DIR.iterdir() if f.is_dir()]

        for subdirectory in existing_subdirectories:
            name: str = subdirectory.name
            if not any(re.match(regex_pattern, name) for regex_pattern in NOT_A_GENRE_REGEX):
                existing_genres.append(name)

        existing_genres.sort()
        return existing_genres

    def get_genre():
        # either pick an existing genre by its printed 1-based id,
        # or create a new one after confirmation
        existing_genres = get_existing_genre()
        for i, genre_option in enumerate(existing_genres):
            print(f"{i + 1:0>2}: {genre_option}")

        while True:
            genre = input("Id or new genre: ")

            if genre.isdigit():
                genre_id = int(genre) - 1
                if genre_id >= len(existing_genres):
                    print(f"No genre under the id {genre_id + 1}.")
                    continue
                return existing_genres[genre_id]

            new_genre = fit_to_file_system(genre)

            agree_inputs = {"y", "yes", "ok"}
            verification = input(f"create new genre \"{new_genre}\"? (Y/N): ").lower()
            if verification in agree_inputs:
                return new_genre

    def next_search(_search: pages.Search, query: str) -> bool:
        """
        Process one command/query against the search object.

        :param _search: the active search session
        :param query: the raw user input
        :return: True when the cli should exit in the next step
        """
        nonlocal genre
        nonlocal download_all

        query: str = query.strip()
        parsed: str = query.lower()

        if parsed in EXIT_COMMANDS:
            return True

        if parsed == ".":
            return False
        if parsed == "..":
            _search.goto_previous()
            return False

        if parsed.isdigit():
            _search.choose_index(int(parsed))
            return False

        if parsed in DOWNLOAD_COMMANDS:
            r = _search.download_chosen(genre=genre, download_all=download_all)

            print()
            print(r)
            print()

            # keep the cli open on a mild failure so the user can retry
            return not r.is_mild_failure

        url = re.match(URL_REGEX, query)
        if url is not None:
            if not _search.search_url(url.string):
                print("The given url couldn't be found.")
            return False

        page = _search.get_page_from_query(parsed)
        if page is not None:
            _search.choose_page(page)
            return False

        # if everything else is not valid search
        _search.search(query)
        return False

    if genre is None:
        genre = get_genre()
        print()
        print_cute_message()
        print()

    print(f"Downloading to: \"{genre}\"")
    print()

    search = pages.Search()

    # directly download url
    if direct_download_url is not None:
        if search.search_url(direct_download_url):
            r = search.download_chosen(genre=genre, download_all=download_all)

            print()
            print(r)
            print()
        else:
            print(f"Sorry, could not download the url: {direct_download_url}")

        exit_message()
        return

    # run one command after another from the command list
    if command_list is not None:
        for command in command_list:
            print(f">> {command}")
            if next_search(search, command):
                break
            print(search)

        exit_message()
        return

    # the actual cli
    while True:
        if next_search(search, input(">> ")):
            break
        print(search)

    exit_message()

View File

@@ -1,4 +1,4 @@
if __name__ == "__main__":
def cli():
import argparse
parser = argparse.ArgumentParser(
@@ -16,10 +16,16 @@ if __name__ == "__main__":
help="Sets the logging level to debug."
)
parser.add_argument(
'-m', '--force-post-process',
action="store_true",
help="If a to downloaded thing is skipped due to being found on disc,\nit will still update the metadata accordingly."
)
parser.add_argument(
'-t', '--test',
action="store_true",
help="For the sake of testing. Equals: '-v -g test'"
help="For the sake of testing. Equals: '-vp -g test'"
)
# general arguments
@@ -65,6 +71,13 @@ if __name__ == "__main__":
help="Resets the config file to the default one.",
action="store_true"
)
parser.add_argument(
"--frontend",
"-f",
help="Set a good and fast invidious/piped instance from your homecountry, to reduce the latency.",
action="store_true"
)
arguments = parser.parse_args()
@@ -73,39 +86,44 @@ if __name__ == "__main__":
print("Setting logging-level to DEBUG")
logging.getLogger().setLevel(logging.DEBUG)
import music_kraken
music_kraken.read()
if arguments.setting is not None:
music_kraken.settings(*arguments.setting)
exit()
if arguments.settings:
music_kraken.settings()
exit()
if arguments.paths:
music_kraken.paths()
exit()
from . import cli
from .utils.config import read_config
from .utils import shared
if arguments.r:
import os
if os.path.exists(music_kraken.shared.CONFIG_FILE):
os.remove(music_kraken.shared.CONFIG_FILE)
music_kraken.read()
if os.path.exists(shared.CONFIG_FILE):
os.remove(shared.CONFIG_FILE)
read_config()
exit()
read_config()
if arguments.setting is not None:
cli.settings(*arguments.setting)
if arguments.settings:
cli.settings()
if arguments.paths:
cli.print_paths()
if arguments.frontend:
cli.set_frontend(silent=False)
# getting the genre
genre: str = arguments.genre
if arguments.test:
genre = "test"
try:
music_kraken.cli(
genre=genre,
download_all=arguments.all,
direct_download_url=arguments.url
)
except KeyboardInterrupt:
print("\n\nRaise an issue if I fucked up:\nhttps://github.com/HeIIow2/music-downloader/issues")
music_kraken.exit_message()
cli.download(
genre=genre,
download_all=arguments.all,
direct_download_url=arguments.url,
process_metadata_anyway=arguments.force_post_process or arguments.test
)
if __name__ == "__main__":
cli()

View File

@@ -1,13 +1,18 @@
import ffmpeg
from typing import List, Tuple
from tqdm import tqdm
from ffmpeg_progress_yield import FfmpegProgress
import subprocess
from ..utils.shared import BITRATE, AUDIO_FORMAT, CODEX_LOGGER as LOGGER
from ..objects import Target
def correct_codec(target: Target, bitrate_kb: int = BITRATE, audio_format: str = AUDIO_FORMAT):
def correct_codec(target: Target, bitrate_kb: int = BITRATE, audio_format: str = AUDIO_FORMAT, interval_list: List[Tuple[float, float]] = None):
if not target.exists:
LOGGER.warning(f"Target doesn't exist: {target.file_path}")
return
interval_list = interval_list or []
bitrate_b = int(bitrate_kb / 1024)
@@ -15,18 +20,36 @@ def correct_codec(target: Target, bitrate_kb: int = BITRATE, audio_format: str =
path=target._path,
file=str(target._file) + "." + audio_format
)
# get the select thingie
# https://stackoverflow.com/questions/50594412/cut-multiple-parts-of-a-video-with-ffmpeg
aselect_list: List[str] = []
start = 0
next_start = 0
for end, next_start in interval_list:
aselect_list.append(f"between(t,{start},{end})")
start = next_start
aselect_list.append(f"gte(t,{next_start})")
select = f"aselect='{'+'.join(aselect_list)}',asetpts=N/SR/TB"
# build the ffmpeg command
ffmpeg_command = [
"ffmpeg",
"-i", str(target.file_path),
"-af", select,
"-b", str(bitrate_b),
str(output_target.file_path)
]
stream = ffmpeg.input(target.file_path)
stream = stream.audio
stream = ffmpeg.output(
stream,
str(output_target.file_path),
audio_bitrate=bitrate_b,
format=audio_format
)
out, err = ffmpeg.run(stream, quiet=True, overwrite_output=True)
if err != "":
LOGGER.debug(err)
# run the ffmpeg command with a progressbar
ff = FfmpegProgress(ffmpeg_command)
with tqdm(total=100, desc=f"removing {len(interval_list)} segments") as pbar:
for progress in ff.run_command_with_progress():
pbar.update(progress-pbar.n)
LOGGER.debug(ff.stderr)
output_target.copy_content(target)
output_target.file_path.unlink()
output_target.delete()

View File

@@ -0,0 +1,5 @@
from .informations import print_paths
from .main_downloader import download
from .options.settings import settings
from .options.frontend import set_frontend

View File

@@ -0,0 +1 @@
from .paths import print_paths

View File

@@ -0,0 +1,20 @@
from ..utils import cli_function
from ...utils.path_manager import LOCATIONS
from ...utils import shared
def all_paths():
    """Return the mapping of human-readable labels to the paths this program uses."""
    labels = ("Temp dir", "Music dir", "Log file", "Conf dir", "Conf file")
    values = (
        LOCATIONS.TEMP_DIRECTORY,
        LOCATIONS.MUSIC_DIRECTORY,
        shared.LOG_PATH,
        LOCATIONS.CONFIG_DIRECTORY,
        LOCATIONS.CONFIG_FILE,
    )
    return dict(zip(labels, values))
@cli_function
def print_paths():
    """Print every known location as one "<label>:\t<path>" line."""
    for label, location in all_paths().items():
        print(f"{label}:\t{location}")

View File

@@ -0,0 +1,405 @@
from typing import Set, Type, Dict, List
from pathlib import Path
import re
from .utils import cli_function
from ..utils.shared import MUSIC_DIR, NOT_A_GENRE_REGEX, ENABLE_RESULT_HISTORY, HISTORY_LENGTH, HELP_MESSAGE
from ..utils.regex import URL_PATTERN
from ..utils.string_processing import fit_to_file_system
from ..utils.support_classes import Query, DownloadResult
from ..utils.exception.download import UrlNotFoundException
from ..download.results import Results, Option, PageResults
from ..download.page_attributes import Pages
from ..pages import Page
from ..objects import Song, Album, Artist, DatabaseObject
"""
This is the implementation of the Shell
# Behaviour
## Searching
```mkshell
> s: {querry or url}
# examples
> s: https://musify.club/release/some-random-release-183028492
> s: r: #a an Artist #r some random Release
```
Searches for a URL, or a query
### Query Syntax
```
#a {artist} #r {release} #t {track}
```
You can escape stuff like `#` doing this: `\#`
## Downloading
To download something, you either need a direct link, or you need to have already searched for options
```mkshell
> d: {option ids or direct url}
# examples
> d: 0, 3, 4
> d: 1
> d: https://musify.club/release/some-random-release-183028492
```
## Misc
### Exit
```mkshell
> q
> quit
> exit
> abort
```
### Current Options
```mkshell
> .
```
### Previous Options
```
> ..
```
"""
# commands that leave the interactive shell
EXIT_COMMANDS = {"q", "quit", "exit", "abort"}
# one-letter prefixes used to label the page sections of the option list
ALPHABET = "abcdefghijklmnopqrstuvwxyz"
# fill character / width for the page-name separator lines
PAGE_NAME_FILL = "-"
MAX_PAGE_LEN = 21
def get_existing_genre() -> List[str]:
    """
    Return the names of all sub-directories of MUSIC_DIR, sorted
    alphabetically, skipping every directory whose name matches one of
    the NOT_A_GENRE_REGEX patterns.
    """

    def _is_genre(directory: Path) -> bool:
        # a directory counts as a genre unless any pattern matches its name
        return not any(re.match(pattern, directory.name) for pattern in NOT_A_GENRE_REGEX)

    return sorted(entry.name for entry in MUSIC_DIR.iterdir() if entry.is_dir() and _is_genre(entry))
def get_genre():
    """
    Interactively ask for a genre: either the id of an existing genre
    directory, or the name of a new one (which has to be confirmed).
    """
    options = get_existing_genre()
    for position, option in enumerate(options):
        print(f"{position + 1:0>2}: {option}")

    confirmations = {"y", "yes", "ok"}
    while True:
        raw = input("Id or new genre: ")

        if raw.isdigit():
            # printed ids are 1-based
            option_index = int(raw) - 1
            if option_index >= len(options):
                print(f"No genre under the id {option_index + 1}.")
                continue
            return options[option_index]

        candidate = fit_to_file_system(raw)
        answer = input(f"create new genre \"{candidate}\"? (Y/N): ").lower()
        if answer in confirmations:
            return candidate
def help_message():
    """Print HELP_MESSAGE framed by blank lines."""
    print(f"\n{HELP_MESSAGE}\n")
class Downloader:
    """
    Interactive download shell: lets the user search the implemented
    pages, browse the results and download chosen music objects into a
    genre directory.
    """

    def __init__(
            self,
            exclude_pages: Set[Type[Page]] = None,
            exclude_shady: bool = False,
            max_displayed_options: int = 10,
            option_digits: int = 3,
            genre: str = None,
            process_metadata_anyway: bool = False,
    ) -> None:
        self.pages: Pages = Pages(exclude_pages=exclude_pages, exclude_shady=exclude_shady)

        # maps the printed page prefixes/names to page types
        self.page_dict: Dict[str, Type[Page]] = dict()

        self.max_displayed_options = max_displayed_options
        self.option_digits: int = option_digits

        self.current_results: Results = None
        self._result_history: List[Results] = []

        # fall back to asking the user when no genre was passed
        self.genre = genre or get_genre()
        self.process_metadata_anyway = process_metadata_anyway

        print()
        print(f"Downloading to: \"{self.genre}\"")
        print()

    def print_current_options(self):
        """Print the current result list, labelling each page section with a letter."""
        self.page_dict = dict()

        print()
        page_count = 0
        for option in self.current_results.formated_generator(max_items_per_page=self.max_displayed_options):
            if isinstance(option, Option):
                print(f"{option.index:0{self.option_digits}} {option.music_object.option_string}")
            else:
                prefix = ALPHABET[page_count % len(ALPHABET)]
                print(f"({prefix}) ------------------------{option.__name__:{PAGE_NAME_FILL}<{MAX_PAGE_LEN}}------------")

                # the page can be addressed by its letter or its class name
                self.page_dict[prefix] = option
                self.page_dict[option.__name__] = option

                page_count += 1
        print()

    def set_current_options(self, current_options: Results):
        """Make current_options the active result set, recording it in the history."""
        if ENABLE_RESULT_HISTORY:
            self._result_history.append(current_options)

            # a HISTORY_LENGTH of -1 means unlimited
            if HISTORY_LENGTH != -1:
                if len(self._result_history) > HISTORY_LENGTH:
                    self._result_history.pop(0)

        self.current_results = current_options

    def previous_option(self) -> bool:
        """Step back to the previous result set; True when that worked."""
        if not ENABLE_RESULT_HISTORY:
            print("History is turned of.\nGo to settings, and change the value at 'result_history' to 'true'.")
            return False

        if len(self._result_history) <= 1:
            print(f"No results in history.")
            return False
        self._result_history.pop()
        self.current_results = self._result_history[-1]
        return True

    def _process_parsed(self, key_text: Dict[str, str], query: str) -> Query:
        """Build a Query from the parsed "#a ... #r ... #t ..." key/value pairs."""
        song = None if not "t" in key_text else Song(title=key_text["t"], dynamic=True)
        album = None if not "r" in key_text else Album(title=key_text["r"], dynamic=True)
        artist = None if not "a" in key_text else Artist(name=key_text["a"], dynamic=True)

        if song is not None:
            song.album_collection.append(album)
            song.main_artist_collection.append(artist)
            return Query(raw_query=query, music_object=song)

        if album is not None:
            album.artist_collection.append(artist)
            return Query(raw_query=query, music_object=album)

        if artist is not None:
            return Query(raw_query=query, music_object=artist)

        return Query(raw_query=query)

    def search(self, query: str):
        """Run a search: resolve a url directly, or parse the "#key value" query syntax."""
        if re.match(URL_PATTERN, query) is not None:
            try:
                page, data_object = self.pages.fetch_url(query)
            except UrlNotFoundException as e:
                print(f"{e.url} could not be attributed/parsed to any yet implemented site.\n"
                      f"PR appreciated if the site isn't implemented.\n"
                      f"Recommendations and suggestions on sites to implement appreciated.\n"
                      f"But don't be a bitch if I don't end up implementing it.")
                return
            self.set_current_options(PageResults(page, data_object.options))
            self.print_current_options()
            return

        special_characters = "#\\"
        # sentinel space so every real character has a next_char
        query = query + " "

        key_text = {}

        skip_next = False
        escape_next = False
        new_text = ""
        latest_key: str = None
        for i in range(len(query) - 1):
            current_char = query[i]
            next_char = query[i + 1]

            if skip_next:
                skip_next = False
                continue

            if escape_next:
                new_text += current_char
                escape_next = False
                # BUGFIX: without this continue an escaped "#" or "\" fell
                # through and was processed again as an unescaped control
                # character (e.g. "\#" still started a new key)
                continue

            # escaping
            if current_char == "\\":
                if next_char in special_characters:
                    escape_next = True
                continue

            if current_char == "#":
                if latest_key is not None:
                    key_text[latest_key] = new_text
                    new_text = ""

                # the single character after "#" is the key
                latest_key = next_char
                skip_next = True
                continue

            new_text += current_char

        if latest_key is not None:
            key_text[latest_key] = new_text

        parsed_query: Query = self._process_parsed(key_text, query)

        self.set_current_options(self.pages.search(parsed_query))
        self.print_current_options()

    def goto(self, index: int):
        """Fetch details for the result at index and make its options current."""
        page: Type[Page]
        music_object: DatabaseObject

        if self.current_results is not None:
            self.current_results.delete_details(index)

        try:
            page, music_object = self.current_results.get_music_object_by_index(index)
        except KeyError:
            print()
            print(f"The option {index} doesn't exist.")
            print()
            return

        self.pages.fetch_details(music_object)

        self.set_current_options(PageResults(page, music_object.options))

        self.print_current_options()

    def download(self, download_str: str, download_all: bool = False) -> bool:
        """
        Download a direct url or a ", "-separated list of result indices.
        Returns True when a download was dispatched.
        """
        to_download: List[DatabaseObject] = []

        if re.match(URL_PATTERN, download_str) is not None:
            _, music_objects = self.pages.fetch_url(download_str)
            to_download.append(music_objects)
        else:
            # validate every index before resolving any of them
            index: str
            for index in download_str.split(", "):
                if not index.strip().isdigit():
                    print()
                    print(f"Every download thingie has to be an index, not {index}.")
                    print()
                    return False

            for index in download_str.split(", "):
                to_download.append(self.current_results.get_music_object_by_index(int(index))[1])

        print()
        print("Downloading:")
        for download_object in to_download:
            print(download_object.option_string)
        print()

        _result_map: Dict[DatabaseObject, DownloadResult] = dict()

        for database_object in to_download:
            r = self.pages.download(music_object=database_object, genre=self.genre, download_all=download_all, process_metadata_anyway=self.process_metadata_anyway)
            _result_map[database_object] = r

        for music_object, result in _result_map.items():
            print()
            print(music_object.option_string)
            print(result)

        return True

    def process_input(self, input_str: str) -> bool:
        """Dispatch one shell command; returns True when the shell should exit."""
        input_str = input_str.strip()
        processed_input: str = input_str.lower()

        if processed_input in EXIT_COMMANDS:
            return True

        if processed_input == ".":
            self.print_current_options()
            return False

        if processed_input == "..":
            if self.previous_option():
                self.print_current_options()
            return False

        if processed_input.startswith("s: "):
            self.search(input_str[3:])
            return False

        if processed_input.startswith("d: "):
            return self.download(input_str[3:])

        if processed_input.isdigit():
            self.goto(int(processed_input))
            return False

        # anything unrecognized (other than "help" itself) is an error;
        # both cases end with the help text
        if processed_input != "help":
            print("Invalid input.")
        help_message()
        return False

    def mainloop(self):
        """Read commands until an exit command was entered."""
        while True:
            if self.process_input(input("> ")):
                return
@cli_function
def download(
        genre: str = None,
        download_all: bool = False,
        direct_download_url: str = None,
        command_list: List[str] = None,
        process_metadata_anyway: bool = False,
):
    """
    Entry point of the download shell: replay a command list, download a
    direct url, or (by default) start the interactive mainloop.
    """
    downloader = Downloader(genre=genre, process_metadata_anyway=process_metadata_anyway)

    # scripted mode: feed every command into the shell and stop
    if command_list is not None:
        for command in command_list:
            downloader.process_input(command)
        return

    # direct mode: skip the loop when the url download was dispatched
    if direct_download_url is not None and downloader.download(direct_download_url, download_all=download_all):
        return

    downloader.mainloop()

View File

@@ -0,0 +1,185 @@
from typing import Dict, List
from dataclasses import dataclass
from collections import defaultdict
from ..utils import cli_function
from ...objects import Country
from ...utils import config, write_config
from ...connection import Connection
@dataclass
class Instance:
    """
    One selectable frontend instance.

    Attributes which influence the quality of an instance:
    - users
    """
    name: str
    uri: str
    regions: List[Country]
    # total user count, used for ranking; unknown defaults to 0
    users: int = 0

    def __str__(self) -> str:
        return "{} with {} users.".format(self.name, self.users)
class FrontendInstance:
    """
    Base class for a pickable list of frontend instances; subclasses
    fetch the instance lists and define which setting gets the chosen uri.
    """
    # config attribute the chosen instance uri is written to; overridden by subclasses
    SETTING_NAME = "placeholder"

    def __init__(self) -> None:
        # instances grouped by the country they are hosted in
        self.region_instances: Dict[Country, List[Instance]] = defaultdict(list)
        self.all_instances: List[Instance] = []

    def add_instance(self, instance: Instance):
        """Register a fetched instance in the global and per-region lists."""
        self.all_instances.append(instance)

        # NOTE(review): this writes EVERY added instance's uri to the
        # "youtube_url" setting regardless of SETTING_NAME — looks
        # unintentional; confirm against the config schema
        config.set_name_to_value("youtube_url", instance.uri)

        for region in instance.regions:
            self.region_instances[region].append(instance)

    def fetch(self, silent: bool = False):
        """Announce the download; subclasses do the actual fetching."""
        if not silent:
            print(f"Downloading {type(self).__name__} instances...")

    def set_instance(self, instance: Instance):
        """Persist the chosen instance's uri under SETTING_NAME."""
        config.set_name_to_value(self.SETTING_NAME, instance.uri)
        write_config()

    def _choose_country(self) -> List[Instance]:
        # ask for an alpha-2 country code until a known one is entered,
        # then return the instances of that country
        print("Input the country code, an example would be \"US\"")
        print('\n'.join(f'{region.name} ({region.alpha_2})' for region in self.region_instances))
        print()

        available_instances = set(i.alpha_2 for i in self.region_instances)

        chosen_region = ""
        while chosen_region not in available_instances:
            chosen_region = input("nearest country: ").strip().upper()

        return self.region_instances[Country.by_alpha_2(chosen_region)]

    def choose(self, silent: bool = False):
        """
        Pick an instance: silently the one with the most users overall,
        otherwise interactively from the user's country.
        """
        instances = self.all_instances if silent else self._choose_country()
        # most users first
        instances.sort(key=lambda x: x.users, reverse=True)

        if silent:
            self.set_instance(instances[0])
            return

        # output the options
        print("Choose your instance (input needs to be a digit):")
        for i, instance in enumerate(instances):
            print(f"{i}) {instance}")
        print()

        # ask for index
        index = ""
        while not index.isdigit() or int(index) >= len(instances):
            index = input("> ").strip()

        instance = instances[int(index)]
        print()
        print(f"Setting the instance to {instance}")

        self.set_instance(instance)
class Invidious(FrontendInstance):
    """Fetches the public invidious instance list from api.invidious.io."""

    SETTING_NAME = "invidious_instance"

    def __init__(self) -> None:
        self.connection = Connection(host="https://api.invidious.io/")
        self.endpoint = "https://api.invidious.io/instances.json"

        super().__init__()

    def _process_instance(self, all_instance_data: dict):
        # each payload row looks like [name, {...instance details...}]
        instance_data = all_instance_data[1]
        stats = instance_data["stats"]

        # skip instances without api access or not served over https
        if not instance_data["api"]:
            return
        if instance_data["type"] != "https":
            return

        region = instance_data["region"]

        instance = Instance(
            name=all_instance_data[0],
            uri=instance_data["uri"],
            regions=[Country.by_alpha_2(region)],
            users=stats["usage"]["users"]["total"]
        )

        self.add_instance(instance)

    # NOTE(review): unlike the base class, silent has no default here
    def fetch(self, silent: bool):
        r = self.connection.get(self.endpoint)
        if r is None:
            return

        for instance in r.json():
            self._process_instance(all_instance_data=instance)
class Piped(FrontendInstance):
    """Fetches the piped instance list from the Piped-Frontend wiki page."""

    SETTING_NAME = "piped_instance"

    def __init__(self) -> None:
        self.connection = Connection(host="https://raw.githubusercontent.com")

        super().__init__()

    def process_instance(self, instance_data: str):
        # one markdown table row: "name | api url | countries | ..."
        cells = instance_data.split(" | ")

        instance = Instance(
            name=cells[0].strip(),
            uri=cells[1].strip(),
            regions=[Country.by_emoji(flag) for flag in cells[2].split(", ")]
        )

        self.add_instance(instance)

    def fetch(self, silent: bool = False):
        r = self.connection.get("https://raw.githubusercontent.com/wiki/TeamPiped/Piped-Frontend/Instances.md")
        if r is None:
            return

        process = False

        for line in r.content.decode("utf-8").split("\n"):
            line = line.strip()

            # table rows start on the line AFTER the markdown header
            # separator ("---..."); the separator itself is skipped
            if line != "" and process:
                self.process_instance(line)

            if line.startswith("---"):
                process = True
class FrontendSelection:
    """Runs the invidious picker first, then the piped picker."""

    def __init__(self):
        self.invidious = Invidious()
        self.piped = Piped()

    def choose(self, silent: bool = False):
        # same order as construction: invidious, then piped
        for frontend in (self.invidious, self.piped):
            frontend.fetch(silent)
            frontend.choose(silent)
@cli_function
def set_frontend(silent: bool = False):
    """Let the user pick frontend instances (auto-picked when silent)."""
    FrontendSelection().choose(silent=silent)

View File

@@ -0,0 +1,71 @@
from ..utils import cli_function
from ...utils.config import config, write_config
from ...utils import exception
def modify_setting(_name: str, _value: str, invalid_ok: bool = True) -> bool:
    """
    Set the setting _name to _value and persist the config.

    Returns True on success. An invalid value is printed and reported as
    False when invalid_ok is set, otherwise the exception is re-raised.
    """
    try:
        config.set_name_to_value(_name, _value)
    except exception.config.SettingException as e:
        if not invalid_ok:
            raise e
        print(e)
        return False

    write_config()
    return True
def print_settings():
    """List every config attribute as "<id>: <name>=<value>"."""
    for position, attribute in enumerate(config):
        print(f"{position:0>2}: {attribute.name}={attribute.value}")
# BUGFIX: this function was defined twice, identically; the duplicate
# definition has been removed.
def modify_setting_by_index(index: int) -> bool:
    """
    Prompt for a new value of the attribute at `index` and apply it,
    re-prompting (recursively) until a valid value was entered.
    """
    attribute = list(config)[index]

    print()
    print(attribute)

    input__ = input(f"{attribute.name}=")
    if not modify_setting(attribute.name, input__.strip()):
        return modify_setting_by_index(index)

    return True
@cli_function
def settings(
        name: str = None,
        value: str = None,
):
    """
    Modify a setting: applied directly when both name and value are
    given, otherwise via an interactive chooser.
    """
    # non-interactive path
    if name is not None and value is not None:
        modify_setting(name, value, invalid_ok=True)
        return

    # interactive path: keep asking until one setting was changed
    while True:
        print_settings()

        selection = input("Id of setting to modify: ")
        print()

        if not (selection.isdigit() and int(selection) < len(config)):
            print("Please input a valid ID.")
            print()
            continue

        if modify_setting_by_index(int(selection)):
            return

View File

@@ -0,0 +1,32 @@
from ..utils.shared import get_random_message
def cli_function(function):
    """
    Decorator for every command line entry point: frames the call with a
    cute message, converts Ctrl+C into a friendly hint, and always
    terminates the process with a goodbye banner via exit().
    """
    import functools

    # BUGFIX/idiom: functools.wraps preserves the wrapped function's
    # __name__/__doc__ for help() and logging
    @functools.wraps(function)
    def wrapper(*args, **kwargs):
        print_cute_message()
        print()

        try:
            function(*args, **kwargs)
        except KeyboardInterrupt:
            print("\n\nRaise an issue if I fucked up:\nhttps://github.com/HeIIow2/music-downloader/issues")
        finally:
            print()
            print_cute_message()
            print("See you soon! :3")
            # every cli function ends the program when it is done
            exit()

    return wrapper
def print_cute_message():
    """
    Print a random cute message, falling back to its printable-ASCII
    characters when the terminal encoding can't represent it.
    """
    message = get_random_message()
    try:
        print(message)
    except UnicodeEncodeError:
        # BUGFIX: str() on a generator produced its repr
        # ("<generator object ...>") instead of the filtered text
        message = "".join(c for c in message if 0 < ord(c) < 127)
        print(message)

View File

@@ -0,0 +1 @@
from .connection import Connection

View File

@@ -0,0 +1,295 @@
import time
from typing import List, Dict, Callable, Optional, Set
from urllib.parse import urlparse, urlunsplit, ParseResult
import logging
import requests
from tqdm import tqdm
from .rotating import RotatingProxy
from ..utils.shared import PROXIES_LIST, CHUNK_SIZE
from ..utils.support_classes import DownloadResult
from ..objects import Target
class Connection:
    """
    requests.Session wrapper bound to one host: default browser-like
    headers, rotating proxies, bounded retries and a resumable
    streaming download.
    """

    def __init__(
            self,
            host: str,
            proxies: List[dict] = None,
            tries: int = (len(PROXIES_LIST) + 1) * 4,
            timeout: int = 7,
            logger: logging.Logger = logging.getLogger("connection"),
            header_values: Dict[str, str] = None,
            accepted_response_codes: Set[int] = None,
            semantic_not_found: bool = True
    ):
        if proxies is None:
            proxies = PROXIES_LIST
        if header_values is None:
            header_values = dict()

        self.HEADER_VALUES = header_values

        self.LOGGER = logger
        self.HOST = urlparse(host)
        self.TRIES = tries
        self.TIMEOUT = timeout
        self.rotating_proxy = RotatingProxy(proxy_list=proxies)

        self.ACCEPTED_RESPONSE_CODES = accepted_response_codes or {200}
        # when True, a 404 is treated as a definitive "not found" (logged,
        # None returned) instead of being retried
        self.SEMANTIC_NOT_FOUND = semantic_not_found

        self.session = requests.Session()
        self.session.headers = self.get_header(**self.HEADER_VALUES)
        self.session.proxies = self.rotating_proxy.current_proxy

    def base_url(self, url: ParseResult = None):
        # "scheme://netloc" of the given url, defaulting to the configured host
        if url is None:
            url = self.HOST

        return urlunsplit((url.scheme, url.netloc, "", "", ""))

    def get_header(self, **header_values) -> Dict[str, str]:
        # default headers, overridable per call via keyword arguments
        return {
            "user-agent": "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/112.0.0.0 Safari/537.36",
            "Connection": "keep-alive",
            # "Host": self.HOST.netloc,
            "Referer": self.base_url(),
            **header_values
        }

    def rotate(self):
        """Switch the session to the next proxy of the rotation."""
        self.session.proxies = self.rotating_proxy.rotate()

    def _update_headers(
            self,
            headers: Optional[dict],
            refer_from_origin: bool,
            url: ParseResult
    ) -> Dict[str, str]:
        if headers is None:
            headers = dict()

        # when not referring from the configured origin, refer from the
        # base of the requested url itself
        if not refer_from_origin:
            headers["Referer"] = self.base_url(url=url)

        return headers

    def _request(
            self,
            request: Callable,
            try_count: int,
            accepted_response_codes: set,
            url: str,
            timeout: float,
            headers: dict,
            refer_from_origin: bool = True,
            raw_url: bool = False,
            wait_on_403: bool = True,
            **kwargs
    ) -> Optional[requests.Response]:
        """
        One attempt of the given session method, recursing with the next
        proxy until TRIES is exhausted. Returns None when every attempt
        failed (or immediately on a semantic 404).
        """
        if try_count >= self.TRIES:
            return

        if timeout is None:
            timeout = self.TIMEOUT

        parsed_url = urlparse(url)

        headers = self._update_headers(
            headers=headers,
            refer_from_origin=refer_from_origin,
            url=parsed_url
        )

        # raw_url skips the urlparse round-trip normalization
        request_url = parsed_url.geturl() if not raw_url else url

        connection_failed = False
        try:
            r: requests.Response = request(request_url, timeout=timeout, headers=headers, **kwargs)

            if r.status_code in accepted_response_codes:
                return r

            if self.SEMANTIC_NOT_FOUND and r.status_code == 404:
                self.LOGGER.warning(f"Couldn't find url (404): {request_url}")
                return None

        except requests.exceptions.Timeout:
            self.LOGGER.warning(f"Request timed out at \"{request_url}\": ({try_count}-{self.TRIES})")
            connection_failed = True
        except requests.exceptions.ConnectionError:
            self.LOGGER.warning(f"Couldn't connect to \"{request_url}\": ({try_count}-{self.TRIES})")
            connection_failed = True

        if not connection_failed:
            self.LOGGER.warning(f"{self.HOST.netloc} responded wit {r.status_code} "
                                f"at {url}. ({try_count}-{self.TRIES})")
            self.LOGGER.debug(r.content)

            # NOTE(review): despite the parameter name this waits on every
            # non-accepted status code, not only 403 — confirm intent
            if wait_on_403:
                self.LOGGER.warning(f"Waiting for 5 seconds.")
                time.sleep(5)

        self.rotate()

        # NOTE(review): the retry call drops refer_from_origin, raw_url and
        # wait_on_403, so these fall back to their defaults on retries
        return self._request(
            request=request,
            try_count=try_count + 1,
            accepted_response_codes=accepted_response_codes,
            url=url,
            timeout=timeout,
            headers=headers,
            **kwargs
        )

    def get(
            self,
            url: str,
            refer_from_origin: bool = True,
            stream: bool = False,
            accepted_response_codes: set = None,
            timeout: float = None,
            headers: dict = None,
            raw_url: bool = False,
            **kwargs
    ) -> Optional[requests.Response]:
        """GET the url; returns the response, or None when all retries failed."""
        if accepted_response_codes is None:
            accepted_response_codes = self.ACCEPTED_RESPONSE_CODES

        r = self._request(
            request=self.session.get,
            try_count=0,
            accepted_response_codes=accepted_response_codes,
            url=url,
            timeout=timeout,
            headers=headers,
            raw_url=raw_url,
            refer_from_origin=refer_from_origin,
            stream=stream,
            **kwargs
        )
        if r is None:
            self.LOGGER.warning(f"Max attempts ({self.TRIES}) exceeded for: GET:{url}")
        return r

    def post(
            self,
            url: str,
            json: dict,
            refer_from_origin: bool = True,
            stream: bool = False,
            accepted_response_codes: set = None,
            timeout: float = None,
            headers: dict = None,
            raw_url: bool = False,
            **kwargs
    ) -> Optional[requests.Response]:
        """POST the json payload to url; returns the response, or None on failure."""
        r = self._request(
            request=self.session.post,
            try_count=0,
            accepted_response_codes=accepted_response_codes or self.ACCEPTED_RESPONSE_CODES,
            url=url,
            timeout=timeout,
            headers=headers,
            refer_from_origin=refer_from_origin,
            raw_url=raw_url,
            json=json,
            stream=stream,
            **kwargs
        )
        if r is None:
            # NOTE(review): the log message says "GET:" although this is a POST
            self.LOGGER.warning(f"Max attempts ({self.TRIES}) exceeded for: GET:{url}")
            self.LOGGER.warning(f"payload: {json}")
        return r

    def stream_into(
            self,
            url: str,
            target: Target,
            description: str = "download",
            refer_from_origin: bool = True,
            accepted_response_codes: set = None,
            timeout: float = None,
            headers: dict = None,
            raw_url: bool = False,
            chunk_size: int = CHUNK_SIZE,
            try_count: int = 0,
            progress: int = 0,
            **kwargs
    ) -> DownloadResult:
        """
        Stream url into the target file with a progress bar, resuming via
        HTTP Range requests when the connection drops mid-transfer.
        """
        if progress > 0:
            if headers is None:
                headers = dict()
            # resume where the file on disk currently ends
            headers["Range"] = f"bytes={target.size}-"

        if accepted_response_codes is None:
            accepted_response_codes = self.ACCEPTED_RESPONSE_CODES

        r = self._request(
            request=self.session.get,
            try_count=0,
            accepted_response_codes=accepted_response_codes,
            url=url,
            timeout=timeout,
            headers=headers,
            raw_url=raw_url,
            refer_from_origin=refer_from_origin,
            stream=True,
            **kwargs
        )

        if r is None:
            return DownloadResult(error_message=f"Could not establish connection to: {url}")

        target.create_path()
        # NOTE(review): raises TypeError when the server sends no
        # content-length header — confirm whether that can happen here
        total_size = int(r.headers.get('content-length'))
        progress = 0

        retry = False

        with target.open("ab") as f:
            """
            https://en.wikipedia.org/wiki/Kilobyte
            > The internationally recommended unit symbol for the kilobyte is kB.
            """
            with tqdm(total=total_size - target.size, unit='B', unit_scale=True, unit_divisor=1024, desc=description) as t:
                try:
                    for chunk in r.iter_content(chunk_size=chunk_size):
                        size = f.write(chunk)
                        progress += size
                        t.update(size)

                except requests.exceptions.ConnectionError:
                    if try_count >= self.TRIES:
                        self.LOGGER.warning(f"Stream timed out at \"{url}\": to many retries, aborting.")
                        return DownloadResult(error_message=f"Stream timed out from {url}, reducing the chunksize might help.")

                    self.LOGGER.warning(f"Stream timed out at \"{url}\": ({try_count}-{self.TRIES})")
                    retry = True

        # also retry when the transfer ended short of the announced size
        if total_size > progress:
            retry = True

        if retry:
            self.LOGGER.warning(f"Retrying stream...")
            # a Range request is answered with 206 Partial Content
            accepted_response_codes.add(206)
            return self.stream_into(
                url=url,
                target=target,
                description=description,
                try_count=try_count + 1,
                progress=progress,
                accepted_response_codes=accepted_response_codes,
                timeout=timeout,
                headers=headers,
                raw_url=raw_url,
                refer_from_origin=refer_from_origin,
                chunk_size=chunk_size,
                **kwargs
            )

        return DownloadResult()

View File

@@ -0,0 +1,43 @@
from typing import Dict, List
import requests
class RotatingObject:
    """
    Base class for anything that hands out its items round-robin style.

    This will be used for RotatingProxies and invidious instances.
    """

    def __init__(self, object_list: list):
        self._object_list: list = object_list
        if not self._object_list:
            raise ValueError("There needs to be at least one item in a Rotating structure.")
        self._current_index = 0

    def __len__(self):
        """Number of items taking part in the rotation."""
        return len(self._object_list)

    @property
    def object(self):
        """The item the rotation currently points at, without advancing."""
        return self._object_list[self._current_index]

    @property
    def next(self):
        """Advance the rotation one step and return the new current item."""
        self._current_index = (self._current_index + 1) % len(self._object_list)
        return self._object_list[self._current_index]
class RotatingProxy(RotatingObject):
    """
    Rotation over requests-style proxy mappings.

    An empty proxy list degrades to a single ``None`` entry, which stands for
    "no proxy" but keeps the rotation machinery functional.
    """

    def __init__(self, proxy_list: List[Dict[str, str]]):
        super().__init__(proxy_list or [None])

    def rotate(self) -> Dict[str, str]:
        """Step to the next proxy and return it."""
        return self.next

    @property
    def current_proxy(self) -> Dict[str, str]:
        """The proxy currently in use, without advancing the rotation."""
        return super().object

View File

@@ -0,0 +1,97 @@
from typing import Tuple, Type, Dict, List, Set
from .results import SearchResults
from ..objects import DatabaseObject, Source
from ..utils.enums.source import SourcePages
from ..utils.support_classes import Query, DownloadResult
from ..utils.exception.download import UrlNotFoundException
from ..pages import Page, EncyclopaediaMetallum, Musify, YouTube, INDEPENDENT_DB_OBJECTS
# Every page implementation known to the downloader.
ALL_PAGES: Set[Type[Page]] = {
    EncyclopaediaMetallum,
    Musify,
    YouTube,
}

# Pages that can actually serve audio files (used when picking a download source).
AUDIO_PAGES: Set[Type[Page]] = {
    Musify,
    YouTube,
}

# Pages a user may want to opt out of via `exclude_shady`.
SHADY_PAGES: Set[Type[Page]] = {
    Musify,
}
class Pages:
    """
    Facade over all enabled page implementations.

    Instantiates one long-lived object per enabled page class and routes
    searching, detail fetching and downloading to the page(s) a music object
    has sources on.
    """

    def __init__(self, exclude_pages: Set[Type[Page]] = None, exclude_shady: bool = False) -> None:
        # initialize all page instances
        self._page_instances: Dict[Type[Page], Page] = dict()
        self._source_to_page: Dict[SourcePages, Type[Page]] = dict()

        exclude_pages = exclude_pages if exclude_pages is not None else set()
        if exclude_shady:
            exclude_pages = exclude_pages.union(SHADY_PAGES)
        if not exclude_pages.issubset(ALL_PAGES):
            raise ValueError(f"The excluded pages have to be a subset of all pages: {exclude_pages} | {ALL_PAGES}")

        # deterministic ordering (by class name) for the public tuples
        def _set_to_tuple(page_set: Set[Type[Page]]) -> Tuple[Type[Page], ...]:
            return tuple(sorted(page_set, key=lambda page: page.__name__))

        self._pages_set: Set[Type[Page]] = ALL_PAGES.difference(exclude_pages)
        self.pages: Tuple[Type[Page], ...] = _set_to_tuple(self._pages_set)
        self._audio_pages_set: Set[Type[Page]] = self._pages_set.intersection(AUDIO_PAGES)
        self.audio_pages: Tuple[Type[Page], ...] = _set_to_tuple(self._audio_pages_set)

        for page_type in self.pages:
            # one shared instance per page class; also index pages by their source enum
            self._page_instances[page_type] = page_type()
            self._source_to_page[page_type.SOURCE_TYPE] = page_type

    def search(self, query: Query) -> SearchResults:
        """Run the query against every enabled page and bundle the per-page results."""
        result = SearchResults()
        for page_type in self.pages:
            result.add(
                page=page_type,
                search_result=self._page_instances[page_type].search(query=query)
            )
        return result

    def fetch_details(self, music_object: DatabaseObject, stop_at_level: int = 1) -> DatabaseObject:
        """Merge details from every enabled page the object has a source on.

        Objects that aren't independently fetchable are returned unchanged.
        """
        if not isinstance(music_object, INDEPENDENT_DB_OBJECTS):
            return music_object
        for source_page in music_object.source_collection.source_pages:
            page_type = self._source_to_page[source_page]
            if page_type in self._pages_set:
                music_object.merge(self._page_instances[page_type].fetch_details(music_object=music_object, stop_at_level=stop_at_level))
        return music_object

    def download(self, music_object: DatabaseObject, genre: str, download_all: bool = False, process_metadata_anyway: bool = False) -> DownloadResult:
        """Download the object from one audio-capable page it has a source on.

        NOTE(review): `audio_pages` is a set and the loop returns on its first
        element, so which page wins when several qualify depends on set
        iteration order.
        """
        if not isinstance(music_object, INDEPENDENT_DB_OBJECTS):
            return DownloadResult(error_message=f"{type(music_object).__name__} can't be downloaded.")
        _page_types = set(self._source_to_page[src] for src in music_object.source_collection.source_pages)
        audio_pages = self._audio_pages_set.intersection(_page_types)
        for download_page in audio_pages:
            return self._page_instances[download_page].download(music_object=music_object, genre=genre, download_all=download_all, process_metadata_anyway=process_metadata_anyway)
        return DownloadResult(error_message=f"No audio source has been found for {music_object}.")

    def fetch_url(self, url: str, stop_at_level: int = 2) -> Tuple[Type[Page], DatabaseObject]:
        """Resolve an arbitrary url to (page class, fetched music object).

        Raises UrlNotFoundException when no page recognises the url.
        """
        source = Source.match_url(url, SourcePages.MANUAL)
        if source is None:
            raise UrlNotFoundException(url=url)
        _actual_page = self._source_to_page[source.page_enum]
        return _actual_page, self._page_instances[_actual_page].fetch_object_from_source(source=source, stop_at_level=stop_at_level)

View File

@@ -0,0 +1,101 @@
from typing import Tuple, Type, Dict, List, Generator, Union
from dataclasses import dataclass
from ..objects import DatabaseObject
from ..utils.enums.source import SourcePages
from ..pages import Page, EncyclopaediaMetallum, Musify
@dataclass
class Option:
    """One numbered choice in a result listing: the index a user types in,
    paired with the music object that index resolves to."""
    # position shown to (and entered by) the user
    index: int
    # the object this option stands for
    music_object: DatabaseObject
class Results:
    """
    Base container mapping user-facing option indices onto music objects and
    the page each object came from.

    Subclasses fill `_by_index` / `_page_by_index` while their
    `formated_generator` is being consumed, so the generator has to be
    iterated before index lookups can succeed.
    """

    def __init__(self) -> None:
        self._by_index: Dict[int, DatabaseObject] = dict()
        # FIX: was annotated `Dict[int: Type[Page]]` — a slice expression, not
        # a (key, value) type-parameter pair — corrected to the intended form.
        self._page_by_index: Dict[int, Type[Page]] = dict()

    def __iter__(self) -> Generator[DatabaseObject, None, None]:
        """Yield every music object in display order.

        Relies on the subclass `formated_generator`; the base implementation
        yields nothing, so iterating a bare `Results` is not meaningful.
        """
        for option in self.formated_generator():
            if isinstance(option, Option):
                yield option.music_object

    def formated_generator(self, max_items_per_page: int = 10) -> Generator[Union[Type[Page], Option], None, None]:
        """Reset the index maps; subclasses extend this and yield the options.

        Intentionally NOT a generator here: subclasses call
        `super().formated_generator()` purely for this reset side effect.
        """
        self._by_index = dict()
        self._page_by_index = dict()

    def get_music_object_by_index(self, index: int) -> Tuple[Type[Page], DatabaseObject]:
        # if this throws a key error, either the formated generator needs to be iterated, or the option doesn't exist.
        return self._page_by_index[index], self._by_index[index]

    def delete_details(self, exclude_index: int):
        """Strip details from every stored object except the one at `exclude_index`."""
        for index, music_object in self._by_index.items():
            if index == exclude_index:
                continue
            music_object.strip_details()
class SearchResults(Results):
    """Search results from several pages, grouped per page."""

    def __init__(
        self,
        pages: Tuple[Type[Page], ...] = None
    ) -> None:
        super().__init__()
        self.pages = pages or []
        # per-page result lists are created lazily instead of pre-seeding an
        # empty list for every known page
        self.results: Dict[Type[Page], List[DatabaseObject]] = {}

    def add(self, page: Type[Page], search_result: List[DatabaseObject]):
        """
        Attach a list of found music objects to the given page.

        WARNING: existing search results for that page get overwritten.
        """
        self.results[page] = search_result

    def get_page_results(self, page: Type[Page]) -> "PageResults":
        """Narrow these results down to a single page."""
        return PageResults(page, self.results.get(page, []))

    def formated_generator(self, max_items_per_page: int = 10):
        """Yield each page followed by at most `max_items_per_page` numbered options."""
        super().formated_generator()
        option_index = 0
        for page, page_hits in self.results.items():
            yield page
            for shown, music_object in enumerate(page_hits):
                yield Option(option_index, music_object)
                self._by_index[option_index] = music_object
                self._page_by_index[option_index] = page
                option_index += 1
                if shown + 1 >= max_items_per_page:
                    break
class PageResults(Results):
    """Results restricted to one single page."""

    def __init__(self, page: Type[Page], results: List[DatabaseObject]) -> None:
        super().__init__()
        self.page: Type[Page] = page
        self.results: List[DatabaseObject] = results

    def formated_generator(self, max_items_per_page: int = 10):
        """Yield the page header, then one numbered option per result."""
        super().formated_generator()
        yield self.page
        for option_index, music_object in enumerate(self.results):
            yield Option(option_index, music_object)
            self._by_index[option_index] = music_object
            self._page_by_index[option_index] = self.page

View File

@@ -1,2 +0,0 @@

View File

@@ -1,106 +0,0 @@
from typing import List
import mutagen.id3
import requests
import os.path
from mutagen.easyid3 import EasyID3
from pydub import AudioSegment
from ..utils.shared import *
from .sources import (
youtube,
musify,
local_files
)
from ..database.song import (
Song as song_object,
Target as target_object,
Source as source_object
)
from ..database.temp_database import temp_database
# module-level logger; DOWNLOAD_LOGGER comes from the star import of utils.shared
logger = DOWNLOAD_LOGGER

# maps the classes to get data from to the source name
sources = {
    'Youtube': youtube.Youtube,
    'Musify': musify.Musify
}

# reference material on ID3 tagging, kept as a module-level note
"""
https://en.wikipedia.org/wiki/ID3
https://mutagen.readthedocs.io/en/latest/user/id3.html
# to get all valid keys
from mutagen.easyid3 import EasyID3
print("\n".join(EasyID3.valid_keys.keys()))
print(EasyID3.valid_keys.keys())
"""
class Download:
    """
    Downloads the audio of every track marked for download in the temporary
    database and writes its ID3 metadata afterwards.
    """

    def __init__(self):
        # constructing the object kicks off the whole download pass
        Download.fetch_audios(temp_database.get_tracks_to_download())

    @classmethod
    def fetch_audios(cls, songs: List[song_object], override_existing: bool = False):
        """Download each song from the first source that works, then tag it."""
        for song in songs:
            # path_stuff() is True when the file still needs downloading;
            # already-present files only get their metadata (re)written
            if not cls.path_stuff(song.target) and not override_existing:
                cls.write_metadata(song)
                continue
            is_downloaded = False
            for source in song.sources:
                download_success = Download.download_from_src(song, source)
                # sources signal failure with -1; anything else counts as success
                if download_success == -1:
                    # NOTE(review): song['url'] / song['src'] assume dict-style
                    # access on the song object — confirm song_object supports it
                    logger.warning(f"couldn't download {song['url']} from {song['src']}")
                else:
                    is_downloaded = True
                    break
            if is_downloaded:
                cls.write_metadata(song)

    @classmethod
    def download_from_src(cls, song: song_object, source: source_object):
        """Delegate the download to the source class registered for source.src."""
        if source.src not in sources:
            raise ValueError(f"source {source.src} seems to not exist")
        source_subclass = sources[source.src]
        return source_subclass.fetch_audio(song, source)

    @classmethod
    def write_metadata(cls, song: song_object):
        """Write the song's metadata as ID3 tags; returns False when the file is missing."""
        if not os.path.exists(song.target.file):
            logger.warning(f"file {song.target.file} doesn't exist")
            return False
        # only convert the file to the proper format if mutagen doesn't work with it due to time
        try:
            audiofile = EasyID3(song.target.file)
        except mutagen.id3.ID3NoHeaderError:
            AudioSegment.from_file(song.target.file).export(song.target.file, format="mp3")
            audiofile = EasyID3(song.target.file)
        for key, value in song.get_metadata():
            # EasyID3 accepts strings or lists of strings
            if type(value) != list:
                value = str(value)
            audiofile[key] = value
        logger.info("saving")
        # v1=2 additionally writes an ID3v1 tag alongside ID3v2
        audiofile.save(song.target.file, v1=2)

    @classmethod
    def path_stuff(cls, target: target_object) -> bool:
        # returns true if it should be downloaded
        if os.path.exists(target.file):
            logger.info(f"'{target.file}' does already exist, thus not downloading.")
            return False
        # make sure the destination directory exists for the upcoming download
        os.makedirs(target.path, exist_ok=True)
        return True
if __name__ == "__main__":
    # ad-hoc manual test entry point
    # NOTE(review): `logging` presumably arrives via the star import of
    # utils.shared; the session `s` is never used — looks like leftover scaffolding
    logging.basicConfig(level=logging.DEBUG)
    s = requests.Session()
    Download()

View File

@@ -1,70 +0,0 @@
from typing import List
from ..utils.shared import *
from .sources import (
youtube,
musify,
local_files
)
from ..database.song import Song as song_object
from ..database.temp_database import temp_database
# module-level logger; URL_DOWNLOAD_LOGGER comes from the star import of utils.shared
logger = URL_DOWNLOAD_LOGGER

# maps the classes to get data from to the source name
sources = {
    'Youtube': youtube.Youtube,
    'Musify': musify.Musify
}
class Download:
    """
    Resolves download urls for all cached tracks that don't have one yet.
    """

    def __init__(self) -> None:
        # NOTE(review): this duplicates fetch_sources() with slightly different
        # skip logic (unconditional file-exists check instead of honouring a
        # skip_existing_files flag) — candidates for consolidation
        for song in temp_database.get_tracks_without_src():
            # NOTE(review): dict-style access on the song object — confirm
            # song_object supports __getitem__
            id_ = song['id']
            if os.path.exists(song.target.file):
                logger.info(f"skipping the fetching of the download links, cuz {song.target.file} already exists.")
                continue
            success = False
            for src in AUDIO_SOURCES:
                res = Download.fetch_from_src(song, src)
                if res is not None:
                    success = True
                    Download.add_url(res, src, id_)
            if not success:
                logger.warning(f"Didn't find any sources for {song}")

    @classmethod
    def fetch_sources(cls, songs: List[song_object], skip_existing_files: bool = False):
        """Fetch download urls for the given songs from every known audio source."""
        for song in songs:
            if song.target.exists_on_disc and skip_existing_files:
                logger.info(f"skipping the fetching of the download links, cuz {song.target.file} already exists.")
                continue
            success = False
            for src in AUDIO_SOURCES:
                res = cls.fetch_from_src(song, src)
                if res is not None:
                    success = True
                    cls.add_url(res, src, song.id)
            if not success:
                logger.warning(f"Didn't find any sources for {song}")

    @classmethod
    def fetch_from_src(cls, song, src):
        """Ask a single source class for this song's download url."""
        if src not in sources:
            raise ValueError(f"source {src} seems to not exist")
        source_subclass = sources[src]
        return source_subclass.fetch_source(song)

    @classmethod
    def add_url(cls, url: str, src: str, id_: str):
        """Persist the found url for the track in the temporary database."""
        temp_database.set_download_data(id_, url, src)
if __name__ == "__main__":
    # manual smoke test: resolve sources for everything still missing one
    download = Download()

View File

@@ -1,7 +0,0 @@
from . import (
    metadata_search,
    metadata_fetch
)

# public aliases so callers don't need to know the module layout
MetadataSearch = metadata_search.Search
MetadataDownload = metadata_fetch.MetadataDownloader

View File

@@ -1,345 +0,0 @@
from src.music_kraken.utils.shared import *
from src.music_kraken.utils.object_handeling import get_elem_from_obj, parse_music_brainz_date
from src.music_kraken.database.temp_database import temp_database
from typing import List
import musicbrainzngs
import logging
# I don't know if it would be feesable to set up my own mb instance
# https://github.com/metabrainz/musicbrainz-docker
# IMPORTANT DOCUMENTATION WHICH CONTAINS FOR EXAMPLE THE INCLUDES
# https://python-musicbrainzngs.readthedocs.io/en/v0.7.1/api/#getting-data
# module-level logger; METADATA_DOWNLOAD_LOGGER comes from the star import of utils.shared
logger = METADATA_DOWNLOAD_LOGGER
class MetadataDownloader:
    """
    Recursively fetches metadata from MusicBrainz and caches it in the
    temporary database.

    Each nested class mirrors one level of the MusicBrainz hierarchy
    (Artist -> ReleaseGroup -> Release -> Track); constructing an instance
    performs the API request, persists the result via `save()` and, unless
    told otherwise, descends into the child entities.
    """

    def __init__(self):
        pass

    class Artist:
        def __init__(
                self,
                musicbrainz_artistid: str,
                release_groups: List = None,
                new_release_groups: bool = True
        ):
            """
            release_groups: list

            :param musicbrainz_artistid: MusicBrainz id of the artist to fetch.
            :param release_groups: already-known release groups; a fresh list is
                created when omitted. FIX: the former default of `[]` was a
                shared mutable default argument — every Artist constructed
                without the parameter appended into the same list.
            :param new_release_groups: when False, only the artist itself is
                fetched without descending into its release groups.
            """
            self.release_groups = release_groups if release_groups is not None else []
            self.musicbrainz_artistid = musicbrainz_artistid

            try:
                result = musicbrainzngs.get_artist_by_id(self.musicbrainz_artistid, includes=["release-groups", "releases"])
            except musicbrainzngs.musicbrainz.NetworkError:
                # best effort: on network failure the artist simply stays unsaved
                return
            artist_data = get_elem_from_obj(result, ['artist'], return_if_none={})
            self.artist = get_elem_from_obj(artist_data, ['name'])
            self.save()

            # STARTING TO FETCH' RELEASE GROUPS. IMPORTANT: DON'T WRITE ANYTHING BESIDES THAT HERE
            if not new_release_groups:
                return
            # sort all release groups by date and add album sort to have them in chronological order.
            release_group_datas = artist_data['release-group-list']
            for i, release_group in enumerate(release_group_datas):
                release_group_datas[i]['first-release-date'] = parse_music_brainz_date(release_group['first-release-date'])
            release_group_datas.sort(key=lambda x: x['first-release-date'])

            for i, release_group in enumerate(release_group_datas):
                self.release_groups.append(MetadataDownloader.ReleaseGroup(
                    musicbrainz_releasegroupid=release_group['id'],
                    artists=[self],
                    albumsort=i + 1
                ))

        def __str__(self):
            return f"artist: \"{self.artist}\""

        def save(self):
            """Cache this artist in the temporary database."""
            logger.info(f"caching {self}")
            temp_database.add_artist(
                musicbrainz_artistid=self.musicbrainz_artistid,
                artist=self.artist
            )

    class ReleaseGroup:
        def __init__(
                self,
                musicbrainz_releasegroupid: str,
                artists=None,
                albumsort: int = None,
                only_download_distinct_releases: bool = True,
                fetch_further: bool = True
        ):
            """
            split_artists: list -> if len > 1: album_artist=VariousArtists
            releases: list

            FIX: the former `artists=[]` default was a shared mutable default
            argument; a fresh list is now created per instance.
            """
            self.musicbrainz_releasegroupid = musicbrainz_releasegroupid
            self.artists = artists if artists is not None else []
            self.releases = []

            try:
                result = musicbrainzngs.get_release_group_by_id(musicbrainz_releasegroupid,
                                                                includes=["artist-credits", "releases"])
            except musicbrainzngs.musicbrainz.NetworkError:
                return
            release_group_data = get_elem_from_obj(result, ['release-group'], return_if_none={})
            artist_datas = get_elem_from_obj(release_group_data, ['artist-credit'], return_if_none={})
            release_datas = get_elem_from_obj(release_group_data, ['release-list'], return_if_none={})

            # only for printing the release
            self.name = get_elem_from_obj(release_group_data, ['title'])

            for artist_data in artist_datas:
                artist_id = get_elem_from_obj(artist_data, ['artist', 'id'])
                if artist_id is None:
                    continue
                self.append_artist(artist_id)
            # NOTE(review): if MusicBrainz returns no artist credit at all,
            # self.artists is empty and the next line raises IndexError
            self.albumartist = "Various Artists" if len(self.artists) > 1 else self.artists[0].artist
            self.album_artist_id = None if self.albumartist == "Various Artists" else self.artists[
                0].musicbrainz_artistid
            self.albumsort = albumsort
            self.musicbrainz_albumtype = get_elem_from_obj(release_group_data, ['primary-type'])
            self.compilation = "1" if self.musicbrainz_albumtype == "Compilation" else None

            self.save()
            if not fetch_further:
                return

            if only_download_distinct_releases:
                self.append_distinct_releases(release_datas)
            else:
                self.append_all_releases(release_datas)

        def __str__(self):
            return f"release group: \"{self.name}\""

        def save(self):
            """Cache this release group in the temporary database."""
            logger.info(f"caching {self}")
            temp_database.add_release_group(
                musicbrainz_releasegroupid=self.musicbrainz_releasegroupid,
                artist_ids=[artist.musicbrainz_artistid for artist in self.artists],
                albumartist=self.albumartist,
                albumsort=self.albumsort,
                musicbrainz_albumtype=self.musicbrainz_albumtype,
                compilation=self.compilation,
                album_artist_id=self.album_artist_id
            )

        def append_artist(self, artist_id: str):
            """Attach the artist with the given id, reusing an existing entry if present."""
            for existing_artist in self.artists:
                if artist_id == existing_artist.musicbrainz_artistid:
                    return existing_artist
            new_artist = MetadataDownloader.Artist(artist_id, release_groups=[self],
                                                   new_release_groups=False)
            self.artists.append(new_artist)
            return new_artist

        def append_release(self, release_data: dict):
            """Fetch and attach a single release from its raw MusicBrainz data."""
            musicbrainz_albumid = get_elem_from_obj(release_data, ['id'])
            if musicbrainz_albumid is None:
                return
            self.releases.append(
                MetadataDownloader.Release(musicbrainz_albumid, release_group=self))

        def append_distinct_releases(self, release_datas: List[dict]):
            """Attach only one release per distinct title (the last one wins)."""
            titles = {}
            for release_data in release_datas:
                title = get_elem_from_obj(release_data, ['title'])
                if title is None:
                    continue
                titles[title] = release_data
            for key in titles:
                self.append_release(titles[key])

        def append_all_releases(self, release_datas: List[dict]):
            """Attach every release of this release group."""
            for release_data in release_datas:
                self.append_release(release_data)

    class Release:
        def __init__(
                self,
                musicbrainz_albumid: str,
                release_group=None,
                fetch_furter: bool = True  # (sic) parameter name kept for callers
        ):
            """
            release_group: ReleaseGroup
            tracks: list
            """
            self.musicbrainz_albumid = musicbrainz_albumid
            self.release_group = release_group
            self.tracklist = []

            try:
                result = musicbrainzngs.get_release_by_id(self.musicbrainz_albumid,
                                                          includes=["recordings", "labels", "release-groups"])
            except musicbrainzngs.musicbrainz.NetworkError:
                return
            release_data = get_elem_from_obj(result, ['release'], return_if_none={})
            label_data = get_elem_from_obj(release_data, ['label-info-list'], return_if_none={})
            recording_datas = get_elem_from_obj(release_data, ['medium-list', 0, 'track-list'], return_if_none=[])
            release_group_data = get_elem_from_obj(release_data, ['release-group'], return_if_none={})
            if self.release_group is None:
                # release was fetched directly; backfill its release group (shallow)
                self.release_group = MetadataDownloader.ReleaseGroup(
                    musicbrainz_releasegroupid=get_elem_from_obj(
                        release_group_data, ['id']),
                    fetch_further=False)

            self.title = get_elem_from_obj(release_data, ['title'])
            self.copyright = get_elem_from_obj(label_data, [0, 'label', 'name'])
            self.album_status = get_elem_from_obj(release_data, ['status'])
            self.language = get_elem_from_obj(release_data, ['text-representation', 'language'])
            self.year = get_elem_from_obj(release_data, ['date'], lambda x: x.split("-")[0])
            self.date = get_elem_from_obj(release_data, ['date'])
            self.country = get_elem_from_obj(release_data, ['country'])
            self.barcode = get_elem_from_obj(release_data, ['barcode'])

            self.save()
            if fetch_furter:
                self.append_recordings(recording_datas)

        def __str__(self):
            return f"release: {self.title} ©{self.copyright} {self.album_status}"

        def save(self):
            """Cache this release in the temporary database."""
            logger.info(f"caching {self}")
            temp_database.add_release(
                musicbrainz_albumid=self.musicbrainz_albumid,
                release_group_id=self.release_group.musicbrainz_releasegroupid,
                title=self.title,
                copyright_=self.copyright,
                album_status=self.album_status,
                language=self.language,
                year=self.year,
                date=self.date,
                country=self.country,
                barcode=self.barcode
            )

        def append_recordings(self, recording_datas: dict):
            """Fetch and attach every track of this release, numbering them from 1."""
            for i, recording_data in enumerate(recording_datas):
                musicbrainz_releasetrackid = get_elem_from_obj(recording_data, ['recording', 'id'])
                if musicbrainz_releasetrackid is None:
                    continue
                self.tracklist.append(
                    MetadataDownloader.Track(musicbrainz_releasetrackid, self,
                                             track_number=str(i + 1)))

    class Track:
        def __init__(
                self,
                musicbrainz_releasetrackid: str,
                release=None,
                track_number: str = None
        ):
            """
            release: Release
            feature_artists: list
            """
            self.musicbrainz_releasetrackid = musicbrainz_releasetrackid
            self.release = release
            self.artists = []
            self.track_number = track_number

            try:
                result = musicbrainzngs.get_recording_by_id(self.musicbrainz_releasetrackid,
                                                            includes=["artists", "releases", "recording-rels", "isrcs",
                                                                      "work-level-rels"])
            except musicbrainzngs.musicbrainz.NetworkError:
                return
            recording_data = result['recording']
            release_data = get_elem_from_obj(recording_data, ['release-list', -1])
            if self.release is None:
                # track was fetched directly; backfill its release (shallow)
                self.release = MetadataDownloader.Release(get_elem_from_obj(release_data, ['id']), fetch_furter=False)

            for artist_data in get_elem_from_obj(recording_data, ['artist-credit'], return_if_none=[]):
                self.append_artist(get_elem_from_obj(artist_data, ['artist', 'id']))

            self.isrc = get_elem_from_obj(recording_data, ['isrc-list', 0])
            self.title = recording_data['title']
            # (sic) attribute name kept; may be None when MusicBrainz has no length
            self.lenth = get_elem_from_obj(recording_data, ['length'])

            self.save()

        def __str__(self):
            return f"track: \"{self.title}\" {self.isrc or ''}"

        def save(self):
            """Cache this track in the temporary database."""
            logger.info(f"caching {self}")
            temp_database.add_track(
                musicbrainz_releasetrackid=self.musicbrainz_releasetrackid,
                musicbrainz_albumid=self.release.musicbrainz_albumid,
                feature_aritsts=[artist.musicbrainz_artistid for artist in self.artists],
                tracknumber=self.track_number,
                track=self.title,
                isrc=self.isrc,
                # FIX: `int(self.lenth)` raised TypeError for recordings
                # without a length; store None instead — assumed to map to a
                # NULL column, TODO confirm against temp_database.add_track
                length=int(self.lenth) if self.lenth is not None else None
            )

        def append_artist(self, artist_id: str):
            """Attach the artist with the given id, reusing an existing entry if present."""
            if artist_id is None:
                return
            for existing_artist in self.artists:
                if artist_id == existing_artist.musicbrainz_artistid:
                    return existing_artist
            new_artist = MetadataDownloader.Artist(artist_id, new_release_groups=False)
            self.artists.append(new_artist)
            return new_artist

    def download(self, option: dict):
        """Dispatch on option['type'] and fetch the matching entity tree.

        Accepts the mapping shape produced by metadata_search's options.
        """
        type_ = option['type']
        mb_id = option['id']
        if type_ == "artist":
            return self.Artist(mb_id)
        if type_ == "release_group":
            return self.ReleaseGroup(mb_id)
        if type_ == "release":
            return self.Release(mb_id)
        if type_ == "recording":
            return self.Track(mb_id)

        logger.error(f"download type {type_} doesn't exists :(")
if __name__ == "__main__":
    # manual smoke test: log to the shared temp log file and to stdout
    # NOTE(review): `logging`, `os`, TEMP_DIR and LOG_FILE presumably arrive
    # via the star import of utils.shared — confirm
    logging.basicConfig(
        level=logging.INFO,
        format="%(asctime)s [%(levelname)s] %(message)s",
        handlers=[
            logging.FileHandler(os.path.join(TEMP_DIR, LOG_FILE)),
            logging.StreamHandler()
        ]
    )
    downloader = MetadataDownloader()
    # fetch a single recording and a full release by their MusicBrainz ids
    downloader.download({'id': 'd2006339-9e98-4624-a386-d503328eb854', 'type': 'recording'})
    downloader.download({'id': 'cdd16860-35fd-46af-bd8c-5de7b15ebc31', 'type': 'release'})
    # download({'id': '4b9af532-ef7e-42ab-8b26-c466327cb5e0', 'type': 'release'})
    #download({'id': 'c24ed9e7-6df9-44de-8570-975f1a5a75d1', 'type': 'track'})

View File

@@ -1,364 +0,0 @@
from typing import List
import musicbrainzngs
from src.music_kraken.utils.shared import *
from src.music_kraken.utils.object_handeling import get_elem_from_obj, parse_music_brainz_date
# module-level logger; SEARCH_LOGGER comes from the star import of utils.shared
logger = SEARCH_LOGGER

# at most this many '#'-prefixed parameters may appear in a single query string
MAX_PARAMETERS = 3

# the MusicBrainz entity kinds an Option may represent
OPTION_TYPES = ['artist', 'release_group', 'release', 'recording']
class Option:
    """One selectable MusicBrainz entity (artist, release group, release or recording)."""

    def __init__(self, type_: str, id_: str, name: str, additional_info: str = "") -> None:
        if type_ not in OPTION_TYPES:
            raise ValueError(f"type: {type_} doesn't exist. Legal Values: {OPTION_TYPES}")
        self.type = type_
        self.id = id_
        self.name = name
        self.additional_info = additional_info

    def __getitem__(self, item):
        """Dict-style access; 'kind' is accepted as an alias for 'type'."""
        if item == "id":
            return self.id
        if item in ("type", "kind"):
            return self.type
        if item == "name":
            return self.name
        raise KeyError(item)

    def __repr__(self) -> str:
        # tab-padded labels so option lists line up in the terminal
        labels = {
            'artist': 'artist\t\t',
            'release_group': 'release group\t',
            'release': 'release\t\t',
            'recording': 'recording\t'
        }
        return f"{labels[self.type]}: \"{self.name}\"{self.additional_info}"
class MultipleOptions:
    """Wraps a list of Option objects for zero-padded, numbered terminal display."""

    def __init__(self, option_list: List[Option]) -> None:
        self.option_list = option_list

    def __repr__(self) -> str:
        lines = [f"{str(index).zfill(2)}) {option.__repr__()}" for index, option in enumerate(self.option_list)]
        return "\n".join(lines)
class Search:
def __init__(self) -> None:
self.options_history = []
self.current_option: Option
def append_new_choices(self, new_choices: List[Option]) -> MultipleOptions:
self.options_history.append(new_choices)
return MultipleOptions(new_choices)
def get_previous_options(self):
self.options_history.pop(-1)
return MultipleOptions(self.options_history[-1])
@staticmethod
def fetch_new_options_from_artist(artist: Option):
"""
returning list of artist and every release group
"""
result = musicbrainzngs.get_artist_by_id(artist.id, includes=["release-groups", "releases"])
artist_data = get_elem_from_obj(result, ['artist'], return_if_none={})
result = [artist]
# sort all release groups by date and add album sort to have them in chronological order.
release_group_list = artist_data['release-group-list']
for i, release_group in enumerate(release_group_list):
release_group_list[i]['first-release-date'] = parse_music_brainz_date(release_group['first-release-date'])
release_group_list.sort(key=lambda x: x['first-release-date'])
release_group_list = [Option("release_group", get_elem_from_obj(release_group_, ['id']),
get_elem_from_obj(release_group_, ['title']),
additional_info=f" ({get_elem_from_obj(release_group_, ['type'])}) from {get_elem_from_obj(release_group_, ['first-release-date'])}")
for release_group_ in release_group_list]
result.extend(release_group_list)
return result
@staticmethod
def fetch_new_options_from_release_group(release_group: Option):
"""
returning list including the artists, the releases and the tracklist of the first release
"""
results = []
result = musicbrainzngs.get_release_group_by_id(release_group.id,
includes=["artist-credits", "releases"])
release_group_data = get_elem_from_obj(result, ['release-group'], return_if_none={})
artist_datas = get_elem_from_obj(release_group_data, ['artist-credit'], return_if_none={})
release_datas = get_elem_from_obj(release_group_data, ['release-list'], return_if_none={})
# appending all the artists to results
for artist_data in artist_datas:
results.append(Option('artist', get_elem_from_obj(artist_data, ['artist', 'id']),
get_elem_from_obj(artist_data, ['artist', 'name'])))
# appending initial release group
results.append(release_group)
# appending all releases
first_release = None
for i, release_data in enumerate(release_datas):
results.append(
Option('release', get_elem_from_obj(release_data, ['id']), get_elem_from_obj(release_data, ['title']),
additional_info=f" ({get_elem_from_obj(release_data, ['status'])})"))
if i == 0:
first_release = results[-1]
# append tracklist of first release
if first_release is not None:
results.extend(Search.fetch_new_options_from_release(first_release, only_tracklist=True))
return results
@staticmethod
def fetch_new_options_from_release(release: Option, only_tracklist: bool = False):
"""
artists
release group
release
tracklist
"""
results = []
result = musicbrainzngs.get_release_by_id(release.id,
includes=["recordings", "labels", "release-groups", "artist-credits"])
release_data = get_elem_from_obj(result, ['release'], return_if_none={})
label_data = get_elem_from_obj(release_data, ['label-info-list'], return_if_none={})
recording_datas = get_elem_from_obj(release_data, ['medium-list', 0, 'track-list'], return_if_none=[])
release_group_data = get_elem_from_obj(release_data, ['release-group'], return_if_none={})
artist_datas = get_elem_from_obj(release_data, ['artist-credit'], return_if_none={})
# appending all the artists to results
for artist_data in artist_datas:
results.append(Option('artist', get_elem_from_obj(artist_data, ['artist', 'id']),
get_elem_from_obj(artist_data, ['artist', 'name'])))
# appending the according release group
results.append(Option("release_group", get_elem_from_obj(release_group_data, ['id']),
get_elem_from_obj(release_group_data, ['title']),
additional_info=f" ({get_elem_from_obj(release_group_data, ['type'])}) from {get_elem_from_obj(release_group_data, ['first-release-date'])}"))
# appending the release
results.append(release)
# appending the tracklist, but first putting it in a list, in case of only_tracklist being True to
# return this instead
tracklist = []
for i, recording_data in enumerate(recording_datas):
recording_data = recording_data['recording']
tracklist.append(Option('recording', get_elem_from_obj(recording_data, ['id']),
get_elem_from_obj(recording_data, ['title']),
f" ({get_elem_from_obj(recording_data, ['length'])}) from {get_elem_from_obj(recording_data, ['artist-credit-phrase'])}"))
if only_tracklist:
return tracklist
results.extend(tracklist)
return results
@staticmethod
def fetch_new_options_from_record(recording: Option):
"""
artists, release, record
"""
results = []
result = musicbrainzngs.get_recording_by_id(recording.id, includes=["artists", "releases"])
recording_data = result['recording']
release_datas = get_elem_from_obj(recording_data, ['release-list'])
artist_datas = get_elem_from_obj(recording_data, ['artist-credit'], return_if_none={})
# appending all the artists to results
for artist_data in artist_datas:
results.append(Option('artist', get_elem_from_obj(artist_data, ['artist', 'id']),
get_elem_from_obj(artist_data, ['artist', 'name'])))
# appending all releases
for i, release_data in enumerate(release_datas):
results.append(
Option('release', get_elem_from_obj(release_data, ['id']), get_elem_from_obj(release_data, ['title']),
additional_info=f" ({get_elem_from_obj(release_data, ['status'])})"))
results.append(recording)
return results
def fetch_new_options(self) -> MultipleOptions:
if self.current_option is None:
return -1
result = []
if self.current_option.type == 'artist':
result = self.fetch_new_options_from_artist(self.current_option)
elif self.current_option.type == 'release_group':
result = self.fetch_new_options_from_release_group(self.current_option)
elif self.current_option.type == 'release':
result = self.fetch_new_options_from_release(self.current_option)
elif self.current_option.type == 'recording':
result = self.fetch_new_options_from_record(self.current_option)
return self.append_new_choices(result)
def choose(self, index: int) -> MultipleOptions:
if len(self.options_history) == 0:
logging.error("initial query neaded before choosing")
return MultipleOptions([])
latest_options = self.options_history[-1]
if index >= len(latest_options):
logging.error("index outside of options")
return MultipleOptions([])
self.current_option = latest_options[index]
return self.fetch_new_options()
@staticmethod
def search_recording_from_text(artist: str = None, release_group: str = None, recording: str = None,
query: str = None):
result = musicbrainzngs.search_recordings(artist=artist, release=release_group, recording=recording,
query=query)
recording_list = get_elem_from_obj(result, ['recording-list'], return_if_none=[])
resulting_options = [
Option("recording", get_elem_from_obj(recording_, ['id']), get_elem_from_obj(recording_, ['title']),
additional_info=f" of {get_elem_from_obj(recording_, ['release-list', 0, 'title'])} by {get_elem_from_obj(recording_, ['artist-credit', 0, 'name'])}")
for recording_ in recording_list]
return resulting_options
@staticmethod
def search_release_group_from_text(artist: str = None, release_group: str = None, query: str = None):
result = musicbrainzngs.search_release_groups(artist=artist, releasegroup=release_group, query=query)
release_group_list = get_elem_from_obj(result, ['release-group-list'], return_if_none=[])
resulting_options = [Option("release_group", get_elem_from_obj(release_group_, ['id']),
get_elem_from_obj(release_group_, ['title']),
additional_info=f" by {get_elem_from_obj(release_group_, ['artist-credit', 0, 'name'])}")
for release_group_ in release_group_list]
return resulting_options
@staticmethod
def search_artist_from_text(artist: str = None, query: str = None):
result = musicbrainzngs.search_artists(artist=artist, query=query)
artist_list = get_elem_from_obj(result, ['artist-list'], return_if_none=[])
resulting_options = [Option("artist", get_elem_from_obj(artist_, ['id']), get_elem_from_obj(artist_, ['name']),
additional_info=f": {', '.join([i['name'] for i in get_elem_from_obj(artist_, ['tag-list'], return_if_none=[])])}")
for artist_ in artist_list]
return resulting_options
def search_from_text(self, artist: str = None, release_group: str = None, recording: str = None) -> MultipleOptions:
logger.info(
f"searching specified artist: \"{artist}\", release group: \"{release_group}\", recording: \"{recording}\"")
if artist is None and release_group is None and recording is None:
logger.error("either artist, release group or recording has to be set")
return MultipleOptions([])
if recording is not None:
logger.info("search for recording")
results = self.search_recording_from_text(artist=artist, release_group=release_group, recording=recording)
elif release_group is not None:
logger.info("search for release group")
results = self.search_release_group_from_text(artist=artist, release_group=release_group)
else:
logger.info("search for artist")
results = self.search_artist_from_text(artist=artist)
return self.append_new_choices(results)
def search_from_text_unspecified(self, query: str) -> MultipleOptions:
    """Run the free-text query against artists, release groups and recordings, in that order."""
    logger.info(f"searching unspecified: \"{query}\"")

    combined = []
    for search in (self.search_artist_from_text,
                   self.search_release_group_from_text,
                   self.search_recording_from_text):
        combined.extend(search(query=query))

    return self.append_new_choices(combined)
def search_from_query(self, query: str) -> MultipleOptions:
    """
    Parse a raw query string and dispatch the appropriate search.

    A '#' starts a new parameter; the letter directly after it selects
    the kind: "#a Psychonaut 4 #r Tired, Numb and #t Drop by Drop"
    (a = artist, r = release group, t = track/recording).
    If no '#' is in the query, it is treated as an "unspecified query".

    :param query: raw user input; may be None.
    :return: the resulting MultipleOptions (empty for a None query).
    :raises ValueError: if more than MAX_PARAMETERS parameters are given.
    """
    if query is None:
        return MultipleOptions([])

    if '#' not in query:
        return self.search_from_text_unspecified(query)

    artist = None
    release_group = None
    recording = None

    # Filter out empty chunks instead of list.remove(''):
    # remove('') raised ValueError when the query didn't start with '#'
    # (no empty chunk to remove) and ignored trailing empty chunks.
    parameters = [chunk for chunk in query.strip().split('#') if chunk != '']
    if len(parameters) > MAX_PARAMETERS:
        raise ValueError(f"too many parameters. Only {MAX_PARAMETERS} are allowed")

    for parameter in parameters:
        # first word is the parameter kind, the rest is its value
        type_, _, rest = parameter.partition(" ")
        value = rest.strip()
        if type_ == "a":
            artist = value
        elif type_ == "r":
            release_group = value
        elif type_ == "t":
            recording = value
        # unknown kinds are silently ignored, as before

    return self.search_from_text(artist=artist, release_group=release_group, recording=recording)
def automated_demo():
    """Scripted walk-through: artist -> release group -> release -> recording."""
    demo_search = Search()
    demo_search.search_from_text(artist="I Prevail")
    # drill down one level per choice: artist, release group, release, recording
    for choice_index in (0, 9, 2, 4):
        demo_search.choose(choice_index)
def interactive_demo():
    """Minimal REPL around Search: choose options by index or type a new query."""
    search = Search()
    while True:
        input_ = input(
            "q to quit, .. for previous options, int for this element, str to search for query, ok to download: ")
        # str.strip() returns a new string; the original discarded the
        # result, so surrounding whitespace was never actually removed
        input_ = input_.strip()
        if input_.lower() in ("ok", "q"):
            break
        if input_.lower() == "..":
            search.get_previous_options()
            continue
        if input_.isdigit():
            search.choose(int(input_))
            continue
        search.search_from_query(input_)
# manual entry point: run the interactive search REPL
if __name__ == "__main__":
    interactive_demo()

View File

@@ -1,4 +0,0 @@
from enum import Enum
class Providers(Enum):
    """Enumeration of the metadata providers the downloader can query."""
    musicbrainz = "musicbrainz"

View File

@@ -1,59 +0,0 @@
from typing import List
import musicbrainzngs
from src.music_kraken.database import (
Artist,
Album,
Song
)
from src.music_kraken.utils.object_handeling import (
get_elem_from_obj
)
def get_artist(flat: bool = False) -> Artist:
    """Build an Artist object; with flat=True skip fetching related data."""
    artist_result = Artist()
    if not flat:
        # placeholder for attaching additional data such as the discography
        pass
    return artist_result
def get_album(flat: bool = False) -> Album:
    """Build an Album object; with flat=True skip fetching related data."""
    album_result = Album()
    if not flat:
        # placeholder for attaching additional data such as the tracklist
        pass
    return album_result
def get_song(mb_id: str, flat: bool = False) -> Song:
    """Fetch a recording from MusicBrainz and build a Song object.

    Returns None when the MusicBrainz API is unreachable.
    """
    # getting the flat song object
    try:
        result = musicbrainzngs.get_recording_by_id(mb_id,
                                                    includes=["artists", "releases", "recording-rels", "isrcs",
                                                              "work-level-rels"])
    except musicbrainzngs.musicbrainz.NetworkError:
        # NOTE(review): bare return yields None despite the -> Song annotation;
        # callers must handle a missing result
        return
    recording_data = result['recording']
    song_object = Song(
        mb_id=mb_id,
        title=recording_data['title'],
        length=get_elem_from_obj(recording_data, ['length']),
        isrc=get_elem_from_obj(recording_data, ['isrc-list', 0])
    )
    if flat:
        return song_object
    # fetch additional stuff
    # NOTE(review): the ids computed below are never used -- presumably
    # unfinished enrichment of the song object; confirm before relying on it
    artist_data_list = get_elem_from_obj(recording_data, ['artist-credit'], return_if_none=[])
    for artist_data in artist_data_list:
        mb_artist_id = get_elem_from_obj(artist_data, ['artist', 'id'])
    release_data = get_elem_from_obj(recording_data, ['release-list', -1])
    mb_release_id = get_elem_from_obj(release_data, ['id'])
    return song_object

View File

@@ -1,172 +0,0 @@
import requests
from typing import List
from bs4 import BeautifulSoup
import pycountry
from src.music_kraken.database import (
Lyrics,
Song,
Artist
)
from src.music_kraken.utils.shared import *
from src.music_kraken.utils import phonetic_compares
from src.music_kraken.utils.object_handeling import get_elem_from_obj
# request timeout in seconds for all genius calls
TIMEOUT = 10
# search doesn't support isrc
# https://genius.com/api/search/multi?q=I Prevail - Breaking Down
# https://genius.com/api/songs/6192944
# https://docs.genius.com/
# one shared session so headers/proxies apply to every request
session = requests.Session()
session.headers = {
    "Connection": "keep-alive",
    "Referer": "https://genius.com/search/embed"
}
session.proxies = proxies
logger = GENIUS_LOGGER
class LyricsSong:
    """One hit of the genius search, validated against the desired song and
    able to fetch its lyrics page.

    NOTE(review): when validation fails, __init__ returns early and
    self.lyrics is never set -- accessing lyrics/lyrics_object on an invalid
    instance raises AttributeError; confirm callers always check .valid.
    """

    def __init__(self, raw_data: dict, desirered_data: dict):
        # raw_data: one element of the genius "hits" list
        # desirered_data (sic): {'artist': ..., 'track': ...} we searched for
        self.raw_data = raw_data
        self.desired_data = desirered_data
        song_data = get_elem_from_obj(self.raw_data, ['result'], return_if_none={})
        self.id = get_elem_from_obj(song_data, ['id'])
        self.artist = get_elem_from_obj(song_data, ['primary_artist', 'name'])
        self.title = get_elem_from_obj(song_data, ['title'])
        # fall back to English when genius doesn't report a language
        lang_code = get_elem_from_obj(song_data, ['language']) or "en"
        self.language = pycountry.languages.get(alpha_2=lang_code)
        self.lang = self.language.alpha_3
        self.url = get_elem_from_obj(song_data, ['url'])
        # maybe could be implemented
        self.lyricist: str
        if get_elem_from_obj(song_data, ['lyrics_state']) != "complete":
            logger.warning(
                f"lyrics state of {self.title} by {self.artist} is not complete but {get_elem_from_obj(song_data, ['lyrics_state'])}")
        self.valid = self.is_valid()
        if self.valid:
            logger.info(f"found lyrics for \"{self.__repr__()}\"")
        else:
            # invalid hit: skip fetching; self.lyrics stays unset (see class note)
            return
        self.lyrics = self.fetch_lyrics()
        if self.lyrics is None:
            self.valid = False

    def is_valid(self) -> bool:
        """Compare this hit phonetically against the desired artist/track.

        NOTE(review): the match_* helpers appear to return True on a
        MISMATCH (hence the negations); confirm in utils.phonetic_compares.
        """
        title_match, title_distance = phonetic_compares.match_titles(self.title, self.desired_data['track'])
        artist_match, artist_distance = phonetic_compares.match_artists(self.desired_data['artist'], self.artist)
        return not title_match and not artist_match

    def __repr__(self) -> str:
        return f"{self.title} by {self.artist} ({self.url})"

    def fetch_lyrics(self) -> str | None:
        """Download the genius song page and scrape the lyrics text.

        :return: the joined lyrics text, or None on any network/parse failure.
        """
        if not self.valid:
            logger.warning(f"{self.__repr__()} is invalid but the lyrics still get fetched. Something could be wrong.")
        try:
            r = session.get(self.url, timeout=TIMEOUT)
        except requests.exceptions.Timeout:
            logger.warning(f"{self.url} timed out after {TIMEOUT} seconds")
            return None
        if r.status_code != 200:
            logger.warning(f"{r.url} returned {r.status_code}:\n{r.content}")
            return None
        soup = BeautifulSoup(r.content, "html.parser")
        # genius marks every lyrics block with data-lyrics-container="true"
        lyrics_soups = soup.find_all('div', {'data-lyrics-container': "true"})
        if len(lyrics_soups) == 0:
            logger.warning(f"didn't found lyrics on {self.url}")
            return None
        # if len(lyrics_soups) != 1:
        #     logger.warning(f"number of lyrics_soups doesn't equals 1, but {len(lyrics_soups)} on {self.url}")
        lyrics = "\n".join([lyrics_soup.getText(separator="\n", strip=True) for lyrics_soup in lyrics_soups])
        # <div data-lyrics-container="true" class="Lyrics__Container-sc-1ynbvzw-6 YYrds">With the soundle
        self.lyrics = lyrics
        return lyrics

    def get_lyrics_object(self) -> Lyrics | None:
        """Wrap the fetched text in a Lyrics object (None if nothing was fetched)."""
        if self.lyrics is None:
            return None
        return Lyrics(text=self.lyrics, language=self.lang or "en")

    # read-only accessor so callers can use .lyrics_object like an attribute
    lyrics_object = property(fget=get_lyrics_object)
def process_multiple_songs(song_datas: list, desired_data: dict) -> List[LyricsSong]:
    """Wrap every raw search hit in a LyricsSong validated against desired_data."""
    wrapped = []
    for raw_hit in song_datas:
        wrapped.append(LyricsSong(raw_hit, desired_data))
    return wrapped
def search_song_list(artist: str, track: str) -> List[LyricsSong]:
    """
    Query the genius multi-search endpoint for "<artist> - <track>".

    :return: the hits of the first "song" section wrapped in LyricsSong
        objects; an empty list on any network or API error.
    """
    endpoint = "https://genius.com/api/search/multi?q="
    url = f"{endpoint}{artist} - {track}"
    # use the module logger consistently; the original mixed calls to the
    # root `logging` module with the configured GENIUS_LOGGER
    logger.info(f"requesting {url}")

    desired_data = {
        'artist': artist,
        'track': track
    }

    try:
        r = session.get(url, timeout=TIMEOUT)
    except requests.exceptions.Timeout:
        logger.warning(f"{url} timed out after {TIMEOUT} seconds")
        return []
    if r.status_code != 200:
        logger.warning(f"{r.url} returned {r.status_code}:\n{r.content}")
        return []
    content = r.json()
    if get_elem_from_obj(content, ['meta', 'status']) != 200:
        logger.warning(f"{r.url} returned {get_elem_from_obj(content, ['meta', 'status'])}:\n{content}")
        return []

    sections = get_elem_from_obj(content, ['response', 'sections'])
    for section in sections:
        section_type = get_elem_from_obj(section, ['type'])
        if section_type == "song":
            return process_multiple_songs(get_elem_from_obj(section, ['hits'], return_if_none=[]), desired_data)

    return []
def fetch_lyrics_from_artist(song: Song, artist: Artist) -> List[Lyrics]:
    """Collect the Lyrics objects of all valid genius hits for one artist."""
    return [
        hit.lyrics_object
        for hit in search_song_list(artist.name, song.title)
        if hit.valid
    ]
def fetch_lyrics(song: Song) -> List[Lyrics]:
    """Gather lyrics for a song by querying genius once per credited artist."""
    collected: List[Lyrics] = []
    for performer in song.artists:
        collected.extend(fetch_lyrics_from_artist(song, performer))
    return collected
"""
if __name__ == "__main__":
logging.basicConfig(level=logging.DEBUG)
songs = search("Zombiez", "WALL OF Z")
for song in songs:
print(song)
"""

View File

@@ -1,57 +0,0 @@
import os
from ...utils.shared import *
from ...utils import phonetic_compares
def is_valid(a1, a2, t1, t2) -> bool:
    """True when neither the titles nor the artists are flagged by the phonetic compare."""
    title_flag, _ = phonetic_compares.match_titles(t1, t2)
    artist_flag, _ = phonetic_compares.match_artists(a1, a2)
    return not (title_flag or artist_flag)
def get_metadata(file):
    """
    Read artist and title from a file's ID3 tags.

    :param file: path of the mp3 file to inspect.
    :return: (artist, title) as stored in the tags (EasyID3 returns lists).
    """
    # the original pre-initialized artist/title to None, but those values
    # were always overwritten (or EasyID3 raised), so they are dropped
    audiofile = EasyID3(file)
    return audiofile['artist'], audiofile['title']
def check_for_song(folder, artists, title):
    """Return True if some file in folder carries matching artist/title tags."""
    if not os.path.exists(folder):
        return False
    for entry in os.listdir(folder):
        tagged_artists, tagged_title = get_metadata(os.path.join(folder, entry))
        if is_valid(artists, tagged_artists, title, tagged_title):
            return True
    return False
def get_path(row):
    # Resolve a database row to a location on disk (work in progress).
    title = row['title']
    artists = row['artists']
    path_ = os.path.join(MUSIC_DIR, row['path'])
    print(artists, title, path_)
    # NOTE(review): the result of check_for_song is discarded and the function
    # always returns None -- presumably unfinished; confirm intended behavior
    check_for_song(path_, artists, title)
    return None
# ad-hoc manual test against a hard-coded library row
if __name__ == "__main__":
    row = {'artists': ['Psychonaut 4'], 'id': '6b40186b-6678-4328-a4b8-eb7c9806a9fb', 'tracknumber': None,
           'titlesort ': None, 'musicbrainz_releasetrackid': '6b40186b-6678-4328-a4b8-eb7c9806a9fb',
           'musicbrainz_albumid': '0d229a02-74f6-4c77-8c20-6612295870ae', 'title': 'Sweet Decadance', 'isrc': None,
           'album': 'Neurasthenia', 'copyright': 'Talheim Records', 'album_status': 'Official', 'language': 'eng',
           'year': '2016', 'date': '2016-10-07', 'country': 'AT', 'barcode': None, 'albumartist': 'Psychonaut 4',
           'albumsort': None, 'musicbrainz_albumtype': 'Album', 'compilation': None,
           'album_artist_id': 'c0c720b5-012f-4204-a472-981403f37b12', 'path': 'dsbm/Psychonaut 4/Neurasthenia',
           'file': 'dsbm/Psychonaut 4/Neurasthenia/Sweet Decadance.mp3', 'genre': 'dsbm', 'url': None, 'src': None}
    print(get_path(row))

View File

@@ -1,181 +0,0 @@
import time
import requests
import bs4
from ...utils.shared import *
from ...utils import phonetic_compares
from .source import AudioSource
from ...database import song as song_objects
# max retries for a blocked (503) search request
TRIES = 5
# seconds to wait between retries / request timeout
TIMEOUT = 10
logger = MUSIFY_LOGGER
# one shared session so headers/proxies apply to every request
session = requests.Session()
session.headers = {
    "User-Agent": "Mozilla/5.0 (X11; Linux x86_64; rv:106.0) Gecko/20100101 Firefox/106.0",
    "Connection": "keep-alive",
    "Referer": "https://musify.club/"
}
session.proxies = proxies
class Musify(AudioSource):
    """Audio source that resolves and downloads tracks from musify.club."""

    @classmethod
    def fetch_source(cls, song: dict) -> str | None:
        """
        Try to find a download url for the song: first via the autocomplete
        api, then by scraping the direct search page.

        :return: the download url, or None if nothing matched.
        """
        super().fetch_source(song)

        title = song.title
        artists = song.get_artist_names()

        # trying to get a download link via the autocomplete api
        for artist in artists:
            url = cls.fetch_source_from_autocomplete(title=title, artist=artist)
            if url is not None:
                logger.info(f"found download link {url}")
                return url

        # trying to get a download link via the html of the direct search page
        for artist in artists:
            url = cls.fetch_source_from_search(title=title, artist=artist)
            if url is not None:
                logger.info(f"found download link {url}")
                return url

        logger.warning(f"Didn't find the audio on {cls.__name__}")

    @classmethod
    def get_download_link(cls, track_url: str) -> str | None:
        """
        Derive the direct mp3 link from a track page url, e.g.
        /track/sundenklang-wenn-mein-herz-schreit-3883217 ->
        https://musify.club/track/dl/3883217/sundenklang-wenn-mein-herz-schreit.mp3
        """
        file_ = track_url.split("/")[-1]
        if len(file_) == 0:
            return None
        # the numeric id is the last dash-separated chunk, the slug is the rest
        musify_id = file_.split("-")[-1]
        musify_name = "-".join(file_.split("-")[:-1])

        return f"https://musify.club/track/dl/{musify_id}/{musify_name}.mp3"

    @classmethod
    def fetch_source_from_autocomplete(cls, title: str, artist: str) -> str | None:
        """Ask the suggestion endpoint for "<artist> - <title>" and convert the first track hit."""
        url = f"https://musify.club/search/suggestions?term={artist} - {title}"

        try:
            logger.info(f"calling {url}")
            r = session.get(url=url)
        except requests.exceptions.ConnectionError:
            logger.info("connection error occurred")
            return None

        if r.status_code == 200:
            autocomplete = r.json()
            for song in autocomplete:
                if artist in song['label'] and "/track" in song['url']:
                    return cls.get_download_link(song['url'])

        return None

    @classmethod
    def get_soup_of_search(cls, query: str, trie=0) -> bs4.BeautifulSoup | None:
        """Fetch and parse the search page for query, retrying up to TRIES times on 503."""
        url = f"https://musify.club/search?searchText={query}"
        logger.debug(f"Trying to get soup from {url}")
        try:
            r = session.get(url, timeout=15)
        except requests.exceptions.Timeout:
            return None
        if r.status_code != 200:
            if r.status_code in [503] and trie < TRIES:
                # fixed: use the module logger (was the root `logging` module)
                # and name the right site (the message wrongly said "youtube")
                logger.warning(f"musify.club blocked downloading. ({trie}-{TRIES})")
                logger.warning(f"retrying in {TIMEOUT} seconds again")
                time.sleep(TIMEOUT)
                return cls.get_soup_of_search(query, trie=trie + 1)

            logger.warning("too many tries, returning")
            return None
        return bs4.BeautifulSoup(r.content, features="html.parser")

    @classmethod
    def fetch_source_from_search(cls, title: str, artist: str) -> str | None:
        """Scrape the search page and return the download link of the first matching result."""
        # NOTE(review): artist[0] builds the query from the FIRST CHARACTER of
        # the artist string -- looks like a leftover from when a list was
        # passed here; confirm whether the full name was intended
        query: str = f"{artist[0]} - {title}"
        search_soup = cls.get_soup_of_search(query=query)
        if search_soup is None:
            return None

        # get the soup of the container with all track results
        tracklist_container_soup = search_soup.find_all("div", {"class": "playlist"})
        if len(tracklist_container_soup) == 0:
            return None
        if len(tracklist_container_soup) != 1:
            logger.warning("HTML Layout of https://musify.club changed. (or bug)")
        tracklist_container_soup = tracklist_container_soup[0]

        tracklist_soup = tracklist_container_soup.find_all("div", {"class": "playlist__details"})

        def parse_track_soup(_track_soup):
            # first anchor holds the artist, second the track title and href
            anchor_soups = _track_soup.find_all("a")
            artist_ = anchor_soups[0].text.strip()
            track_ = anchor_soups[1].text.strip()
            url_ = anchor_soups[1]['href']
            return artist_, track_, url_

        # check each track in the container, if they match
        for track_soup in tracklist_soup:
            artist_option, title_option, track_url = parse_track_soup(track_soup)

            title_match, title_distance = phonetic_compares.match_titles(title, title_option)
            artist_match, artist_distance = phonetic_compares.match_artists(artist, artist_option)

            # fixed: use the module logger consistently (was the root `logging` module)
            logger.debug(f"{(title, title_option, title_match, title_distance)}")
            logger.debug(f"{(artist, artist_option, artist_match, artist_distance)}")

            if not title_match and not artist_match:
                return cls.get_download_link(track_url)

        return None

    @classmethod
    def download_from_musify(cls, target: song_objects.Target, url):
        """
        Stream url into target.file, creating the folder if needed.

        :return: True on success, False on any error.
        """
        # returns if target hasn't been set
        if target.path is None or target.file is None:
            logger.warning(f"target hasn't been set. Can't download. Most likely a bug.")
            return False

        # download the audio data
        logger.info(f"downloading: '{url}'")
        try:
            r = session.get(url, timeout=TIMEOUT)
        except requests.exceptions.ConnectionError:
            return False
        except requests.exceptions.ReadTimeout:
            logger.warning(f"musify server didn't respond after {TIMEOUT} seconds")
            return False
        if r.status_code != 200:
            if r.status_code == 404:
                logger.warning(f"{r.url} was not found")
                return False
            if r.status_code == 503:
                logger.warning(f"{r.url} raised an internal server error")
                return False
            logger.error(f"\"{url}\" returned {r.status_code}: {r.text}")
            return False

        # write to the file and create folder if it doesn't exist
        if not os.path.exists(target.path):
            os.makedirs(target.path, exist_ok=True)
        with open(target.file, "wb") as mp3_file:
            mp3_file.write(r.content)

        logger.info("finished")
        return True

    @classmethod
    def fetch_audio(cls, song: song_objects.Song, src: song_objects.Source):
        """Download the audio of src into the song's target file."""
        super().fetch_audio(song, src)
        return cls.download_from_musify(song.target, src.url)
# nothing to do when this module is run directly
if __name__ == "__main__":
    pass

View File

@@ -1,23 +0,0 @@
from ...utils.shared import *
from typing import Tuple
from ...database import song as song_objects
logger = URL_DOWNLOAD_LOGGER
"""
The class "Source" is the superclass every class for specific audio
sources inherits from. This gives the advantage of a consistent
calling of the functions do search for a song and to download it.
"""
class AudioSource:
    """Common interface for audio sources; subclasses implement the real work."""

    @classmethod
    def fetch_source(cls, row: dict):
        # subclasses resolve a download url; the base class only logs the attempt
        logger.info(f"try getting source {row.title} from {cls.__name__}")

    @classmethod
    def fetch_audio(cls, song: song_objects.Song, src: song_objects.Source):
        # subclasses download the audio; the base class only logs the attempt
        logger.info(f"downloading {song}: {cls.__name__} {src.url} -> {song.target.file}")

View File

@@ -1,98 +0,0 @@
from typing import List
import youtube_dl
import time
from ...utils.shared import *
from ...utils import phonetic_compares
from .source import AudioSource
from ...database import song as song_objects
logger = YOUTUBE_LOGGER
# youtube_dl options: best audio only, never expand playlists
YDL_OPTIONS = {'format': 'bestaudio', 'noplaylist': 'True'}
# keys of the youtube_dl info dict we read from
YOUTUBE_URL_KEY = 'webpage_url'
YOUTUBE_TITLE_KEY = 'title'
# seconds to wait when youtube blocks a download / max retry count
WAIT_BETWEEN_BLOCK = 10
MAX_TRIES = 3
def youtube_length_to_mp3_length(youtube_len: float) -> int:
    """Convert a youtube duration in seconds to whole milliseconds."""
    milliseconds = youtube_len * 1000
    return int(milliseconds)
class Youtube(AudioSource):
    """Audio source that searches youtube by ISRC and downloads via youtube_dl."""

    @classmethod
    def get_youtube_from_isrc(cls, isrc: str) -> List[dict]:
        """
        Search youtube for an ISRC and return a url/title/length dict per hit.

        https://stackoverflow.com/questions/63388364/searching-youtube-videos-using-youtube-dl
        """
        with youtube_dl.YoutubeDL(YDL_OPTIONS) as ydl:
            try:
                videos = ydl.extract_info(f"ytsearch:{isrc}", download=False)['entries']
            except youtube_dl.utils.DownloadError:
                return []

        return [{
            'url': video[YOUTUBE_URL_KEY],
            'title': video[YOUTUBE_TITLE_KEY],
            # bugfix: this read videos[0]['duration'], so every entry
            # reported the FIRST video's length instead of its own
            'length': youtube_length_to_mp3_length(float(video['duration']))
        } for video in videos]

    @classmethod
    def fetch_source(cls, song: song_objects.Song):
        """Return the url of an ISRC search hit whose title and length match, else None."""
        super().fetch_source(song)

        if not song.has_isrc():
            return None

        real_title = song.title.lower()

        final_result = None
        results = cls.get_youtube_from_isrc(song.isrc)
        for result in results:
            video_title = result['title'].lower()
            match, distance = phonetic_compares.match_titles(video_title, real_title)

            if match:
                continue
            if not phonetic_compares.match_length(song.length, result['length']):
                logger.warning(f"{song.length} doesn't match with {result}")
                continue

            # keep iterating: the last matching result wins (original behavior)
            final_result = result

        if final_result is None:
            return None

        logger.info(f"found video {final_result}")
        return final_result['url']

    @classmethod
    def fetch_audio(cls, song: song_objects.Song, src: song_objects.Source, trie: int = 0):
        """
        Download src.url into the song's target file, retrying up to
        MAX_TRIES times when youtube blocks the download.

        :return: True on success, False when the target is unset or all
            retries are exhausted.
        """
        super().fetch_audio(song, src)

        if song.target.file is None or song.target.path is None:
            logger.warning(f"target hasn't been set. Can't download. Most likely a bug.")
            return False

        options = {
            'format': 'bestaudio/best',
            'keepvideo': False,
            'outtmpl': song.target.file
        }

        # downloading
        try:
            with youtube_dl.YoutubeDL(options) as ydl:
                ydl.download([src.url])
        except youtube_dl.utils.DownloadError:
            # retry when failing
            logger.warning(f"youtube blocked downloading. ({trie}-{MAX_TRIES})")
            if trie >= MAX_TRIES:
                logger.warning("too many tries, returning")
                return False
            logger.warning(f"retrying in {WAIT_BETWEEN_BLOCK} seconds again")
            time.sleep(WAIT_BETWEEN_BLOCK)
            return cls.fetch_audio(song, src, trie=trie + 1)
        # success previously fell through returning None (falsy, same as
        # failure); return True for consistency with Musify.download_from_musify
        return True

View File

@@ -1,32 +1,20 @@
from ..utils.enums import album
from . import (
song,
metadata,
source,
parents,
formatted_text,
option,
collection
from .option import Options
from .parents import DatabaseObject
from .metadata import Metadata, Mapping as ID3Mapping, ID3Timestamp
from .source import Source, SourcePages, SourceTypes
from .song import (
Song,
Album,
Artist,
Target,
Lyrics,
Label
)
DatabaseObject = parents.DatabaseObject
from .formatted_text import FormattedText
from .collection import Collection
Metadata = metadata.Metadata
ID3Mapping = metadata.Mapping
ID3Timestamp = metadata.ID3Timestamp
Source = source.Source
Song = song.Song
Artist = song.Artist
Source = source.Source
Target = song.Target
Lyrics = song.Lyrics
Label = song.Label
Album = song.Album
FormattedText = formatted_text.FormattedText
Options = option.Options
Collection = collection.Collection
from .country import Country

View File

@@ -96,7 +96,7 @@ class Collection:
return AppendResult(True, existing_object, False)
# if the object does already exist
# thus merging and don't add it afterwards
# thus merging and don't add it afterward
if merge_into_existing:
existing_object.merge(element)
# in case any relevant data has been added (e.g. it remaps the old object)
@@ -158,3 +158,6 @@ class Collection:
@property
def empty(self) -> bool:
return len(self._data) == 0
def clear(self):
self.__init__(element_type=self.element_type)

File diff suppressed because one or more lines are too long

View File

@@ -17,7 +17,7 @@ class Lyrics(DatabaseObject):
def __init__(
self,
text: FormattedText,
language: pycountry.Languages,
language: pycountry.Languages = pycountry.languages.get(alpha_2="en"),
_id: str = None,
dynamic: bool = False,
source_list: List[Source] = None,

View File

@@ -16,7 +16,11 @@ class Mapping(Enum):
TITLE = "TIT2"
ISRC = "TSRC"
LENGTH = "TLEN" # in milliseconds
DATE = "TYER"
# The 'Date' frame is a numeric string in the DDMM format containing the date for the recording. This field is always four characters long.
DATE = "TDAT"
# The 'Time' frame is a numeric string in the HHMM format containing the time for the recording. This field is always four characters long.
TIME = "TIME"
YEAR = "TYER"
TRACKNUMBER = "TRCK"
TOTALTRACKS = "TRCK" # Stored in the same frame with TRACKNUMBER, separated by '/': e.g. '4/9'.
TITLESORTORDER = "TSOT"
@@ -205,6 +209,19 @@ class ID3Timestamp:
time_format = self.get_time_format()
return time_format, self.date_obj.strftime(time_format)
@classmethod
def fromtimestamp(cls, utc_timestamp: int):
date_obj = datetime.datetime.fromtimestamp(utc_timestamp)
return cls(
year=date_obj.year,
month=date_obj.month,
day=date_obj.day,
hour=date_obj.hour,
minute=date_obj.minute,
second=date_obj.second
)
@classmethod
def strptime(cls, time_stamp: str, format: str):
"""
@@ -285,7 +302,7 @@ class Metadata:
if id3_dict is not None:
self.add_metadata_dict(id3_dict)
def __setitem__(self, frame, value_list: list, override_existing: bool = True):
def __setitem__(self, frame: Mapping, value_list: list, override_existing: bool = True):
if type(value_list) != list:
raise ValueError(f"can only set attribute to list, not {type(value_list)}")

View File

@@ -1,4 +1,4 @@
from typing import TYPE_CHECKING, List
from typing import TYPE_CHECKING, List, Iterable
if TYPE_CHECKING:
from .parents import DatabaseObject
@@ -14,6 +14,13 @@ class Options:
def __iter__(self):
for database_object in self._data:
yield database_object
def append(self, element: 'DatabaseObject'):
self._data.append(element)
def extend(self, iterable: Iterable['DatabaseObject']):
for element in iterable:
self.append(element)
def get_next_options(self, index: int) -> 'Options':
if index >= len(self._data):

View File

@@ -11,6 +11,11 @@ class DatabaseObject:
COLLECTION_ATTRIBUTES: tuple = tuple()
SIMPLE_ATTRIBUTES: dict = dict()
# contains all collection attributes, which describe something "smaller"
# e.g. album has songs, but not artist.
DOWNWARDS_COLLECTION_ATTRIBUTES: tuple = tuple()
UPWARDS_COLLECTION_ATTRIBUTES: tuple = tuple()
def __init__(self, _id: int = None, dynamic: bool = False, **kwargs) -> None:
self.automatic_id: bool = False
@@ -31,6 +36,11 @@ class DatabaseObject:
self.build_version = -1
def __hash__(self):
if self.dynamic:
raise TypeError("Dynamic DatabaseObjects are unhashable.")
return self.id
def __eq__(self, other) -> bool:
if not isinstance(other, type(self)):
return False
@@ -65,6 +75,9 @@ class DatabaseObject:
return list()
def merge(self, other, override: bool = False):
if other is None:
return
if self is other:
return
@@ -82,13 +95,17 @@ class DatabaseObject:
if override or getattr(self, simple_attribute) == default_value:
setattr(self, simple_attribute, getattr(other, simple_attribute))
def strip_details(self):
for collection in type(self).DOWNWARDS_COLLECTION_ATTRIBUTES:
getattr(self, collection).clear()
@property
def metadata(self) -> Metadata:
return Metadata()
@property
def options(self) -> Options:
return Options([self])
def options(self) -> List["DatabaseObject"]:
return [self]
@property
def option_string(self) -> str:

View File

@@ -14,7 +14,7 @@ from .metadata import (
Metadata
)
from .option import Options
from .parents import MainObject
from .parents import MainObject, DatabaseObject
from .source import Source, SourceCollection
from .target import Target
from ..utils.string_processing import unify
@@ -46,6 +46,8 @@ class Song(MainObject):
"genre": None,
"notes": FormattedText()
}
UPWARDS_COLLECTION_ATTRIBUTES = ("album_collection", "main_artist_collection", "feature_artist_collection")
def __init__(
self,
@@ -160,7 +162,7 @@ class Song(MainObject):
f"feat. Artist({OPTION_STRING_DELIMITER.join(artist.name for artist in self.feature_artist_collection)})"
@property
def options(self) -> Options:
def options(self) -> List[DatabaseObject]:
"""
Return a list of related objects including the song object, album object, main artist objects, and
feature artist objects.
@@ -171,7 +173,7 @@ class Song(MainObject):
options.extend(self.feature_artist_collection)
options.extend(self.album_collection)
options.append(self)
return Options(options)
return options
@property
def tracksort_str(self) -> str:
@@ -204,6 +206,9 @@ class Album(MainObject):
"notes": FormattedText()
}
DOWNWARDS_COLLECTION_ATTRIBUTES = ("song_collection", )
UPWARDS_COLLECTION_ATTRIBUTES = ("artist_collection", "label_collection")
def __init__(
self,
_id: int = None,
@@ -290,7 +295,11 @@ class Album(MainObject):
id3Mapping.COPYRIGHT: [self.copyright],
id3Mapping.LANGUAGE: [self.iso_639_2_lang],
id3Mapping.ALBUM_ARTIST: [a.name for a in self.artist_collection],
id3Mapping.DATE: [self.date.timestamp],
id3Mapping.DATE: [self.date.strftime("%d%m")] if self.date.has_year and self.date.has_month else [],
id3Mapping.TIME: [self.date.strftime(("%H%M"))] if self.date.has_hour and self.date.has_minute else [],
id3Mapping.YEAR: [str(self.date.year).zfill(4)] if self.date.has_year else [],
id3Mapping.RELEASE_DATE: [self.date.timestamp],
id3Mapping.ORIGINAL_RELEASE_DATE: [self.date.timestamp],
id3Mapping.ALBUMSORTORDER: [str(self.albumsort)] if self.albumsort is not None else []
})
@@ -304,12 +313,12 @@ class Album(MainObject):
f"under Label({OPTION_STRING_DELIMITER.join([label.name for label in self.label_collection])})"
@property
def options(self) -> Options:
def options(self) -> List[DatabaseObject]:
options = self.artist_collection.shallow_list
options.append(self)
options.extend(self.song_collection)
return Options(options)
return options
def update_tracksort(self):
"""
@@ -398,6 +407,10 @@ class Album(MainObject):
:return:
"""
return len(self.artist_collection) > 1
@property
def album_type_string(self) -> str:
return self.album_type.value
"""
@@ -422,6 +435,9 @@ class Artist(MainObject):
"general_genre": ""
}
DOWNWARDS_COLLECTION_ATTRIBUTES = ("feature_song_collection", "main_album_collection")
UPWARDS_COLLECTION_ATTRIBUTES = ("label_collection", )
def __init__(
self,
_id: int = None,
@@ -592,11 +608,11 @@ class Artist(MainObject):
f"under Label({OPTION_STRING_DELIMITER.join([label.name for label in self.label_collection])})"
@property
def options(self) -> Options:
def options(self) -> List[DatabaseObject]:
options = [self]
options.extend(self.main_album_collection)
options.extend(self.feature_song_collection)
return Options(options)
return options
@property
def country_string(self):
@@ -646,6 +662,8 @@ class Label(MainObject):
"notes": FormattedText()
}
DOWNWARDS_COLLECTION_ATTRIBUTES = COLLECTION_ATTRIBUTES
def __init__(
self,
_id: int = None,
@@ -694,7 +712,9 @@ class Label(MainObject):
]
@property
def options(self) -> Options:
def options(self) -> List[DatabaseObject]:
options = [self]
options.extend(self.current_artist_collection.shallow_list)
options.extend(self.album_collection.shallow_list)
return options

View File

@@ -1,9 +1,10 @@
from collections import defaultdict
from enum import Enum
from typing import List, Dict, Tuple, Optional
from typing import List, Dict, Set, Tuple, Optional
from urllib.parse import urlparse
from ..utils.enums.source import SourcePages, SourceTypes
from ..utils.shared import ALL_YOUTUBE_URLS
from .metadata import Mapping, Metadata
from .parents import DatabaseObject
from .collection import Collection
@@ -53,7 +54,7 @@ class Source(DatabaseObject):
if "musify" in parsed.netloc:
return cls(SourcePages.MUSIFY, url, referer_page=referer_page)
if url.startswith("https://www.youtube"):
if parsed.netloc in [_url.netloc for _url in ALL_YOUTUBE_URLS]:
return cls(SourcePages.YOUTUBE, url, referer_page=referer_page)
if url.startswith("https://www.deezer"):
@@ -128,6 +129,10 @@ class SourceCollection(Collection):
super().map_element(source)
self._page_to_source_list[source.page_enum].append(source)
@property
def source_pages(self) -> Set[SourcePages]:
return set(source.page_enum for source in self._data)
def get_sources_from_page(self, source_page: SourcePages) -> List[Source]:
"""

View File

@@ -1,5 +1,5 @@
from pathlib import Path
from typing import List, Tuple
from typing import List, Tuple, TextIO
import requests
from tqdm import tqdm
@@ -98,3 +98,9 @@ class Target(DatabaseObject):
except requests.exceptions.Timeout:
shared.DOWNLOAD_LOGGER.error("Stream timed out.")
return False
def open(self, file_mode: str, **kwargs) -> TextIO:
return self.file_path.open(file_mode, **kwargs)
def delete(self):
self.file_path.unlink(missing_ok=True)

View File

@@ -1,10 +1,5 @@
from .encyclopaedia_metallum import EncyclopaediaMetallum
from .musify import Musify
from .youtube import YouTube
EncyclopaediaMetallum = EncyclopaediaMetallum
Musify = Musify
from . import download_center
Search = download_center.Search
from .abstract import Page, INDEPENDENT_DB_OBJECTS

View File

@@ -1,13 +1,13 @@
import logging
import random
from copy import copy
from typing import Optional, Union, Type, Dict, Set
from typing import Optional, Union, Type, Dict, Set, List, Tuple
from string import Formatter
import requests
from bs4 import BeautifulSoup
from .support_classes.default_target import DefaultTarget
from .support_classes.download_result import DownloadResult
from ..connection import Connection
from ..objects import (
Song,
Source,
@@ -23,6 +23,143 @@ from ..utils.enums.source import SourcePages
from ..utils.enums.album import AlbumType
from ..audio import write_metadata_to_target, correct_codec
from ..utils import shared
from ..utils.shared import DOWNLOAD_PATH, DOWNLOAD_FILE, AUDIO_FORMAT
from ..utils.support_classes import Query, DownloadResult
INDEPENDENT_DB_OBJECTS = Union[Label, Album, Artist, Song]
INDEPENDENT_DB_TYPES = Union[Type[Song], Type[Album], Type[Artist], Type[Label]]
class NamingDict(dict):
    """Format mapping for file naming; unknown keys resolve via attached music objects."""

    # shorthand key -> "<object>.<attribute>" lookup path
    CUSTOM_KEYS: Dict[str, str] = {
        "label": "label.name",
        "artist": "artist.name",
        "song": "song.title",
        "isrc": "song.isrc",
        "album": "album.title",
        "album_type": "album.album_type_string"
    }

    def __init__(self, values: dict, object_mappings: Dict[str, DatabaseObject] = None):
        self.object_mappings: Dict[str, DatabaseObject] = object_mappings or dict()
        super().__init__(values)
        self["audio_format"] = AUDIO_FORMAT

    def add_object(self, music_object: DatabaseObject):
        # objects are looked up by their lowercased class name
        self.object_mappings[type(music_object).__name__.lower()] = music_object

    def copy(self) -> dict:
        return type(self)(super().copy(), self.object_mappings.copy())

    def __getitem__(self, key: str) -> str:
        return super().__getitem__(key)

    def default_value_for_name(self, name: str) -> str:
        """Placeholder used whenever an attribute cannot be resolved."""
        return f'Various {name.replace("_", " ").title()}'

    def __missing__(self, key: str) -> str:
        """Resolve "<object>.<attribute>" (or a CUSTOM_KEYS shorthand) against the attached objects."""
        if "." not in key:
            if key not in self.CUSTOM_KEYS:
                return self.default_value_for_name(key)
            key = self.CUSTOM_KEYS[key]

        fragments = key.split(".")
        object_name = fragments[0].strip().lower()
        attribute_name = fragments[-1].strip().lower()

        source_object = self.object_mappings.get(object_name)
        if source_object is None:
            return self.default_value_for_name(attribute_name)

        # missing attribute or a None value both fall back to the placeholder
        resolved = getattr(source_object, attribute_name, None)
        if resolved is None:
            return self.default_value_for_name(attribute_name)
        return str(resolved)
def _clean_music_object(music_object: INDEPENDENT_DB_OBJECTS, collections: Dict[INDEPENDENT_DB_TYPES, Collection]):
    """Dispatch to the type-specific cleanup routine for *music_object*.

    Objects of any other exact type are silently ignored (returns None).
    """
    cleaner_by_type = {
        Label: _clean_label,
        Artist: _clean_artist,
        Album: _clean_album,
        Song: _clean_song,
    }

    # exact type match on purpose: subclasses are not dispatched
    cleaner = cleaner_by_type.get(type(music_object))
    if cleaner is not None:
        return cleaner(music_object, collections)
def _clean_collection(collection: Collection, collection_dict: Dict[INDEPENDENT_DB_TYPES, Collection]):
    """Merge each element of *collection* into the shared per-type collection.

    Elements are replaced in place by the canonical instance; newly merged
    (changed) elements are recursively cleaned as well.
    """
    if collection.element_type not in collection_dict:
        return

    shared_collection = collection_dict[collection.element_type]

    for index, element in enumerate(collection):
        append_result = shared_collection.append(element, merge_into_existing=True)
        collection[index] = append_result.current_element

        if not append_result.was_the_same:
            _clean_music_object(append_result.current_element, collection_dict)
def _clean_label(label: Label, collections: Dict[INDEPENDENT_DB_TYPES, Collection]):
    """Clean every sub-collection of a label (order preserved)."""
    for sub_collection in (label.current_artist_collection, label.album_collection):
        _clean_collection(sub_collection, collections)
def _clean_artist(artist: Artist, collections: Dict[INDEPENDENT_DB_TYPES, Collection]):
    """Clean every sub-collection of an artist (order preserved)."""
    for sub_collection in (
        artist.main_album_collection,
        artist.feature_song_collection,
        artist.label_collection,
    ):
        _clean_collection(sub_collection, collections)
def _clean_album(album: Album, collections: Dict[INDEPENDENT_DB_TYPES, Collection]):
    """Clean every sub-collection of an album (order preserved)."""
    for sub_collection in (
        album.label_collection,
        album.song_collection,
        album.artist_collection,
    ):
        _clean_collection(sub_collection, collections)
def _clean_song(song: Song, collections: Dict[INDEPENDENT_DB_TYPES, Collection]):
    """Clean every sub-collection of a song (order preserved)."""
    for sub_collection in (
        song.album_collection,
        song.feature_artist_collection,
        song.main_artist_collection,
    ):
        _clean_collection(sub_collection, collections)
def clean_object(dirty_object: DatabaseObject) -> DatabaseObject:
    """Deduplicate the object graph of *dirty_object* in place.

    Only objects that can stand alone in the database (label, artist,
    album, song) are cleaned; anything else is returned untouched.

    :return: the SAME object instance (no copy).
    """
    if isinstance(dirty_object, INDEPENDENT_DB_OBJECTS):
        # one shared collection per independent type, used for merging
        shared_collections = {
            object_type: Collection(element_type=object_type)
            for object_type in (Label, Artist, Album, Song)
        }
        _clean_music_object(dirty_object, shared_collections)
    return dirty_object
def build_new_object(new_object: DatabaseObject) -> DatabaseObject:
    """Clean and compile a freshly fetched object, returning it."""
    cleaned_object = clean_object(new_object)
    cleaned_object.compile(merge_into=False)
    return cleaned_object
def merge_together(old_object: DatabaseObject, new_object: DatabaseObject) -> DatabaseObject:
    """Merge a cleaned *new_object* into *old_object* and recompile it.

    :return: *old_object*, modified in place.
    """
    old_object.merge(clean_object(new_object))
    old_object.compile(merge_into=False)
    return old_object
class Page:
@@ -30,151 +167,90 @@ class Page:
This is an abstract class, laying out the
functionality for every other class fetching something
"""
API_SESSION: requests.Session = requests.Session()
API_SESSION.proxies = shared.proxies
TIMEOUT = 5
POST_TIMEOUT = TIMEOUT
TRIES = 5
LOGGER = logging.getLogger("this shouldn't be used")
SOURCE_TYPE: SourcePages
LOGGER = logging.getLogger("this shouldn't be used")
# set this to true, if all song details can also be fetched by fetching album details
NO_ADDITIONAL_DATA_FROM_SONG = False
def __init__(self):
super().__init__()
"""
CODE I NEED WHEN I START WITH MULTITHREADING
def __init__(self, end_event: EndThread, search_queue: Queue, search_result_queue: Queue):
self.end_event = end_event
self.search_queue = search_queue
self.search_result_queue = search_result_queue
super().__init__()
@property
def _empty_working_queues(self):
return self.search_queue.empty()
@classmethod
def get_request(
cls,
url: str,
stream: bool = False,
accepted_response_codes: set = {200},
trie: int = 0
) -> Optional[requests.Response]:
retry = False
try:
r = cls.API_SESSION.get(url, timeout=cls.TIMEOUT, stream=stream)
except requests.exceptions.Timeout:
cls.LOGGER.warning(f"request timed out at \"{url}\": ({trie}-{cls.TRIES})")
retry = True
except requests.exceptions.ConnectionError:
cls.LOGGER.warning(f"couldn't connect to \"{url}\": ({trie}-{cls.TRIES})")
retry = True
if not retry and r.status_code in accepted_response_codes:
return r
if not retry:
cls.LOGGER.warning(f"{cls.__name__} responded wit {r.status_code} at GET:{url}. ({trie}-{cls.TRIES})")
cls.LOGGER.debug(r.content)
if trie >= cls.TRIES:
cls.LOGGER.warning("to many tries. Aborting.")
return None
return cls.get_request(url=url, stream=stream, accepted_response_codes=accepted_response_codes, trie=trie + 1)
@classmethod
def post_request(cls, url: str, json: dict, accepted_response_codes: set = {200}, trie: int = 0) -> Optional[
requests.Response]:
retry = False
try:
r = cls.API_SESSION.post(url, json=json, timeout=cls.POST_TIMEOUT)
except requests.exceptions.Timeout:
cls.LOGGER.warning(f"request timed out at \"{url}\": ({trie}-{cls.TRIES})")
retry = True
except requests.exceptions.ConnectionError:
cls.LOGGER.warning(f"couldn't connect to \"{url}\": ({trie}-{cls.TRIES})")
retry = True
if not retry and r.status_code in accepted_response_codes:
return r
if not retry:
cls.LOGGER.warning(f"{cls.__name__} responded wit {r.status_code} at POST:{url}. ({trie}-{cls.TRIES})")
cls.LOGGER.debug(r.content)
if trie >= cls.TRIES:
cls.LOGGER.warning("to many tries. Aborting.")
return None
cls.LOGGER.warning(f"payload: {json}")
return cls.post_request(url=url, json=json, accepted_response_codes=accepted_response_codes, trie=trie + 1)
@classmethod
def get_soup_from_response(cls, r: requests.Response) -> BeautifulSoup:
def run(self) -> None:
while bool(self.end_event) and self._empty_working_queues:
if not self.search_queue.empty():
self.search(self.search_queue.get())
self.search_result_queue.put(FinishedSearch())
continue
"""
def get_source_type(self, source: Source) -> Optional[Type[DatabaseObject]]:
return None
def get_soup_from_response(self, r: requests.Response) -> BeautifulSoup:
return BeautifulSoup(r.content, "html.parser")
class Query:
def __init__(self, query: str):
self.query = query
self.is_raw = False
# to search stuff
def search(self, query: Query) -> List[DatabaseObject]:
music_object = query.music_object
search_functions = {
Song: self.song_search,
Album: self.album_search,
Artist: self.artist_search,
Label: self.label_search
}
if type(music_object) in search_functions:
r = search_functions[type(music_object)](music_object)
if len(r) > 0:
return r
r = []
for default_query in query.default_search:
for single_option in self.general_search(default_query):
r.append(single_option)
return r
def general_search(self, search_query: str) -> List[DatabaseObject]:
return []
def label_search(self, label: Label) -> List[Label]:
return []
def artist_search(self, artist: Artist) -> List[Artist]:
return []
def album_search(self, album: Album) -> List[Album]:
return []
def song_search(self, song: Song) -> List[Song]:
return []
self.artist = None
self.album = None
self.song = None
self.parse_query(query=query)
def __str__(self):
if self.is_raw:
return self.query
return f"{self.artist}; {self.album}; {self.song}"
def parse_query(self, query: str):
if not '#' in query:
self.is_raw = True
return
query = query.strip()
parameters = query.split('#')
parameters.remove('')
for parameter in parameters:
splitted = parameter.split(" ")
type_ = splitted[0]
input_ = " ".join(splitted[1:]).strip()
if type_ == "a":
self.artist = input_
continue
if type_ == "r":
self.album = input_
continue
if type_ == "t":
self.song = input_
continue
def get_str(self, string):
if string is None:
return ""
return string
artist_str = property(fget=lambda self: self.get_str(self.artist))
album_str = property(fget=lambda self: self.get_str(self.album))
song_str = property(fget=lambda self: self.get_str(self.song))
@classmethod
def search_by_query(cls, query: str) -> Options:
def fetch_details(self, music_object: DatabaseObject, stop_at_level: int = 1) -> DatabaseObject:
"""
# The Query
You can define a new parameter with "#",
the letter behind it defines the *type* of parameter, followed by a space
"#a Psychonaut 4 #r Tired, Numb and #t Drop by Drop"
if no # is in the query it gets treated as "unspecified query"
# Functionality
Returns the best matches from this page for the query, passed in.
:param query:
:return possible_music_objects:
"""
return Options()
@classmethod
def fetch_details(cls, music_object: Union[Song, Album, Artist, Label], stop_at_level: int = 1) -> DatabaseObject:
"""
when a music object with laccing data is passed in, it returns
when a music object with lacking data is passed in, it returns
the SAME object **(no copy)** with more detailed data.
If you for example put in an album, it fetches the tracklist
If you for example put in, an album, it fetches the tracklist
:param music_object:
:param stop_at_level:
@@ -187,362 +263,177 @@ class Page:
:return detailed_music_object: IT MODIFIES THE INPUT OBJ
"""
# creating a new object, of the same type
new_music_object: DatabaseObject = type(music_object)()
had_sources = False
# only certain database objects, have a source list
if isinstance(music_object, INDEPENDENT_DB_OBJECTS):
source: Source
for source in music_object.source_collection.get_sources_from_page(self.SOURCE_TYPE):
new_music_object.merge(
self.fetch_object_from_source(
source=source,
enforce_type=type(music_object),
stop_at_level=stop_at_level,
post_process=False
)
)
source: Source
for source in music_object.source_collection.get_sources_from_page(cls.SOURCE_TYPE):
new_music_object.merge(
cls._fetch_object_from_source(source=source, obj_type=type(music_object), stop_at_level=stop_at_level))
had_sources = True
return merge_together(music_object, new_music_object)
if not had_sources:
music_object.compile(merge_into=True)
return music_object
collections = {
Label: Collection(element_type=Label),
Artist: Collection(element_type=Artist),
Album: Collection(element_type=Album),
Song: Collection(element_type=Song)
}
cls._clean_music_object(new_music_object, collections)
music_object.merge(new_music_object)
music_object.compile(merge_into=True)
return music_object
@classmethod
def fetch_object_from_source(cls, source: Source, stop_at_level: int = 2):
obj_type = cls._get_type_of_url(source.url)
def fetch_object_from_source(self, source: Source, stop_at_level: int = 2, enforce_type: Type[DatabaseObject] = None, post_process: bool = True) -> Optional[DatabaseObject]:
obj_type = self.get_source_type(source)
if obj_type is None:
return None
music_object = cls._fetch_object_from_source(source=source, obj_type=obj_type, stop_at_level=stop_at_level)
collections = {
Label: Collection(element_type=Label),
Artist: Collection(element_type=Artist),
Album: Collection(element_type=Album),
Song: Collection(element_type=Song)
if enforce_type != obj_type and enforce_type is not None:
self.LOGGER.warning(f"Object type isn't type to enforce: {enforce_type}, {obj_type}")
return None
music_object: DatabaseObject = None
fetch_map = {
Song: self.fetch_song,
Album: self.fetch_album,
Artist: self.fetch_artist,
Label: self.fetch_label
}
if obj_type in fetch_map:
music_object = fetch_map[obj_type](source, stop_at_level)
else:
self.LOGGER.warning(f"Can't fetch details of type: {obj_type}")
return None
cls._clean_music_object(music_object, collections)
if post_process and music_object:
return build_new_object(music_object)
music_object.compile(merge_into=True)
return music_object
def fetch_song(self, source: Source, stop_at_level: int = 1) -> Song:
return Song()
@classmethod
def _fetch_object_from_source(cls, source: Source,
obj_type: Union[Type[Song], Type[Album], Type[Artist], Type[Label]],
stop_at_level: int = 1) -> Union[Song, Album, Artist, Label]:
if obj_type == Artist:
return cls._fetch_artist_from_source(source=source, stop_at_level=stop_at_level)
def fetch_album(self, source: Source, stop_at_level: int = 1) -> Album:
return Album()
if obj_type == Song:
return cls._fetch_song_from_source(source=source, stop_at_level=stop_at_level)
def fetch_artist(self, source: Source, stop_at_level: int = 1) -> Artist:
return Artist()
if obj_type == Album:
return cls._fetch_album_from_source(source=source, stop_at_level=stop_at_level)
def fetch_label(self, source: Source, stop_at_level: int = 1) -> Label:
return Label()
if obj_type == Label:
return cls._fetch_label_from_source(source=source, stop_at_level=stop_at_level)
@classmethod
def _clean_music_object(cls, music_object: Union[Label, Album, Artist, Song],
collections: Dict[Union[Type[Song], Type[Album], Type[Artist], Type[Label]], Collection]):
if type(music_object) == Label:
return cls._clean_label(label=music_object, collections=collections)
if type(music_object) == Artist:
return cls._clean_artist(artist=music_object, collections=collections)
if type(music_object) == Album:
return cls._clean_album(album=music_object, collections=collections)
if type(music_object) == Song:
return cls._clean_song(song=music_object, collections=collections)
@classmethod
def _clean_collection(cls, collection: Collection,
collection_dict: Dict[Union[Type[Song], Type[Album], Type[Artist], Type[Label]], Collection]):
if collection.element_type not in collection_dict:
return
for i, element in enumerate(collection):
r = collection_dict[collection.element_type].append(element, merge_into_existing=True)
collection[i] = r.current_element
if not r.was_the_same:
cls._clean_music_object(r.current_element, collection_dict)
@classmethod
def _clean_label(cls, label: Label,
collections: Dict[Union[Type[Song], Type[Album], Type[Artist], Type[Label]], Collection]):
cls._clean_collection(label.current_artist_collection, collections)
cls._clean_collection(label.album_collection, collections)
@classmethod
def _clean_artist(cls, artist: Artist,
collections: Dict[Union[Type[Song], Type[Album], Type[Artist], Type[Label]], Collection]):
cls._clean_collection(artist.main_album_collection, collections)
cls._clean_collection(artist.feature_song_collection, collections)
cls._clean_collection(artist.label_collection, collections)
@classmethod
def _clean_album(cls, album: Album,
collections: Dict[Union[Type[Song], Type[Album], Type[Artist], Type[Label]], Collection]):
cls._clean_collection(album.label_collection, collections)
cls._clean_collection(album.song_collection, collections)
cls._clean_collection(album.artist_collection, collections)
@classmethod
def _clean_song(cls, song: Song,
collections: Dict[Union[Type[Song], Type[Album], Type[Artist], Type[Label]], Collection]):
cls._clean_collection(song.album_collection, collections)
cls._clean_collection(song.feature_artist_collection, collections)
cls._clean_collection(song.main_artist_collection, collections)
@classmethod
def download(
cls,
music_object: Union[Song, Album, Artist, Label],
download_features: bool = True,
default_target: DefaultTarget = None,
genre: str = None,
override_existing: bool = False,
create_target_on_demand: bool = True,
download_all: bool = False,
exclude_album_type: Set[AlbumType] = shared.ALBUM_TYPE_BLACKLIST
) -> DownloadResult:
"""
:param genre: The downloader will download to THIS folder (set the value of default_target.genre to genre)
:param music_object:
:param download_features:
:param default_target:
:param override_existing:
:param create_target_on_demand:
:param download_all:
:param exclude_album_type:
:return total downloads, failed_downloads:
"""
if default_target is None:
default_target = DefaultTarget()
if download_all:
exclude_album_type: Set[AlbumType] = set()
elif exclude_album_type is None:
exclude_album_type = {
AlbumType.COMPILATION_ALBUM,
AlbumType.LIVE_ALBUM,
AlbumType.MIXTAPE
}
if type(music_object) is Song:
return cls.download_song(
music_object,
override_existing=override_existing,
create_target_on_demand=create_target_on_demand,
genre=genre
)
if type(music_object) is Album:
return cls.download_album(
music_object,
default_target=default_target,
override_existing=override_existing,
genre=genre
)
if type(music_object) is Artist:
return cls.download_artist(
music_object,
default_target=default_target,
download_features=download_features,
exclude_album_type=exclude_album_type,
genre=genre
)
if type(music_object) is Label:
return cls.download_label(
music_object,
download_features=download_features,
default_target=default_target,
exclude_album_type=exclude_album_type,
genre=genre
)
return DownloadResult(error_message=f"{type(music_object)} can't be downloaded.")
@classmethod
def download_label(
cls,
label: Label,
exclude_album_type: Set[AlbumType],
download_features: bool = True,
override_existing: bool = False,
default_target: DefaultTarget = None,
genre: str = None
) -> DownloadResult:
default_target = DefaultTarget() if default_target is None else copy(default_target)
default_target.label_object(label)
r = DownloadResult()
cls.fetch_details(label)
for artist in label.current_artist_collection:
r.merge(cls.download_artist(
artist,
download_features=download_features,
override_existing=override_existing,
default_target=default_target,
exclude_album_type=exclude_album_type,
genre=genre
))
album: Album
for album in label.album_collection:
if album.album_type == AlbumType.OTHER:
cls.fetch_details(album)
if album.album_type in exclude_album_type:
cls.LOGGER.info(f"Skipping {album.option_string} due to the filter. ({album.album_type})")
continue
def download(self, music_object: DatabaseObject, genre: str, download_all: bool = False, process_metadata_anyway: bool = False) -> DownloadResult:
naming_dict: NamingDict = NamingDict({"genre": genre})
def fill_naming_objects(naming_music_object: DatabaseObject):
nonlocal naming_dict
r.merge(cls.download_album(
album,
override_existing=override_existing,
default_target=default_target,
genre=genre
))
for collection_name in naming_music_object.UPWARDS_COLLECTION_ATTRIBUTES:
collection: Collection = getattr(naming_music_object, collection_name)
if collection.empty:
continue
dom_ordered_music_object: DatabaseObject = collection[0]
naming_dict.add_object(dom_ordered_music_object)
return fill_naming_objects(dom_ordered_music_object)
fill_naming_objects(music_object)
return self._download(music_object, naming_dict, download_all, process_metadata_anyway=process_metadata_anyway)
return r
@classmethod
def download_artist(
cls,
artist: Artist,
exclude_album_type: Set[AlbumType],
download_features: bool = True,
override_existing: bool = False,
default_target: DefaultTarget = None,
genre: str = None
) -> DownloadResult:
default_target = DefaultTarget() if default_target is None else copy(default_target)
default_target.artist_object(artist)
r = DownloadResult()
cls.fetch_details(artist)
album: Album
for album in artist.main_album_collection:
if album.album_type in exclude_album_type:
cls.LOGGER.info(f"Skipping {album.option_string} due to the filter. ({album.album_type})")
continue
def _download(self, music_object: DatabaseObject, naming_dict: NamingDict, download_all: bool = False, skip_details: bool = False, process_metadata_anyway: bool = False) -> DownloadResult:
skip_next_details = skip_details
# Skips all releases, that are defined in shared.ALBUM_TYPE_BLACKLIST, if download_all is False
if isinstance(music_object, Album):
if self.NO_ADDITIONAL_DATA_FROM_SONG:
skip_next_details = True
r.merge(cls.download_album(
album,
override_existing=override_existing,
default_target=default_target,
genre=genre
))
if not download_all and music_object.album_type in shared.ALBUM_TYPE_BLACKLIST:
return DownloadResult()
if download_features:
for song in artist.feature_album.song_collection:
r.merge(cls.download_song(
song,
override_existing=override_existing,
default_target=default_target,
genre=genre
))
if not isinstance(music_object, Song) or not self.NO_ADDITIONAL_DATA_FROM_SONG:
self.fetch_details(music_object=music_object, stop_at_level=2)
naming_dict.add_object(music_object)
return r
if isinstance(music_object, Song):
return self._download_song(music_object, naming_dict, process_metadata_anyway=process_metadata_anyway)
@classmethod
def download_album(
cls,
album: Album,
override_existing: bool = False,
default_target: DefaultTarget = None,
genre: str = None
) -> DownloadResult:
download_result: DownloadResult = DownloadResult()
default_target = DefaultTarget() if default_target is None else copy(default_target)
default_target.album_object(album)
for collection_name in music_object.DOWNWARDS_COLLECTION_ATTRIBUTES:
collection: Collection = getattr(music_object, collection_name)
r = DownloadResult()
sub_ordered_music_object: DatabaseObject
for sub_ordered_music_object in collection:
download_result.merge(self._download(sub_ordered_music_object, naming_dict.copy(), download_all, skip_details=skip_next_details, process_metadata_anyway=process_metadata_anyway))
cls.fetch_details(album)
return download_result
album.update_tracksort()
def _download_song(self, song: Song, naming_dict: NamingDict, process_metadata_anyway: bool = False):
if "genre" not in naming_dict and song.genre is not None:
naming_dict["genre"] = song.genre
cls.LOGGER.info(f"downloading album: {album.title}")
for song in album.song_collection:
r.merge(cls.download_song(
song,
override_existing=override_existing,
default_target=default_target,
genre=genre
))
if song.genre is None:
song.genre = naming_dict["genre"]
return r
path_parts = Formatter().parse(DOWNLOAD_PATH)
file_parts = Formatter().parse(DOWNLOAD_FILE)
new_target = Target(
relative_to_music_dir=True,
path=DOWNLOAD_PATH.format(**{part[1]: naming_dict[part[1]] for part in path_parts}),
file=DOWNLOAD_FILE.format(**{part[1]: naming_dict[part[1]] for part in file_parts})
)
@classmethod
def download_song(
cls,
song: Song,
override_existing: bool = False,
create_target_on_demand: bool = True,
default_target: DefaultTarget = None,
genre: str = None
) -> DownloadResult:
cls.LOGGER.debug(f"Setting genre of {song.option_string} to {genre}")
song.genre = genre
default_target = DefaultTarget() if default_target is None else copy(default_target)
default_target.song_object(song)
cls.fetch_details(song)
if song.target_collection.empty:
if create_target_on_demand and not song.main_artist_collection.empty and not song.album_collection.empty:
song.target_collection.append(default_target.target)
else:
return DownloadResult(error_message=f"No target exists for {song.title}, but create_target_on_demand is False.")
song.target_collection.append(new_target)
target: Target
if any(target.exists for target in song.target_collection) and not override_existing:
r = DownloadResult(total=1, fail=0)
existing_target: Target
for existing_target in song.target_collection:
if existing_target.exists:
r.merge(cls._post_process_targets(song=song, temp_target=existing_target))
break
return r
sources = song.source_collection.get_sources_from_page(cls.SOURCE_TYPE)
sources = song.source_collection.get_sources_from_page(self.SOURCE_TYPE)
if len(sources) == 0:
return DownloadResult(error_message=f"No source found for {song.title} as {cls.__name__}.")
return DownloadResult(error_message=f"No source found for {song.title} as {self.__class__.__name__}.")
temp_target: Target = Target(
path=shared.TEMP_DIR,
file=str(random.randint(0, 999999))
)
r = DownloadResult(1)
r = cls._download_song_to_targets(source=sources[0], target=temp_target, desc=song.title)
found_on_disc = False
target: Target
for target in song.target_collection:
if target.exists:
if process_metadata_anyway:
target.copy_content(temp_target)
found_on_disc = True
r.found_on_disk += 1
r.add_target(target)
if found_on_disc and not process_metadata_anyway:
self.LOGGER.info(f"{song.option_string} already exists, thus not downloading again.")
return r
source = sources[0]
if not found_on_disc:
r = self.download_song_to_target(source=source, target=temp_target, desc=song.title)
if not r.is_fatal_error:
r.merge(cls._post_process_targets(song, temp_target))
r.merge(self._post_process_targets(song, temp_target, [] if found_on_disc else self.get_skip_intervals(song, source)))
return r
@classmethod
def _post_process_targets(cls, song: Song, temp_target: Target) -> DownloadResult:
correct_codec(temp_target)
def _post_process_targets(self, song: Song, temp_target: Target, interval_list: List) -> DownloadResult:
correct_codec(temp_target, interval_list=interval_list)
self.post_process_hook(song, temp_target)
write_metadata_to_target(song.metadata, temp_target)
r = DownloadResult()
@@ -552,29 +443,17 @@ class Page:
if temp_target is not target:
temp_target.copy_content(target)
r.add_target(target)
temp_target.delete()
r.sponsor_segments += len(interval_list)
return r
@classmethod
def _fetch_song_from_source(cls, source: Source, stop_at_level: int = 1) -> Song:
return Song()
@classmethod
def _fetch_album_from_source(cls, source: Source, stop_at_level: int = 1) -> Album:
return Album()
@classmethod
def _fetch_artist_from_source(cls, source: Source, stop_at_level: int = 1) -> Artist:
return Artist()
@classmethod
def _fetch_label_from_source(cls, source: Source, stop_at_level: int = 1) -> Label:
return Label()
@classmethod
def _get_type_of_url(cls, url: str) -> Optional[Union[Type[Song], Type[Album], Type[Artist], Type[Label]]]:
return None
@classmethod
def _download_song_to_targets(cls, source: Source, target: Target, desc: str = None) -> DownloadResult:
def get_skip_intervals(self, song: Song, source: Source) -> List[Tuple[float, float]]:
return []
def post_process_hook(self, song: Song, temp_target: Target, **kwargs):
pass
def download_song_to_target(self, source: Source, target: Target, desc: str = None) -> DownloadResult:
return DownloadResult()

View File

@@ -1,5 +0,0 @@
from . import search
from . import download
Download = download.Download
Search = search.Search

View File

@@ -1,45 +0,0 @@
from typing import Optional, Tuple, Type, Set, Union, List
from . import page_attributes
from ..abstract import Page
from ...objects import Song, Album, Artist, Label, Source
MusicObject = Union[Song, Album, Artist, Label]
class Download:
    """Facade bundling the configured metadata pages.

    Filters the registered pages by the caller's exclusion rules and keeps
    a separate tuple of the pages that can deliver audio.
    """

    def __init__(
        self,
        pages: Tuple[Type[Page]] = page_attributes.ALL_PAGES,
        exclude_pages: Set[Type[Page]] = frozenset(),  # immutable default instead of the mutable set()
        exclude_shady: bool = False,
    ) -> None:
        """
        :param pages: candidate pages, kept in the given order
        :param exclude_pages: pages to drop regardless of other rules
        :param exclude_shady: additionally drop pages listed in SHADY_PAGES
        """
        _page_list: List[Type[Page]] = []
        _audio_page_list: List[Type[Page]] = []

        for page in pages:
            if exclude_shady and page in page_attributes.SHADY_PAGES:
                continue
            if page in exclude_pages:
                continue

            _page_list.append(page)

            if page in page_attributes.AUDIO_PAGES:
                _audio_page_list.append(page)

        self.pages: Tuple[Type[Page]] = tuple(_page_list)
        # subset of self.pages able to download audio, same relative order
        self.audio_pages: Tuple[Type[Page]] = tuple(_audio_page_list)

    def fetch_details(self, music_object: MusicObject) -> MusicObject:
        """Enrich *music_object* in place with details from every page.

        :return: the same object instance.
        """
        for page in self.pages:
            page.fetch_details(music_object=music_object)
        return music_object

    def fetch_source(self, source: Source) -> Optional[MusicObject]:
        """Resolve *source* to a music object via its owning page.

        Returns None when the page responsible for the source is not
        among the enabled pages.
        """
        source_page = page_attributes.SOURCE_PAGE_MAP[source.page_enum]

        if source_page not in self.pages:
            return

        return source_page.fetch_object_from_source(source)

View File

@@ -1,100 +0,0 @@
from collections import defaultdict
from typing import Tuple, List, Dict, Type
from . import page_attributes
from ..abstract import Page
from ...objects import Options, DatabaseObject, Source
class MultiPageOptions:
    """Aggregates the Options returned by several pages into one
    consecutively numbered list.

    The numbering printed by ``string_from_all_pages`` matches the indices
    accepted by ``choose_from_all_pages``.
    """

    def __init__(
            self,
            max_displayed_options: int = 10,
            option_digits: int = 3,
            derived_from: DatabaseObject = None
    ) -> None:
        """
        :param max_displayed_options: cap of options shown per page
        :param option_digits: zero-padding width of the printed indices
        :param derived_from: object these options were derived from, if any
        """
        self.max_displayed_options = max_displayed_options
        self.option_digits: int = option_digits

        self._length = 0
        self._current_option_dict: Dict[Type[Page], Options] = defaultdict(lambda: Options())

        self._derive_from = derived_from

    def __getitem__(self, key: Type[Page]):
        return self._current_option_dict[key]

    def __setitem__(self, key: Type[Page], value: Options):
        self._current_option_dict[key] = value

        # recount the number of pages that have options
        self._length = 0
        for key in self._current_option_dict:
            self._length += 1

    def __len__(self) -> int:
        return self._length

    def get_page_str(self, page: Type[Page]) -> str:
        """Return the decorated header line for *page*."""
        page_name_fill = "-"
        max_page_len = 21

        return f"({page_attributes.PAGE_NAME_MAP[page]}) ------------------------{page.__name__:{page_name_fill}<{max_page_len}}------------"

    def string_from_all_pages(self) -> str:
        """Render all pages' options as one numbered listing.

        The running index is kept consistent with choose_from_all_pages().
        """
        if self._length == 1:
            for key in self._current_option_dict:
                return self.string_from_single_page(key)

        lines: List[str] = []

        j = 0
        for page, options in self._current_option_dict.items():
            lines.append(self.get_page_str(page))

            option_obj: DatabaseObject
            for i, option_obj in enumerate(options):
                if i >= self.max_displayed_options:
                    lines.append("...")
                    break

                lines.append(f"{j + i:0{self.option_digits}} {option_obj.option_string}")

            # BUGFIX: advance by the number of *selectable* options on this
            # page. The previous `j += i + 1` over-counted by one whenever a
            # page was truncated, desynchronizing the displayed indices from
            # choose_from_all_pages(), which advances by this same amount.
            j += min(len(options), self.max_displayed_options)

        return "\n".join(lines)

    def choose_from_all_pages(self, index: int) -> Tuple[DatabaseObject, Type[Page]]:
        """Map a global *index* back to (option object, page).

        :raises IndexError: when *index* is past the last displayed option.
        """
        if self._length == 1:
            for key in self._current_option_dict:
                return self.choose_from_single_page(key, index), key

        sum_of_length = 0
        for page, options in self._current_option_dict.items():
            option_len = min((len(options), self.max_displayed_options))

            index_of_list = index - sum_of_length

            if index_of_list < option_len:
                return options[index_of_list], page

            sum_of_length += option_len

        raise IndexError("index is out of range")

    def string_from_single_page(self, page: Type[Page]) -> str:
        """Render the full (uncapped) option list of a single page."""
        lines: List[str] = [self.get_page_str(page)]

        option_obj: DatabaseObject
        for i, option_obj in enumerate(self._current_option_dict[page]):
            lines.append(f"{i:0{self.option_digits}} {option_obj.option_string}")

        return "\n".join(lines)

    def choose_from_single_page(self, page: Type[Page], index: int) -> DatabaseObject:
        """Return option *index* of *page* (page-local numbering)."""
        return self._current_option_dict[page][index]

    def __repr__(self) -> str:
        return self.string_from_all_pages()

View File

@@ -1,34 +0,0 @@
from typing import Tuple, Type, Dict
from ...utils.enums.source import SourcePages
from ..abstract import Page
from ..encyclopaedia_metallum import EncyclopaediaMetallum
from ..musify import Musify
# lookup tables, populated from ALL_PAGES below
NAME_PAGE_MAP: Dict[str, Type[Page]] = dict()
PAGE_NAME_MAP: Dict[Type[Page], str] = dict()
SOURCE_PAGE_MAP: Dict[SourcePages, Type[Page]] = dict()

ALL_PAGES: Tuple[Type[Page]] = (
    EncyclopaediaMetallum,
    Musify
)

AUDIO_PAGES: Tuple[Type[Page]] = (
    Musify,
)

SHADY_PAGES: Tuple[Type[Page]] = (
    Musify,
)

# this needs to be case insensitive
SHORTHANDS = ('a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z')

# register each page under its class name and a one-letter shorthand,
# and map its source enum back to the page class
for shorthand, page in zip(SHORTHANDS, ALL_PAGES):
    NAME_PAGE_MAP[page.__name__.lower()] = page
    NAME_PAGE_MAP[shorthand.lower()] = page

    PAGE_NAME_MAP[page] = shorthand
    SOURCE_PAGE_MAP[page.SOURCE_TYPE] = page

View File

@@ -1,145 +0,0 @@
from typing import Tuple, List, Set, Type, Optional
from . import page_attributes
from .download import Download
from .multiple_options import MultiPageOptions
from ..abstract import Page
from ..support_classes.download_result import DownloadResult
from ...objects import DatabaseObject, Source
class Search(Download):
    """Interactive, history-based search across all registered pages.

    Every search or choice pushes a new MultiPageOptions onto an internal
    history, so the user can step back with goto_previous().
    """

    def __init__(
            self,
            pages: Tuple[Type[Page]] = page_attributes.ALL_PAGES,
            exclude_pages: Set[Type[Page]] = frozenset(),  # immutable default instead of the mutable set()
            exclude_shady: bool = False,
            max_displayed_options: int = 10,
            option_digits: int = 3,
    ) -> None:
        """
        :param pages: candidate pages, forwarded to Download
        :param exclude_pages: pages to drop, forwarded to Download
        :param exclude_shady: drop SHADY_PAGES too, forwarded to Download
        :param max_displayed_options: cap of options shown per page
        :param option_digits: zero-padding width of printed option indices
        """
        super().__init__(
            pages=pages,
            exclude_pages=exclude_pages,
            exclude_shady=exclude_shady
        )

        self.max_displayed_options = max_displayed_options
        self.option_digits: int = option_digits

        # stack of option pages; the last entry is always _current_option
        self._option_history: List[MultiPageOptions] = []

        self._current_option: MultiPageOptions = self.next_options()

    def __repr__(self):
        return self._current_option.__repr__()

    def next_options(self, derive_from: DatabaseObject = None) -> MultiPageOptions:
        """Push a fresh options page onto the history and return it."""
        mpo = MultiPageOptions(
            max_displayed_options=self.max_displayed_options,
            option_digits=self.option_digits,
            derived_from=derive_from
        )

        self._option_history.append(mpo)
        self._current_option = mpo

        return mpo

    def _previous_options(self) -> MultiPageOptions:
        """Drop the newest options page and return the previous one.

        :raises IndexError: when already at the root options page.
        """
        # BUGFIX: check BEFORE popping. The old code popped first, so calling
        # this at the root emptied the history and left _current_option
        # pointing at the just-discarded page.
        if len(self._option_history) < 2:
            raise IndexError("already at the first options page")

        self._option_history.pop()
        self._current_option = self._option_history[-1]
        return self._option_history[-1]

    def search(self, query: str):
        """
        # The Query
        You can define a new parameter with "#",
        the letter behind it defines the *type* of parameter,
        followed by a space "#a Psychonaut 4 #r Tired, Numb and #t Drop by Drop"
        if no # is in the query it gets treated as "unspecified query"

        doesn't set derived_from thus,
        can't download right after
        """
        for page in self.pages:
            self._current_option[page] = page.search_by_query(query=query)

    def choose_page(self, page: Type[Page]):
        """
        Narrow the current options down to a single page.

        doesn't set derived_from thus,
        can't download right after
        """
        if page not in page_attributes.ALL_PAGES:
            raise ValueError(f"Page \"{page.__name__}\" does not exist in page_attributes.ALL_PAGES")

        prev_mpo = self._current_option
        mpo = self.next_options()

        mpo[page] = prev_mpo[page]

    def get_page_from_query(self, query: str) -> Optional[Type[Page]]:
        """
        query can be for example:
        "a" or "EncyclopaediaMetallum" to choose a page

        Returns None when the query doesn't name an enabled page.
        """
        page = page_attributes.NAME_PAGE_MAP.get(query.lower().strip())

        if page in self.pages:
            return page

    def _get_page_from_source(self, source: Source) -> Optional[Type[Page]]:
        """Map a source back to its owning page class, if registered."""
        return page_attributes.SOURCE_PAGE_MAP.get(source.page_enum)

    def choose_index(self, index: int):
        """Select option *index*, fetch its details, and push its options."""
        db_object, page = self._current_option.choose_from_all_pages(index=index)

        music_object = self.fetch_details(db_object)

        mpo = self.next_options(derive_from=music_object)
        mpo[page] = music_object.options

    def goto_previous(self):
        """Step back one options page; no-op at the root."""
        try:
            self._previous_options()
        except IndexError:
            pass

    def search_url(self, url: str) -> bool:
        """
        sets derived_from, thus
        can download directly after

        :return: True when the url was resolved to an object.
        """
        source = Source.match_url(url=url)
        if source is None:
            return False

        new_object = self.fetch_source(source)
        if new_object is None:
            return False

        page = page_attributes.SOURCE_PAGE_MAP[source.page_enum]
        mpo = self.next_options(derive_from=new_object)
        mpo[page] = new_object.options

        return True

    def download_chosen(self, genre: str = None, download_all: bool = False, **kwargs) -> DownloadResult:
        """Download the object the current options were derived from.

        Picks the first of the object's sources whose page can deliver audio.
        """
        if self._current_option._derive_from is None:
            return DownloadResult(error_message="No option has been chosen yet.")

        source: Source
        for source in self._current_option._derive_from.source_collection:
            page = self._get_page_from_source(source=source)

            if page in self.audio_pages:
                return page.download(music_object=self._current_option._derive_from, genre=genre, download_all=download_all, **kwargs)

        return DownloadResult(error_message=f"Didn't find a source for {self._current_option._derive_from.option_string}.")

View File

@@ -1,15 +1,15 @@
from collections import defaultdict
from typing import List, Optional, Dict, Type, Union
import requests
from bs4 import BeautifulSoup
import pycountry
from urllib.parse import urlparse
from ..utils.shared import ENCYCLOPAEDIA_METALLUM_LOGGER, proxies
from ..utils import string_processing
from ..connection import Connection
from ..utils.shared import ENCYCLOPAEDIA_METALLUM_LOGGER
from .abstract import Page
from ..utils.enums.source import SourcePages
from ..utils.enums.album import AlbumType
from ..utils.support_classes import Query
from ..objects import (
Lyrics,
Artist,
@@ -20,98 +20,174 @@ from ..objects import (
FormattedText,
Label,
Options,
DatabaseObject
)
ALBUM_TYPE_MAP: Dict[str, AlbumType] = defaultdict(lambda: AlbumType.OTHER, {
"Full-length": AlbumType.STUDIO_ALBUM,
"Single": AlbumType.SINGLE,
"EP": AlbumType.EP,
"Demo": AlbumType.DEMO,
"Video": AlbumType.OTHER,
"Live album": AlbumType.LIVE_ALBUM,
"Compilation": AlbumType.COMPILATION_ALBUM
})
def _song_from_json(artist_html=None, album_html=None, release_type=None, title=None, lyrics_html=None) -> Song:
song_id = None
if lyrics_html is not None:
soup = BeautifulSoup(lyrics_html, 'html.parser')
anchor = soup.find('a')
raw_song_id = anchor.get('id')
song_id = raw_song_id.replace("lyricsLink_", "")
return Song(
title=title,
main_artist_list=[
_artist_from_json(artist_html=artist_html)
],
album_list=[
_album_from_json(album_html=album_html, release_type=release_type, artist_html=artist_html)
],
source_list=[
Source(SourcePages.ENCYCLOPAEDIA_METALLUM, song_id)
]
)
def _artist_from_json(artist_html=None, genre=None, country=None) -> Artist:
"""
TODO parse the country to a standard
"""
# parse the html
# parse the html for the band name and link on metal-archives
soup = BeautifulSoup(artist_html, 'html.parser')
anchor = soup.find('a')
artist_name = anchor.text
artist_url = anchor.get('href')
artist_id = artist_url.split("/")[-1]
anchor.decompose()
strong = soup.find('strong')
if strong is not None:
strong.decompose()
akronyms_ = soup.text[2:-2].split(', ')
return Artist(
name=artist_name,
source_list=[
Source(SourcePages.ENCYCLOPAEDIA_METALLUM, artist_url)
]
)
def _album_from_json(album_html=None, release_type=None, artist_html=None) -> Album:
# parse the html
# <a href="https://www.metal-archives.com/albums/Ghost_Bath/Self_Loather/970834">Self Loather</a>'
soup = BeautifulSoup(album_html, 'html.parser')
anchor = soup.find('a')
album_name = anchor.text
album_url = anchor.get('href')
album_id = album_url.split("/")[-1]
album_type = ALBUM_TYPE_MAP[release_type.strip()]
return Album(
title=album_name,
album_type=album_type,
source_list=[
Source(SourcePages.ENCYCLOPAEDIA_METALLUM, album_url)
],
artist_list=[
_artist_from_json(artist_html=artist_html)
]
)
class EncyclopaediaMetallum(Page):
API_SESSION: requests.Session = requests.Session()
API_SESSION.proxies = proxies
API_SESSION.headers = {
"Host": "www.metal-archives.com",
"Connection": "keep-alive"
}
SOURCE_TYPE = SourcePages.ENCYCLOPAEDIA_METALLUM
ALBUM_TYPE_MAP: Dict[str, AlbumType] = defaultdict(lambda: AlbumType.OTHER, {
"Full-length": AlbumType.STUDIO_ALBUM,
"Single": AlbumType.SINGLE,
"EP": AlbumType.EP,
"Demo": AlbumType.DEMO,
"Video": AlbumType.OTHER,
"Live album": AlbumType.LIVE_ALBUM,
"Compilation": AlbumType.COMPILATION_ALBUM
})
LOGGER = ENCYCLOPAEDIA_METALLUM_LOGGER
def __init__(self, **kwargs):
self.connection: Connection = Connection(
host="https://www.metal-archives.com/",
logger=ENCYCLOPAEDIA_METALLUM_LOGGER
)
super().__init__(**kwargs)
@classmethod
def search_by_query(cls, query: str) -> Options:
query_obj = cls.Query(query)
if query_obj.is_raw:
return cls.simple_search(query_obj)
return cls.advanced_search(query_obj)
@classmethod
def advanced_search(cls, query: Page.Query) -> Options:
if query.song is not None:
return Options(cls.search_for_song(query=query))
if query.album is not None:
return Options(cls.search_for_album(query=query))
if query.artist is not None:
return Options(cls.search_for_artist(query=query))
return Options
@classmethod
def search_for_song(cls, query: Page.Query) -> List[Song]:
def song_search(self, song: Song) -> List[Song]:
endpoint = "https://www.metal-archives.com/search/ajax-advanced/searching/songs/?songTitle={song}&bandName={" \
"artist}&releaseTitle={album}&lyrics=&genre=&sEcho=1&iColumns=5&sColumns=&iDisplayStart=0" \
"&iDisplayLength=200&mDataProp_0=0&mDataProp_1=1&mDataProp_2=2&mDataProp_3=3&mDataProp_4=4&_" \
"=1674550595663"
r = cls.get_request(endpoint.format(song=query.song_str, artist=query.artist_str, album=query.album_str))
if r.status_code != 200:
cls.LOGGER.warning(
f"code {r.status_code} at {endpoint.format(song=query.song_str, artist=query.artist_str, album=query.album_str)}")
return []
"""
The difficult question I am facing is, that if I try every artist, with every song, with every album,
I end up with a quadratic runtime complecety O(n^2), where every step means one web request.
This.
Is not good.
"""
return [cls.get_song_from_json(
artist_html=raw_song[0],
album_html=raw_song[1],
release_type=raw_song[2],
title=raw_song[3],
lyrics_html=raw_song[4]
) for raw_song in r.json()['aaData']]
song_title = song.title
album_titles = ["*"] if song.album_collection.empty else [album.title for album in song.album_collection]
artist_titles = ["*"] if song.main_artist_collection.empty else [artist.name for artist in song.main_artist_collection]
@classmethod
def search_for_album(cls, query: Page.Query) -> List[Album]:
search_results = []
for artist in artist_titles:
for album in album_titles:
r = self.connection.get(
endpoint.format(song=song_title, artist=artist, album=album)
)
if r is None:
return []
search_results.extend(_song_from_json(
artist_html=raw_song[0],
album_html=raw_song[1],
release_type=raw_song[2],
title=raw_song[3],
lyrics_html=raw_song[4]
) for raw_song in r.json()['aaData'])
return search_results
def album_search(self, album: Album) -> List[Album]:
endpoint = "https://www.metal-archives.com/search/ajax-advanced/searching/albums/?bandName={" \
"artist}&releaseTitle={album}&releaseYearFrom=&releaseMonthFrom=&releaseYearTo=&releaseMonthTo" \
"=&country=&location=&releaseLabelName=&releaseCatalogNumber=&releaseIdentifiers" \
"=&releaseRecordingInfo=&releaseDescription=&releaseNotes=&genre=&sEcho=1&iColumns=3&sColumns" \
"=&iDisplayStart=0&iDisplayLength=200&mDataProp_0=0&mDataProp_1=1&mDataProp_2=2&_=1674563943747"
r = cls.get_request(endpoint.format(artist=query.artist_str, album=query.album_str))
if r.status_code != 200:
cls.LOGGER.warning(
f"code {r.status_code} at {endpoint.format(song=query.song_str, artist=query.artist_str, album=query.album_str)}")
return []
return [cls.get_album_from_json(
artist_html=raw_album[0],
album_html=raw_album[1],
release_type=raw_album[2]
) for raw_album in r.json()['aaData']]
album_title = album.title
artist_titles = ["*"] if album.artist_collection.empty else [artist.name for artist in album.artist_collection]
@classmethod
def search_for_artist(cls, query: Page.Query) -> List[Artist]:
search_results = []
for artist in artist_titles:
r = self.connection.get(endpoint.format(artist=artist, album=album_title))
if r is None:
return []
search_results.extend(_album_from_json(
artist_html=raw_album[0],
album_html=raw_album[1],
release_type=raw_album[2]
) for raw_album in r.json()['aaData'])
def artist_search(self, artist: Artist) -> List[Artist]:
endpoint = "https://www.metal-archives.com/search/ajax-advanced/searching/bands/?bandName={" \
"artist}&genre=&country=&yearCreationFrom=&yearCreationTo=&bandNotes=&status=&themes=&location" \
"=&bandLabelName=&sEcho=1&iColumns=3&sColumns=&iDisplayStart=0&iDisplayLength=200&mDataProp_0=0" \
"&mDataProp_1=1&mDataProp_2=2&_=1674565459976"
r = cls.get_request(endpoint.format(artist=query.artist))
r = self.connection.get(endpoint.format(artist=artist.name))
if r is None:
return []
@@ -122,108 +198,34 @@ class EncyclopaediaMetallum(Page):
return []
return [
cls.get_artist_from_json(artist_html=raw_artist[0], genre=raw_artist[1], country=raw_artist[2])
_artist_from_json(artist_html=raw_artist[0], genre=raw_artist[1], country=raw_artist[2])
for raw_artist in r.json()['aaData']
]
@classmethod
def simple_search(cls, query: Page.Query) -> List[Artist]:
def general_search(self, query: str) -> List[DatabaseObject]:
"""
Searches the default endpoint from metal archives, which intern searches only
for bands, but it is the default, thus I am rolling with it
"""
endpoint = "https://www.metal-archives.com/search/ajax-band-search/?field=name&query={query}&sEcho=1&iColumns=3&sColumns=&iDisplayStart=0&iDisplayLength=200&mDataProp_0=0&mDataProp_1=1&mDataProp_2=2"
r = cls.get_request(endpoint.format(query=query))
r = self.connection.get(endpoint.format(query=query))
if r is None:
return []
return [
cls.get_artist_from_json(artist_html=raw_artist[0], genre=raw_artist[1], country=raw_artist[2])
_artist_from_json(artist_html=raw_artist[0], genre=raw_artist[1], country=raw_artist[2])
for raw_artist in r.json()['aaData']
]
@classmethod
def get_artist_from_json(cls, artist_html=None, genre=None, country=None) -> Artist:
"""
TODO parse the country to a standart
"""
# parse the html
# parse the html for the band name and link on metal-archives
soup = BeautifulSoup(artist_html, 'html.parser')
anchor = soup.find('a')
artist_name = anchor.text
artist_url = anchor.get('href')
artist_id = artist_url.split("/")[-1]
anchor.decompose()
strong = soup.find('strong')
if strong is not None:
strong.decompose()
akronyms_ = soup.text[2:-2].split(', ')
return Artist(
name=artist_name,
source_list=[
Source(SourcePages.ENCYCLOPAEDIA_METALLUM, artist_url)
]
)
@classmethod
def get_album_from_json(cls, album_html=None, release_type=None, artist_html=None) -> Album:
# parse the html
# <a href="https://www.metal-archives.com/albums/Ghost_Bath/Self_Loather/970834">Self Loather</a>'
soup = BeautifulSoup(album_html, 'html.parser')
anchor = soup.find('a')
album_name = anchor.text
album_url = anchor.get('href')
album_id = album_url.split("/")[-1]
album_type = cls.ALBUM_TYPE_MAP[release_type.strip()]
return Album(
title=album_name,
album_type=album_type,
source_list=[
Source(SourcePages.ENCYCLOPAEDIA_METALLUM, album_url)
],
artist_list=[
cls.get_artist_from_json(artist_html=artist_html)
]
)
@classmethod
def get_song_from_json(cls, artist_html=None, album_html=None, release_type=None, title=None,
lyrics_html=None) -> Song:
song_id = None
if lyrics_html is not None:
soup = BeautifulSoup(lyrics_html, 'html.parser')
anchor = soup.find('a')
raw_song_id = anchor.get('id')
song_id = raw_song_id.replace("lyricsLink_", "")
return Song(
title=title,
main_artist_list=[
cls.get_artist_from_json(artist_html=artist_html)
],
album_list=[
cls.get_album_from_json(album_html=album_html, release_type=release_type, artist_html=artist_html)
],
source_list=[
Source(SourcePages.ENCYCLOPAEDIA_METALLUM, song_id)
]
)
@classmethod
def _fetch_artist_discography(cls, ma_artist_id: str) -> List[Album]:
def _fetch_artist_discography(self, ma_artist_id: str) -> List[Album]:
discography_url = "https://www.metal-archives.com/band/discography/id/{}/tab/all"
# make the request
r = cls.get_request(discography_url.format(ma_artist_id))
r = self.connection.get(discography_url.format(ma_artist_id))
if r is None:
return []
soup = cls.get_soup_from_response(r)
soup = self.get_soup_from_response(r)
discography = []
@@ -247,21 +249,20 @@ class EncyclopaediaMetallum(Page):
Album(
title=album_name,
date=date_obj,
album_type=cls.ALBUM_TYPE_MAP[raw_album_type],
source_list=[Source(SourcePages.ENCYCLOPAEDIA_METALLUM, album_url)]
album_type=ALBUM_TYPE_MAP[raw_album_type],
source_list=[Source(self.SOURCE_TYPE, album_url)]
)
)
return discography
@classmethod
def _fetch_artist_sources(cls, ma_artist_id: str) -> List[Source]:
def _fetch_artist_sources(self, ma_artist_id: str) -> List[Source]:
sources_url = "https://www.metal-archives.com/link/ajax-list/type/band/id/{}"
r = cls.get_request(sources_url.format(ma_artist_id))
r = self.connection.get(sources_url.format(ma_artist_id))
if r is None:
return []
soup = cls.get_soup_from_response(r)
soup = self.get_soup_from_response(r)
if soup.find("span", {"id": "noLinks"}) is not None:
return []
@@ -285,12 +286,11 @@ class EncyclopaediaMetallum(Page):
if url is None:
continue
source_list.append(Source.match_url(url, referer_page=cls.SOURCE_TYPE))
source_list.append(Source.match_url(url, referer_page=self.SOURCE_TYPE))
return source_list
@classmethod
def _parse_artist_attributes(cls, artist_soup: BeautifulSoup) -> Artist:
def _parse_artist_attributes(self, artist_soup: BeautifulSoup) -> Artist:
name: str = None
country: pycountry.Countrie = None
formed_in_year: int = None
@@ -307,7 +307,7 @@ class EncyclopaediaMetallum(Page):
if title_text.count(bad_name_substring) == 1:
name = title_text.replace(bad_name_substring, "")
else:
cls.LOGGER.debug(f"the title of the page is \"{title_text}\"")
self.LOGGER.debug(f"the title of the page is \"{title_text}\"")
"""
TODO
@@ -337,7 +337,7 @@ class EncyclopaediaMetallum(Page):
href = anchor.get("href")
if href is not None:
source_list.append(Source(cls.SOURCE_TYPE, href))
source_list.append(Source(self.SOURCE_TYPE, href))
name = anchor.get_text(strip=True)
@@ -396,35 +396,32 @@ class EncyclopaediaMetallum(Page):
Label(
name=label_name,
source_list=[
Source(cls.SOURCE_TYPE, label_url)
Source(self.SOURCE_TYPE, label_url)
]
)
],
source_list=source_list
)
@classmethod
def _fetch_artist_attributes(cls, url: str) -> Artist:
r = cls.get_request(url)
def _fetch_artist_attributes(self, url: str) -> Artist:
r = self.connection.get(url)
if r is None:
return Artist()
soup: BeautifulSoup = cls.get_soup_from_response(r)
soup: BeautifulSoup = self.get_soup_from_response(r)
return cls._parse_artist_attributes(artist_soup=soup)
return self._parse_artist_attributes(artist_soup=soup)
@classmethod
def _fetch_band_notes(cls, ma_artist_id: str) -> Optional[FormattedText]:
def _fetch_band_notes(self, ma_artist_id: str) -> Optional[FormattedText]:
endpoint = "https://www.metal-archives.com/band/read-more/id/{}"
# make the request
r = cls.get_request(endpoint.format(ma_artist_id))
r = self.connection.get(endpoint.format(ma_artist_id))
if r is None:
return FormattedText()
return FormattedText(html=r.text)
@classmethod
def _fetch_artist_from_source(cls, source: Source, stop_at_level: int = 1) -> Artist:
def fetch_artist(self, source: Source, stop_at_level: int = 1) -> Artist:
"""
What it could fetch, and what is implemented:
@@ -436,28 +433,27 @@ class EncyclopaediaMetallum(Page):
[x] band notes: https://www.metal-archives.com/band/read-more/id/3540372489
"""
artist = cls._fetch_artist_attributes(source.url)
artist = self._fetch_artist_attributes(source.url)
artist_id = source.url.split("/")[-1]
artist_sources = cls._fetch_artist_sources(artist_id)
artist_sources = self._fetch_artist_sources(artist_id)
artist.source_collection.extend(artist_sources)
band_notes = cls._fetch_band_notes(artist_id)
band_notes = self._fetch_band_notes(artist_id)
if band_notes is not None:
artist.notes = band_notes
discography: List[Album] = cls._fetch_artist_discography(artist_id)
discography: List[Album] = self._fetch_artist_discography(artist_id)
if stop_at_level > 1:
for album in discography:
for source in album.source_collection.get_sources_from_page(cls.SOURCE_TYPE):
album.merge(cls._fetch_album_from_source(source, stop_at_level=stop_at_level-1))
for source in album.source_collection.get_sources_from_page(self.SOURCE_TYPE):
album.merge(self._fetch_album_from_source(source, stop_at_level=stop_at_level-1))
artist.main_album_collection.extend(discography)
return artist
@classmethod
def _parse_album_track_row(cls, track_row: BeautifulSoup) -> Song:
def _parse_album_track_row(self, track_row: BeautifulSoup) -> Song:
"""
<tr class="even">
<td width="20"><a class="anchor" name="5948442"> </a>1.</td> # id and tracksort
@@ -478,7 +474,7 @@ class EncyclopaediaMetallum(Page):
track_id = track_sort_soup.find("a").get("name").strip()
if track_row.find("a", {"href": f"#{track_id}"}) is not None:
source_list.append(Source(cls.SOURCE_TYPE, track_id))
source_list.append(Source(self.SOURCE_TYPE, track_id))
title = row_list[1].text.strip()
@@ -496,9 +492,7 @@ class EncyclopaediaMetallum(Page):
source_list=source_list
)
@classmethod
def _parse_album_attributes(cls, album_soup: BeautifulSoup, stop_at_level: int = 1) -> Album:
def _parse_album_attributes(self, album_soup: BeautifulSoup, stop_at_level: int = 1) -> Album:
tracklist: List[Song] = []
artist_list = []
album_name: str = None
@@ -518,12 +512,12 @@ class EncyclopaediaMetallum(Page):
href = anchor.get("href")
if href is not None:
source_list.append(Source(cls.SOURCE_TYPE, href.strip()))
source_list.append(Source(self.SOURCE_TYPE, href.strip()))
album_name = anchor.get_text(strip=True)
elif len(album_soup_list) > 1:
cls.LOGGER.debug("there are more than 1 album soups")
self.LOGGER.debug("there are more than 1 album soups")
artist_soup_list = album_info_soup.find_all("h2", {"class": "band_name"})
@@ -533,7 +527,7 @@ class EncyclopaediaMetallum(Page):
href = anchor.get("href")
if href is not None:
artist_sources.append(Source(cls.SOURCE_TYPE, href.strip()))
artist_sources.append(Source(self.SOURCE_TYPE, href.strip()))
artist_name = anchor.get_text(strip=True)
@@ -543,13 +537,13 @@ class EncyclopaediaMetallum(Page):
))
elif len(artist_soup_list) > 1:
cls.LOGGER.debug("there are more than 1 artist soups")
self.LOGGER.debug("there are more than 1 artist soups")
_parse_album_info(album_info_soup=album_soup.find(id="album_info"))
tracklist_soup = album_soup.find("table", {"class": "table_lyrics"}).find("tbody")
for track_soup in tracklist_soup.find_all("tr", {"class": ["even", "odd"]}):
tracklist.append(cls._parse_album_track_row(track_row=track_soup))
tracklist.append(self._parse_album_track_row(track_row=track_soup))
return Album(
title=album_name,
@@ -558,8 +552,7 @@ class EncyclopaediaMetallum(Page):
song_list=tracklist
)
@classmethod
def _fetch_album_from_source(cls, source: Source, stop_at_level: int = 1) -> Album:
def fetch_album(self, source: Source, stop_at_level: int = 1) -> Album:
"""
I am preeeety sure I can get way more data than... nothing from there
@@ -570,23 +563,22 @@ class EncyclopaediaMetallum(Page):
# <table class="display table_lyrics
r = cls.get_request(source.url)
r = self.connection.get(source.url)
if r is None:
return Album()
soup = cls.get_soup_from_response(r)
soup = self.get_soup_from_response(r)
album = cls._parse_album_attributes(soup, stop_at_level=stop_at_level)
album = self._parse_album_attributes(soup, stop_at_level=stop_at_level)
if stop_at_level > 1:
for song in album.song_collection:
for source in song.source_collection.get_sources_from_page(cls.SOURCE_TYPE):
song.merge(cls._fetch_song_from_source(source=source, stop_at_level=stop_at_level-1))
for source in song.source_collection.get_sources_from_page(self.SOURCE_TYPE):
song.merge(self._fetch_song_from_source(source=source, stop_at_level=stop_at_level-1))
return album
@classmethod
def _fetch_lyrics(cls, song_id: str) -> Optional[Lyrics]:
def _fetch_lyrics(self, song_id: str) -> Optional[Lyrics]:
"""
function toggleLyrics(songId) {
var lyricsRow = $('#song' + songId);
@@ -610,7 +602,7 @@ class EncyclopaediaMetallum(Page):
endpoint = "https://www.metal-archives.com/release/ajax-view-lyrics/id/{id}".format(id=song_id)
r = cls.get_request(endpoint)
r = self.connection.get(endpoint)
if r is None:
return None
@@ -618,22 +610,27 @@ class EncyclopaediaMetallum(Page):
text=FormattedText(html=r.text),
language=pycountry.languages.get(alpha_2="en"),
source_list=[
Source(cls.SOURCE_TYPE, endpoint)
Source(self.SOURCE_TYPE, endpoint)
]
)
@classmethod
def _fetch_song_from_source(cls, source: Source, stop_at_level: int = 1) -> Song:
def fetch_song(self, source: Source, stop_at_level: int = 1) -> Song:
song_id = source.url
return Song(
lyrics_list=[
cls._fetch_lyrics(song_id=song_id)
self._fetch_lyrics(song_id=song_id)
]
)
@classmethod
def _get_type_of_url(cls, url: str) -> Optional[Union[Type[Song], Type[Album], Type[Artist], Type[Label]]]:
def get_source_type(self, source: Source):
if self.SOURCE_TYPE != source.page_enum:
return None
url = source.url
if url is None:
return None
parsed_url = urlparse(url)
path: List[str] = parsed_url.path.split("/")

File diff suppressed because it is too large Load Diff

View File

@@ -1,13 +1,9 @@
from collections import defaultdict
from dataclasses import dataclass
from enum import Enum
from typing import List, Optional, Type, Union
from typing import List, Optional, Type
from urllib.parse import urlparse
import logging
import pycountry
import requests
from bs4 import BeautifulSoup
from ..objects import Source, DatabaseObject
from .abstract import Page
from ..objects import (
Artist,
@@ -15,61 +11,54 @@ from ..objects import (
SourcePages,
Song,
Album,
ID3Timestamp,
FormattedText,
Label,
Options,
AlbumType,
AlbumStatus,
Target
)
from ..utils import string_processing, shared
from .support_classes.download_result import DownloadResult
from ..connection import Connection
from ..utils.support_classes import DownloadResult
class Preset(Page):
# CHANGE
SOURCE_TYPE = SourcePages.PRESET
LOGGER = logging.getLogger("preset")
class YouTube(Page):
API_SESSION: requests.Session = requests.Session()
API_SESSION.headers = {
"User-Agent": "Mozilla/5.0 (X11; Linux x86_64; rv:106.0) Gecko/20100101 Firefox/106.0",
"Connection": "keep-alive",
"Referer": "https://www.youtube.com/"
}
API_SESSION.proxies = shared.proxies
TIMEOUT = 7
POST_TIMEOUT = 15
TRIES = 5
HOST = "https://www.youtube.com"
def __init__(self, *args, **kwargs):
self.connection: Connection = Connection(
host="https://www.preset.cum/",
logger=self.LOGGER
)
super().__init__(*args, **kwargs)
SOURCE_TYPE = SourcePages.YOUTUBE
def get_source_type(self, source: Source) -> Optional[Type[DatabaseObject]]:
return super().get_source_type(source)
def general_search(self, search_query: str) -> List[DatabaseObject]:
return []
def label_search(self, label: Label) -> List[Label]:
return []
def artist_search(self, artist: Artist) -> List[Artist]:
return []
def album_search(self, album: Album) -> List[Album]:
return []
def song_search(self, song: Song) -> List[Song]:
return []
def fetch_song(self, source: Source, stop_at_level: int = 1) -> Song:
return Song()
LOGGER = shared.YOUTUBE_LOGGER
def fetch_album(self, source: Source, stop_at_level: int = 1) -> Album:
return Album()
def fetch_artist(self, source: Source, stop_at_level: int = 1) -> Artist:
return Artist()
@classmethod
def search_by_query(cls, query: str) -> Options:
return Options()
def fetch_label(self, source: Source, stop_at_level: int = 1) -> Label:
return Label()
@classmethod
def plaintext_search(cls, query: str) -> Options:
search_results = []
return Options(search_results)
@classmethod
def _fetch_artist_from_source(cls, source: Source, stop_at_level: int = 1) -> Artist:
artist: Artist = Artist(source_list=[source])
return artist
@classmethod
def _fetch_album_from_source(cls, source: Source, stop_at_level: int = 1) -> Album:
album: Album = Album(source_list=[source])
return album
@classmethod
def _get_type_of_url(cls, url: str) -> Optional[Union[Type[Song], Type[Album], Type[Artist], Type[Label]]]:
return None
@classmethod
def _download_song_to_targets(cls, source: Source, target: Target, desc: str = None) -> DownloadResult:
def download_song_to_target(self, source: Source, target: Target, desc: str = None) -> DownloadResult:
return DownloadResult()

View File

@@ -1,70 +0,0 @@
from dataclasses import dataclass
from ...utils.shared import DOWNLOAD_PATH, DOWNLOAD_FILE, DEFAULT_VALUES
from ...utils.string_processing import fit_to_file_system
from ...objects import (
Song,
Album,
Artist,
Target,
Label
)
@dataclass
class DefaultTarget:
genre: str = DEFAULT_VALUES["genre"]
label: str = DEFAULT_VALUES["label"]
artist: str = DEFAULT_VALUES["artist"]
album: str = DEFAULT_VALUES["album"]
album_type: str = DEFAULT_VALUES["album_type"]
song: str = DEFAULT_VALUES["song"]
audio_format: str = DEFAULT_VALUES["audio_format"]
def __setattr__(self, __name: str, __value: str) -> None:
if __name in DEFAULT_VALUES:
if type(__value) != str:
return
if self.__getattribute__(__name) == DEFAULT_VALUES[__name]:
super().__setattr__(__name, fit_to_file_system(__value))
return
super().__setattr__(__name, __value)
@property
def target(self) -> Target:
return Target(
relative_to_music_dir=True,
path=DOWNLOAD_PATH.format(genre=self.genre, label=self.label, artist=self.artist, album=self.album,
song=self.song, album_type=self.album_type, audio_format=self.audio_format),
file=DOWNLOAD_FILE.format(genre=self.genre, label=self.label, artist=self.artist, album=self.album,
song=self.song, album_type=self.album_type, audio_format=self.audio_format)
)
def song_object(self, song: Song):
self.song = song.title
self.genre = song.genre
if not song.album_collection.empty:
self.album_object(song.album_collection[0])
if not song.main_artist_collection.empty:
self.artist_object(song.main_artist_collection[0])
def album_object(self, album: Album):
self.album = album.title
self.album_type = album.album_type.value
if not album.artist_collection.empty:
self.artist_object(album.artist_collection[0])
if not album.label_collection.empty:
self.label_object(album.label_collection[0])
def artist_object(self, artist: Artist):
self.artist = artist.name
if not artist.label_collection.empty:
self.label_object(artist.label_collection[0])
def label_object(self, label: Label):
self.label = label.name

View File

@@ -1,46 +1,448 @@
from typing import List
import requests
from bs4 import BeautifulSoup
import pycountry
from typing import List, Optional, Type, Tuple
from urllib.parse import urlparse, urlunparse, parse_qs
from enum import Enum
from ..utils.shared import (
ENCYCLOPAEDIA_METALLUM_LOGGER as LOGGER
)
import sponsorblock
from sponsorblock.errors import HTTPException, NotFoundException
from ..objects import Source, DatabaseObject, Song, Target
from .abstract import Page
from ..database import (
MusicObject,
from ..objects import (
Artist,
Source,
SourcePages,
Song,
Album,
ID3Timestamp,
FormattedText
)
from ..utils import (
string_processing
Label,
Target,
FormattedText,
ID3Timestamp
)
from ..connection import Connection
from ..utils.support_classes import DownloadResult
from ..utils.shared import YOUTUBE_LOGGER, INVIDIOUS_INSTANCE, BITRATE, ENABLE_SPONSOR_BLOCK, PIPED_INSTANCE
INVIDIOUS_INSTANCE = "https://yewtu.be/feed/popular"
class Youtube(Page):
"""
- https://yt.artemislena.eu/api/v1/search?q=Zombiez+-+Topic&page=1&date=none&type=channel&duration=none&sort=relevance
- https://yt.artemislena.eu/api/v1/channels/playlists/UCV0Ntl3lVR7xDXKoCU6uUXA
- https://yt.artemislena.eu/api/v1/playlists/OLAK5uy_kcUBiDv5ATbl-R20OjNaZ5G28XFanQOmM
- https://yt.artemislena.eu/api/v1/videos/SULFl39UjgY
"""
def get_invidious_url(path: str = "", params: str = "", query: str = "", fragment: str = "") -> str:
return urlunparse((INVIDIOUS_INSTANCE.scheme, INVIDIOUS_INSTANCE.netloc, path, params, query, fragment))
def get_piped_url(path: str = "", params: str = "", query: str = "", fragment: str = "") -> str:
return urlunparse((PIPED_INSTANCE.scheme, PIPED_INSTANCE.netloc, path, params, query, fragment))
class YouTubeUrlType(Enum):
CHANNEL = "channel"
PLAYLIST = "playlist"
VIDEO = "watch"
NONE = ""
class YouTubeUrl:
"""
The youtube downloader should use https://invidious.io/
to make the request.
They are an alternative frontend.
To find an artist filter for chanel and search for
`{artist.name} - Topic`
and then ofc check for viable results.
Ofc you can also implement searching songs by isrc.
NOTE: I didn't look at the invidious api yet. If it sucks,
feel free to use projects like youtube-dl.
But don't implement you're own youtube client.
I don't wanna maintain that shit.
Artist
https://yt.artemislena.eu/channel/UCV0Ntl3lVR7xDXKoCU6uUXA
https://www.youtube.com/channel/UCV0Ntl3lVR7xDXKoCU6uUXA
Release
https://yt.artemislena.eu/playlist?list=OLAK5uy_nEg5joAyFjHBPwnS_ADHYtgSqAjFMQKLw
https://www.youtube.com/playlist?list=OLAK5uy_nEg5joAyFjHBPwnS_ADHYtgSqAjFMQKLw
Track
https://yt.artemislena.eu/watch?v=SULFl39UjgY&list=OLAK5uy_nEg5joAyFjHBPwnS_ADHYtgSqAjFMQKLw&index=1
https://www.youtube.com/watch?v=SULFl39UjgY
"""
API_SESSION: requests.Session = requests.Session()
def __init__(self, url: str) -> None:
"""
Raises Index exception for wrong url, and value error for not found enum type
"""
self.id = ""
parsed = urlparse(url=url)
self.url_type: YouTubeUrlType
type_frag_list = parsed.path.split("/")
if len(type_frag_list) < 2:
self.url_type = YouTubeUrlType.NONE
else:
try:
self.url_type = YouTubeUrlType(type_frag_list[1].strip())
except ValueError:
self.url_type = YouTubeUrlType.NONE
if self.url_type == YouTubeUrlType.CHANNEL:
if len(type_frag_list) < 3:
self.couldnt_find_id(url)
else:
self.id = type_frag_list[2]
elif self.url_type == YouTubeUrlType.PLAYLIST:
query_stuff = parse_qs(parsed.query)
if "list" not in query_stuff:
self.couldnt_find_id(url)
else:
self.id = query_stuff["list"][0]
elif self.url_type == YouTubeUrlType.VIDEO:
query_stuff = parse_qs(parsed.query)
if "v" not in query_stuff:
self.couldnt_find_id(url)
else:
self.id = query_stuff["v"][0]
def couldnt_find_id(self, url: str):
YOUTUBE_LOGGER.warning(f"The id is missing: {url}")
self.url_type = YouTubeUrlType.NONE
@property
def api(self) -> str:
if self.url_type == YouTubeUrlType.CHANNEL:
return get_invidious_url(path=f"/api/v1/channels/playlists/{self.id}")
if self.url_type == YouTubeUrlType.PLAYLIST:
return get_invidious_url(path=f"/api/v1/playlists/{id}")
if self.url_type == YouTubeUrlType.VIDEO:
return get_invidious_url(path=f"/api/v1/videos/{self.id}")
return get_invidious_url()
@property
def normal(self) -> str:
if self.url_type.CHANNEL:
return get_invidious_url(path=f"/channel/{self.id}")
if self.url_type.PLAYLIST:
return get_invidious_url(path="/playlist", query=f"list={self.id}")
if self.url_type.VIDEO:
return get_invidious_url(path="/watch", query=f"v={self.id}")
class YouTube(Page):
    """
    Page implementation for YouTube, scraped through an Invidious instance
    (metadata and download links) with a Piped instance as the primary source
    for an artist's playlists.  Audio streams are downloaded from youtube.com
    directly.
    """
    # CHANGE
    SOURCE_TYPE = SourcePages.YOUTUBE
    LOGGER = YOUTUBE_LOGGER
    # a video fetch already returns full metadata, no extra per-song pass needed
    NO_ADDITIONAL_DATA_FROM_SONG = True

    def __init__(self, *args, **kwargs):
        # metadata API of the configured invidious instance
        self.connection: Connection = Connection(
            host=get_invidious_url(),
            logger=self.LOGGER
        )
        # piped API, preferred for listing a channel's playlists
        self.piped_connection: Connection = Connection(
            host=get_piped_url(),
            logger=self.LOGGER
        )
        # the actual audio streams come from youtube itself
        self.download_connection: Connection = Connection(
            host="https://www.youtube.com/",
            logger=self.LOGGER
        )
        # route sponsorblock through our own Connection so it inherits the
        # program's proxy configuration
        _sponsorblock_connection: Connection = Connection(host="https://sponsor.ajay.app/")
        self.sponsorblock_client = sponsorblock.Client(session=_sponsorblock_connection.session)
        super().__init__(*args, **kwargs)

    def get_source_type(self, source: Source) -> Optional[Type[DatabaseObject]]:
        """Map a source url to the music-object class it refers to (None if unknown)."""
        _url_type = {
            YouTubeUrlType.CHANNEL: Artist,
            YouTubeUrlType.PLAYLIST: Album,
            YouTubeUrlType.VIDEO: Song,
        }
        parsed = YouTubeUrl(source.url)
        if parsed.url_type in _url_type:
            return _url_type[parsed.url_type]

    def general_search(self, search_query: str) -> List[DatabaseObject]:
        # free-text search is implemented as an artist (channel) search only
        return self.artist_search(Artist(name=search_query, dynamic=True))

    def _json_to_artist(self, artist_json: dict) -> Artist:
        # " - Topic" is the suffix of youtube's auto-generated music channels
        return Artist(
            name=artist_json["author"].replace(" - Topic", ""),
            source_list=[
                Source(self.SOURCE_TYPE, get_invidious_url(path=artist_json["authorUrl"]))
            ]
        )

    def artist_search(self, artist: Artist) -> List[Artist]:
        """Search invidious for auto-generated "<name> - Topic" channels."""
        # https://yt.artemislena.eu/api/v1/search?q=Zombiez+-+Topic&page=1&date=none&type=channel&duration=none&sort=relevance
        endpoint = get_invidious_url(path="/api/v1/search", query=f"q={artist.name.replace(' ', '+')}+-+Topic&page=1&date=none&type=channel&duration=none&sort=relevance")
        artist_list = []
        r = self.connection.get(endpoint)
        if r is None:
            return []
        for search_result in r.json():
            if search_result["type"] != "channel":
                continue
            author: str = search_result["author"]
            # only auto-generated music channels count as artists here
            if not author.endswith(" - Topic"):
                continue
            artist_list.append(self._json_to_artist(search_result))
        return artist_list

    def _fetch_song_from_id(self, youtube_id: str) -> Tuple[Song, Optional[int]]:
        """
        Fetch one video's metadata and build a Song from it.

        Returns the song plus the publish time (unix timestamp per the
        invidious "published" field); (empty Song, None) when the request
        failed.
        """
        # https://yt.artemislena.eu/api/v1/videos/SULFl39UjgY
        r = self.connection.get(get_invidious_url(path=f"/api/v1/videos/{youtube_id}"))
        if r is None:
            return Song(), None
        data = r.json()
        if data["genre"] != "Music":
            self.LOGGER.warning(f"Genre has to be music, trying anyways")
        title = data["title"]
        license_str = None
        artist_list: List[Artist] = []
        _author: str = data["author"]
        if _author.endswith(" - Topic"):
            # auto-generated music channel: the channel itself is the artist
            artist_list.append(Artist(
                name=_author.replace(" - Topic", ""),
                source_list=[Source(
                    self.SOURCE_TYPE, get_invidious_url(path=f"/channel/{data['authorId']}")
                )]
            ))
        else:
            # fall back to the structured music metadata attached to the video
            for music_track in data.get("musicTracks", []):
                title = music_track["song"]
                license_str = music_track["license"]
                for artist_name in music_track["artist"].split(" x "):
                    artist_list.append(Artist(name=artist_name))
        return Song(
            title=title,
            source_list=[Source(
                self.SOURCE_TYPE, get_invidious_url(path="/watch", query=f"v={data['videoId']}")
            )],
            notes=FormattedText(html=data["descriptionHtml"] + f"\n<p>{license_str}</ p>" ),
            main_artist_list=artist_list
        ), int(data["published"])

    def fetch_song(self, source: Source, stop_at_level: int = 1) -> Song:
        """Fetch a single song; returns an empty Song for non-video urls."""
        parsed = YouTubeUrl(source.url)
        if parsed.url_type != YouTubeUrlType.VIDEO:
            return Song()
        song, _ = self._fetch_song_from_id(parsed.id)
        return song

    def fetch_album(self, source: Source, stop_at_level: int = 1) -> Album:
        """
        Fetch an album (playlist): one request for the playlist plus one per
        video, hence the warning about it being slow.  The album date is the
        average of the videos' publish timestamps.
        """
        self.LOGGER.info(f"Getting the metadata of an album may take slightly longer, only panic in a couple minutes <333")
        parsed = YouTubeUrl(source.url)
        if parsed.url_type != YouTubeUrlType.PLAYLIST:
            return Album()
        title = None
        source_list = [source]
        notes = None
        song_list = []
        # https://yt.artemislena.eu/api/v1/playlists/OLAK5uy_kcUBiDv5ATbl-R20OjNaZ5G28XFanQOmM
        r = self.connection.get(get_invidious_url(path=f"/api/v1/playlists/{parsed.id}"))
        if r is None:
            return Album()
        data = r.json()
        if data["type"] != "playlist":
            return Album()
        title = data["title"]
        notes = FormattedText(html=data["descriptionHtml"])
        timestamps: List[int] = []
        """
        TODO
        fetch the song and don't get it from there
        """
        for video in data["videos"]:
            # tracksort comes from the playlist; the rest from the video fetch
            other_song = Song(
                source_list=[
                    Source(
                        self.SOURCE_TYPE, get_invidious_url(path="/watch", query=f"v={video['videoId']}")
                    )
                ],
                tracksort=video["index"]+1
            )
            song, utc_timestamp = self._fetch_song_from_id(video["videoId"])
            song.merge(other_song)
            if utc_timestamp is not None:
                timestamps.append(utc_timestamp)
            song_list.append(song)
        # NOTE(review): if every video fetch failed, `timestamps` is empty and
        # the division below raises ZeroDivisionError — confirm upstream handling
        return Album(
            title=title,
            source_list=source_list,
            notes=notes,
            song_list=song_list,
            date=ID3Timestamp.fromtimestamp(round(sum(timestamps) / len(timestamps)))
        )

    def fetch_invidious_album_list(self, yt_id: str):
        """
        Fallback: list a channel's playlists (albums) via invidious.

        Returns (album_list, artist_name); artist_name stays None when no
        playlist was found.
        """
        artist_name = None
        album_list = []
        # playlist
        # https://yt.artemislena.eu/api/v1/channels/playlists/UCV0Ntl3lVR7xDXKoCU6uUXA
        r = self.connection.get(get_invidious_url(f"/api/v1/channels/playlists/{yt_id}"))
        if r is None:
            # NOTE(review): returning a bare Artist() breaks the
            # (album_list, artist_name) tuple contract that fetch_artist
            # unpacks — looks like a bug, verify before relying on it
            return Artist()
        for playlist_json in r.json()["playlists"]:
            if playlist_json["type"] != "playlist":
                continue
            artist_name = playlist_json["author"].replace(" - Topic", "")
            # /playlist?list=OLAK5uy_nbvQeskr8nbIuzeLxoceNLuCL_KjAmzVw
            album_list.append(Album(
                title=playlist_json["title"],
                source_list=[Source(
                    self.SOURCE_TYPE, get_invidious_url(path="/playlist", query=f"list={playlist_json['playlistId']}")
                )],
                artist_list=[Artist(
                    name=artist_name,
                    source_list=[
                        Source(self.SOURCE_TYPE, get_invidious_url(path=playlist_json["authorUrl"]))
                    ]
                )]
            ))
        return album_list, artist_name

    def fetch_piped_album_list(self, yt_id: str):
        """
        Preferred path: list a channel's playlists (albums) via piped.

        Returns (album_list, artist_name); ([], None) when the request failed.
        """
        # NOTE(review): the second embedded url below reads
        # "…youtube.com/<id>playlists" — a '/' before "playlists" seems to be
        # missing compared to the first one; confirm against the piped API
        endpoint = get_piped_url(path=f"/channels/tabs", query='data={"originalUrl":"https://www.youtube.com/' + yt_id + '/playlists","url":"https://www.youtube.com/' + yt_id + 'playlists","id":"' + yt_id + '","contentFilters":["playlists"],"sortFilter":"","baseUrl":"https://www.youtube.com"}')
        r = self.piped_connection.get(endpoint)
        if r is None:
            return [], None
        content = r.json()["content"]
        artist_name = None
        album_list = []
        for playlist in content:
            if playlist["type"] != "playlist":
                continue
            artist_name = playlist["uploaderName"].replace(" - Topic", "")
            album_list.append(Album(
                title=playlist["name"],
                source_list=[Source(
                    self.SOURCE_TYPE, get_invidious_url() + playlist["url"]
                )],
                artist_list=[Artist(
                    name=artist_name,
                    source_list=[
                        Source(self.SOURCE_TYPE, get_invidious_url(path=playlist["uploaderUrl"]))
                    ]
                )]
            ))
        return album_list, artist_name

    def fetch_artist(self, source: Source, stop_at_level: int = 1) -> Artist:
        """Fetch an artist (channel) with its album list, piped first, invidious as fallback."""
        parsed = YouTubeUrl(source.url)
        if parsed.url_type != YouTubeUrlType.CHANNEL:
            return Artist(source_list=[source])
        album_list, artist_name = self.fetch_piped_album_list(parsed.id)
        if len(album_list) <= 0:
            self.LOGGER.warning(f"didn't found any playlists with piped, falling back to invidious. (it is unusual)")
            album_list, artist_name = self.fetch_invidious_album_list(parsed.id)
        return Artist(name=artist_name, main_album_list=album_list, source_list=[source])

    def download_song_to_target(self, source: Source, target: Target, desc: str = None) -> DownloadResult:
        """
        1. getting the optimal source
            Only audio sources allowed
            not a bitrate that is smaller than the selected bitrate, but not one that is wayyy huger

        2. download it

        :param source:
        :param target:
        :param desc:
        :return:
        """
        r = self.connection.get(YouTubeUrl(source.url).api)
        if r is None:
            return DownloadResult(error_message="Api didn't even respond, maybe try another invidious Instance")
        audio_format = None
        best_bitrate = 0
        # take the FIRST audio format at/above the configured BITRATE;
        # otherwise keep the highest bitrate seen so far
        for possible_format in r.json()["adaptiveFormats"]:
            format_type: str = possible_format["type"]
            if not format_type.startswith("audio"):
                continue
            bitrate = int(possible_format.get("bitrate", 0))
            if bitrate >= BITRATE:
                best_bitrate = bitrate
                audio_format = possible_format
                break
            if bitrate > best_bitrate:
                best_bitrate = bitrate
                audio_format = possible_format
        if audio_format is None:
            return DownloadResult(error_message="Couldn't find the download link.")
        endpoint = audio_format["url"]
        self.download_connection.stream_into(endpoint, target, description=desc, raw_url=True)
        # NOTE(review): this issues a SECOND request only to judge success,
        # while the result of stream_into above is ignored — verify whether
        # stream_into's return value should be checked instead
        if self.download_connection.get(endpoint, stream=True, raw_url=True):
            return DownloadResult(total=1)
        return DownloadResult(error_message=f"Streaming to the file went wrong: {endpoint}, {str(target.file_path)}")

    def get_skip_intervals(self, song: Song, source: Source) -> List[Tuple[float, float]]:
        """Return sponsorblock (start, end) segments to cut, or [] when disabled/unavailable."""
        if not ENABLE_SPONSOR_BLOCK:
            return []
        parsed = YouTubeUrl(source.url)
        if parsed.url_type != YouTubeUrlType.VIDEO:
            self.LOGGER.warning(f"{source.url} is no video url.")
            return []
        segments = []
        try:
            segments = self.sponsorblock_client.get_skip_segments(parsed.id)
        except NotFoundException:
            # no segments submitted for this video — nothing to skip
            self.LOGGER.debug(f"No sponsor found for the video {parsed.id}.")
        except HTTPException as e:
            # best effort: log and download without skipping
            self.LOGGER.warning(f"{e}")
        return [(segment.start, segment.end) for segment in segments]

View File

@@ -1,81 +0,0 @@
-- Legacy (since removed) MariaDB/MySQL schema for the music metadata cache.

-- One row per song; a song belongs to at most one album.
CREATE TABLE Song
(
    id BIGINT AUTO_INCREMENT PRIMARY KEY,
    name TEXT,
    isrc TEXT,
    length INT, -- length is in milliseconds (could be wrong)
    tracksort INT,
    genre TEXT,
    album_id BIGINT,
    FOREIGN KEY(album_id) REFERENCES Album(id)
);

-- Where a song's data was found (page type + url).
CREATE TABLE Source
(
    id BIGINT AUTO_INCREMENT PRIMARY KEY,
    type TEXT NOT NULL,
    src TEXT NOT NULL,
    url TEXT NOT NULL,
    certainty INT NOT NULL DEFAULT 0, -- certainty=0 -> it is definitely a valid source
    valid BOOLEAN NOT NULL DEFAULT 1,
    song_id BIGINT,
    FOREIGN KEY(song_id) REFERENCES Song(id)
);

CREATE TABLE Album
(
    id BIGINT AUTO_INCREMENT PRIMARY KEY,
    title TEXT,
    label TEXT,
    album_status TEXT,
    language TEXT,
    date TEXT,
    date_format TEXT,
    country TEXT,
    barcode TEXT,
    albumsort INT,
    is_split BOOLEAN NOT NULL DEFAULT 0
);

-- Output location (file + path) for a song; at most one per song.
CREATE TABLE Target
(
    id BIGINT AUTO_INCREMENT PRIMARY KEY,
    file TEXT,
    path TEXT,
    song_id BIGINT UNIQUE,
    FOREIGN KEY(song_id) REFERENCES Song(id)
);

CREATE TABLE Lyrics
(
    id BIGINT AUTO_INCREMENT PRIMARY KEY,
    text TEXT,
    language TEXT,
    song_id BIGINT,
    FOREIGN KEY(song_id) REFERENCES Song(id)
);

CREATE TABLE Artist
(
    id BIGINT AUTO_INCREMENT PRIMARY KEY,
    name TEXT
);

-- m:n link between songs and artists; is_feature marks featured artists.
CREATE TABLE SongArtist
(
    song_id BIGINT NOT NULL,
    artist_id BIGINT NOT NULL,
    is_feature BOOLEAN NOT NULL DEFAULT 0,
    FOREIGN KEY(song_id) REFERENCES Song(id),
    FOREIGN KEY(artist_id) REFERENCES Artist(id)
);

-- m:n link between albums and artists.
CREATE TABLE AlbumArtist
(
    album_id BIGINT,
    artist_id BIGINT,
    FOREIGN KEY(album_id) REFERENCES Album(id),
    FOREIGN KEY(artist_id) REFERENCES Artist(id)
);

View File

@@ -1,144 +0,0 @@
-- Legacy (since removed) SQLite schema for the downloader database.
-- BUG FIX: `easy_id3` declared the `performer` column TWICE, which makes the
-- CREATE TABLE statement fail with "duplicate column name"; the second
-- declaration has been removed.

DROP TABLE IF EXISTS artist;
CREATE TABLE artist (
    id TEXT PRIMARY KEY NOT NULL,
    mb_id TEXT,
    name TEXT
);

-- m:n link artist <-> release group.
DROP TABLE IF EXISTS artist_release_group;
CREATE TABLE artist_release_group (
    artist_id TEXT NOT NULL,
    release_group_id TEXT NOT NULL
);

-- m:n link artist <-> track.
DROP TABLE IF EXISTS artist_track;
CREATE TABLE artist_track (
    artist_id TEXT NOT NULL,
    track_id TEXT NOT NULL
);

DROP TABLE IF EXISTS release_group;
CREATE TABLE release_group (
    id TEXT PRIMARY KEY NOT NULL,
    albumartist TEXT,
    albumsort INT,
    musicbrainz_albumtype TEXT,
    compilation TEXT,
    album_artist_id TEXT
);

-- trailing underscore avoids clashing with the RELEASE keyword
DROP TABLE IF EXISTS release_;
CREATE TABLE release_ (
    id TEXT PRIMARY KEY NOT NULL,
    release_group_id TEXT NOT NULL,
    title TEXT,
    copyright TEXT,
    album_status TEXT,
    language TEXT,
    year TEXT,
    date TEXT,
    country TEXT,
    barcode TEXT
);

DROP TABLE IF EXISTS track;
CREATE TABLE track (
    id TEXT PRIMARY KEY NOT NULL,
    downloaded BOOLEAN NOT NULL DEFAULT 0,
    release_id TEXT NOT NULL,
    mb_id TEXT,
    track TEXT,
    length INT,
    tracknumber TEXT,
    isrc TEXT,
    genre TEXT,
    lyrics TEXT,
    path TEXT,
    file TEXT,
    url TEXT,
    src TEXT
);

DROP TABLE IF EXISTS lyrics;
CREATE TABLE lyrics (
    track_id TEXT NOT NULL,
    text TEXT,
    language TEXT
);

DROP TABLE IF EXISTS target;
CREATE TABLE target (
    track_id TEXT NOT NULL,
    file TEXT,
    path TEXT
);

DROP TABLE IF EXISTS source;
CREATE TABLE source (
    track_id TEXT NOT NULL,
    src TEXT NOT NULL,
    url TEXT NOT NULL,
    certainty INT NOT NULL DEFAULT 0, -- certainty=0 -> it is definitely a valid source
    valid BOOLEAN NOT NULL DEFAULT 1
);

-- One row of EasyID3 tag fields per track.
DROP TABLE IF EXISTS easy_id3;
CREATE TABLE easy_id3 (
    track_id TEXT NOT NULL,
    album TEXT,
    bpm TEXT,
    compilation TEXT,
    composer TEXT,
    copyright TEXT,
    encodedby TEXT,
    lyricist TEXT,
    length TEXT,
    media TEXT,
    mood TEXT,
    grouping TEXT,
    title TEXT,
    version TEXT,
    artist TEXT,
    albumartist TEXT,
    conductor TEXT,
    arranger TEXT,
    discnumber TEXT,
    organization TEXT,
    tracknumber TEXT,
    author TEXT,
    albumartistsort TEXT,
    albumsort TEXT,
    composersort TEXT,
    artistsort TEXT,
    titlesort TEXT,
    isrc TEXT,
    discsubtitle TEXT,
    language TEXT,
    genre TEXT,
    date TEXT,
    originaldate TEXT,
    performer TEXT,
    musicbrainz_trackid TEXT,
    website TEXT,
    replaygain_gain TEXT,
    replaygain_peak TEXT,
    musicbrainz_artistid TEXT,
    musicbrainz_albumid TEXT,
    musicbrainz_albumartistid TEXT,
    musicbrainz_trmid TEXT,
    musicip_puid TEXT,
    musicip_fingerprint TEXT,
    musicbrainz_albumstatus TEXT,
    musicbrainz_albumtype TEXT,
    releasecountry TEXT,
    musicbrainz_discid TEXT,
    asin TEXT,
    barcode TEXT,
    catalognumber TEXT,
    musicbrainz_releasetrackid TEXT,
    musicbrainz_releasegroupid TEXT,
    musicbrainz_workid TEXT,
    acoustid_fingerprint TEXT,
    acoustid_id TEXT
);

View File

@@ -1,4 +1 @@
from .config import config, read, write
# tells what exists
__all__ = ["shared", "object_handeling", "phonetic_compares", "functions"]
from .config import config, read_config, write_config

View File

@@ -4,7 +4,22 @@ from .connection import CONNECTION_SECTION
from .misc import MISC_SECTION
from .paths import PATHS_SECTION
from .config import read, write, config
from .paths import LOCATIONS
from .config import Config
read()
config = Config()
def read_config():
    # Load settings from the config file, first writing a default config
    # to disk if none exists yet.
    if not LOCATIONS.CONFIG_FILE.is_file():
        write_config()
    config.read_from_config_file(LOCATIONS.CONFIG_FILE)
def write_config():
    # Persist the current in-memory configuration to the config file.
    config.write_to_config_file(LOCATIONS.CONFIG_FILE)
set_name_to_value = config.set_name_to_value
read_config()

View File

@@ -106,7 +106,7 @@ ID3.1: {', '.join(_sorted_id3_1_formats)}
self.DOWNLOAD_PATH = StringAttribute(
name="download_path",
value="{genre}/{artist}/{album_type}/{album}",
value="{genre}/{artist}/{album}",
description="The folder music kraken should put the songs into."
)
@@ -116,42 +116,7 @@ ID3.1: {', '.join(_sorted_id3_1_formats)}
description="The filename of the audio file."
)
self.DEFAULT_GENRE = StringAttribute(
name="default_genre",
value="Various Genre",
description="The default value for the genre field."
)
self.DEFAULT_LABEL = StringAttribute(
name="default_label",
value="Various Labels",
description="The Label refers to a lable that signs artists."
)
self.DEFAULT_ARTIST = StringAttribute(
name="default_artist",
value="Various Artists",
description="You know Various Artist."
)
self.DEFAULT_ALBUM = StringAttribute(
name="default_album",
value="Various Album",
description="This value will hopefully not be used."
)
self.DEFAULT_SONG = StringAttribute(
name="default_song",
value="Various Song",
description="If it has to fall back to this value, something did go really wrong."
)
self.DEFAULT_ALBUM_TYPE = StringAttribute(
name="default_album_type",
value="Other",
description="Weirdly enough I barely see this used in file systems."
)
self.ALBUM_TYPE_BLACKLIST = AlbumTypeListAttribute(
name="album_type_blacklist",
description="Music Kraken ignores all albums of those types.\n"
@@ -181,11 +146,6 @@ There are multiple fields, you can use for the path and file name:
""".strip()),
self.DOWNLOAD_PATH,
self.DOWNLOAD_FILE,
self.DEFAULT_ALBUM_TYPE,
self.DEFAULT_ARTIST,
self.DEFAULT_GENRE,
self.DEFAULT_LABEL,
self.DEFAULT_SONG,
self.ALBUM_TYPE_BLACKLIST,
]
super().__init__()

View File

@@ -144,6 +144,9 @@ class ListAttribute(Attribute):
self.value = []
self.has_default_values = False
if value in self.value:
return
self.value.append(value)
def __str__(self):

View File

@@ -58,7 +58,7 @@ class Config:
self._name_section_map[name] = element
self._length += 1
def set_name_to_value(self, name: str, value: str):
def set_name_to_value(self, name: str, value: str, silent: bool = True):
"""
:raises SettingValueError, SettingNotFound:
:param name:
@@ -66,6 +66,9 @@ class Config:
:return:
"""
if name not in self._name_section_map:
if silent:
LOGGER.warning(f"The setting \"{name}\" is either deprecated, or doesn't exist.")
return
raise SettingNotFound(setting_name=name)
LOGGER.debug(f"setting: {name} value: {value}")
@@ -122,17 +125,3 @@ class Config:
for section in self._section_list:
for name, attribute in section.name_attribute_map.items():
yield attribute
config = Config()
def read():
if not LOCATIONS.CONFIG_FILE.is_file():
LOGGER.debug("Creating default config file.")
write()
config.read_from_config_file(LOCATIONS.CONFIG_FILE)
def write():
config.write_to_config_file(LOCATIONS.CONFIG_FILE)

View File

@@ -1,4 +1,9 @@
from .base_classes import Section, FloatAttribute, IntAttribute, BoolAttribute, ListAttribute
from urllib.parse import urlparse, ParseResult
import re
from .base_classes import Section, FloatAttribute, IntAttribute, BoolAttribute, ListAttribute, StringAttribute
from ..regex import URL_PATTERN
from ..exception.config import SettingValueError
class ProxAttribute(ListAttribute):
@@ -10,6 +15,38 @@ class ProxAttribute(ListAttribute):
}
class UrlStringAttribute(StringAttribute):
    # A config attribute holding a single url, validated against URL_PATTERN.
    def validate(self, value: str):
        """:raises SettingValueError: when *value* does not look like a url."""
        v = value.strip()
        url = re.match(URL_PATTERN, v)
        if url is None:
            raise SettingValueError(
                setting_name=self.name,
                setting_value=v,
                rule="has to be a valid url"
            )

    @property
    def object_from_value(self) -> ParseResult:
        # expose the configured url pre-parsed for consumers
        return urlparse(self.value)
class UrlListAttribute(ListAttribute):
    # A config attribute holding a list of urls; each element is validated
    # against URL_PATTERN and exposed pre-parsed.
    def validate(self, value: str):
        """:raises SettingValueError: when *value* does not look like a url."""
        v = value.strip()
        url = re.match(URL_PATTERN, v)
        if url is None:
            raise SettingValueError(
                setting_name=self.name,
                setting_value=v,
                rule="has to be a valid url"
            )

    def single_object_from_element(self, value: str):
        # parse each stored string into a ParseResult
        return urlparse(value)
class ConnectionSection(Section):
def __init__(self):
self.PROXIES = ProxAttribute(
@@ -43,11 +80,54 @@ class ConnectionSection(Section):
value="0.3"
)
# INVIDIOUS INSTANCES LIST
self.INVIDIOUS_INSTANCE = UrlStringAttribute(
name="invidious_instance",
description="This is an attribute, where you can define the invidious instances,\n"
"the youtube downloader should use.\n"
"Here is a list of active ones: https://docs.invidious.io/instances/\n"
"Instances that use cloudflare or have source code changes could cause issues.\n"
"Hidden instances (.onion) will only work, when setting 'tor=true'.",
value="https://yt.artemislena.eu/"
)
self.PIPED_INSTANCE = UrlStringAttribute(
name="piped_instance",
description="This is an attribute, where you can define the pioed instances,\n"
"the youtube downloader should use.\n"
"Here is a list of active ones: https://github.com/TeamPiped/Piped/wiki/Instances\n"
"Instances that use cloudflare or have source code changes could cause issues.\n"
"Hidden instances (.onion) will only work, when setting 'tor=true'.",
value="https://pipedapi.kavin.rocks"
)
self.ALL_YOUTUBE_URLS = UrlListAttribute(
name="youtube_url",
description="This is used to detect, if an url is from youtube, or any alternativ frontend.\n"
"If any instance seems to be missing, run music kraken with the -f flag.",
value=[
"https://www.youtube.com/",
"https://www.youtu.be/",
"https://redirect.invidious.io/",
"https://piped.kavin.rocks/"
]
)
self.SPONSOR_BLOCK = BoolAttribute(
name="use_sponsor_block",
value="true",
description="Use sponsor block to remove adds or simmilar from the youtube videos."
)
self.attribute_list = [
self.USE_TOR,
self.TOR_PORT,
self.CHUNK_SIZE,
self.SHOW_DOWNLOAD_ERRORS_THRESHOLD
self.SHOW_DOWNLOAD_ERRORS_THRESHOLD,
self.INVIDIOUS_INSTANCE,
self.PIPED_INSTANCE,
self.ALL_YOUTUBE_URLS,
self.SPONSOR_BLOCK
]
super().__init__()

View File

@@ -3,6 +3,21 @@ from .base_classes import Section, IntAttribute, ListAttribute, BoolAttribute
class MiscSection(Section):
def __init__(self):
self.ENABLE_RESULT_HISTORY = BoolAttribute(
name="result_history",
description="If enabled, you can go back to the previous results.\n"
"The consequence is a higher meory consumption, because every result is saved.",
value="false"
)
self.HISTORY_LENGTH = IntAttribute(
name="history_length",
description="You can choose how far back you can go in the result history.\n"
"The further you choose to be able to go back, the higher the memory usage.\n"
"'-1' removes the Limit entirely.",
value="8"
)
self.HAPPY_MESSAGES = ListAttribute(
name="happy_messages",
description="Just some nice and wholesome messages.\n"
@@ -12,11 +27,11 @@ class MiscSection(Section):
"Support the artist.",
"Star Me: https://github.com/HeIIow2/music-downloader",
"🏳️‍⚧️🏳️‍⚧️ Trans rights are human rights. 🏳️‍⚧️🏳️‍⚧️",
"🏳️‍⚧️🏳️‍⚧️ Trans women are women, trans men are men. 🏳️‍⚧️🏳️‍⚧️",
"🏴‍☠️🏴‍☠️ Unite under one flag, fuck borders. 🏴‍☠️🏴‍☠️",
"🏳️‍⚧️🏳️‍⚧️ Trans women are women, trans men are men, and enbies are enbies. 🏳️‍⚧️🏳️‍⚧️",
"🏴‍☠️🏴‍☠️ Unite under one flag, fck borders. 🏴‍☠️🏴‍☠️",
"Join my Matrix Space: https://matrix.to/#/#music-kraken:matrix.org",
"Gotta love the BPJM!! >:(",
"🏳️‍⚧️🏳️‍⚧️ Protect trans youth. 🏳️‍⚧️🏳️‍⚧️"
"Gotta love the BPJM ;-;",
"🏳️‍⚧️🏳️‍⚧️ Protect trans youth. 🏳️‍⚧️🏳️‍⚧️",
]
)
@@ -37,6 +52,8 @@ class MiscSection(Section):
)
self.attribute_list = [
self.ENABLE_RESULT_HISTORY,
self.HISTORY_LENGTH,
self.HAPPY_MESSAGES,
self.MODIFY_GC,
self.ID_BITS

View File

@@ -25,6 +25,10 @@ class SourcePages(Enum):
TWITTER = "twitter" # I will use nitter though lol
MYSPACE = "myspace" # Yes somehow this ancient site is linked EVERYWHERE
MANUAL = "manual"
PRESET = "preset"
@classmethod
def get_homepage(cls, attribute) -> str:
homepage_map = {

View File

@@ -0,0 +1,11 @@
class DownloadException(Exception):
    """Base class for every error raised during a download."""


class UrlNotFoundException(DownloadException):
    """Raised when the page behind a given url could not be located."""

    def __init__(self, url: str, *args: object) -> None:
        # remember the offending url for the error message
        self.url = url
        super().__init__(*args)

    def __str__(self) -> str:
        return f"Couldn't find the page of {self.url}"

View File

@@ -0,0 +1,2 @@
URL_PATTERN = 'https?://(?:[-\w.]|(?:%[\da-fA-F]{2}))+'

View File

@@ -1,7 +1,8 @@
import logging
import random
from pathlib import Path
from typing import List, Tuple, Set
from typing import List, Tuple, Set, Dict
from urllib.parse import ParseResult
from .path_manager import LOCATIONS
from .config import LOGGING_SECTION, AUDIO_SECTION, CONNECTION_SECTION, MISC_SECTION, PATHS_SECTION
@@ -66,17 +67,9 @@ AUDIO_FORMAT = AUDIO_SECTION.AUDIO_FORMAT.object_from_value
DOWNLOAD_PATH = AUDIO_SECTION.DOWNLOAD_PATH.object_from_value
DOWNLOAD_FILE = AUDIO_SECTION.DOWNLOAD_FILE.object_from_value
DEFAULT_VALUES = {
"genre": AUDIO_SECTION.DEFAULT_GENRE.object_from_value,
"label": AUDIO_SECTION.DEFAULT_LABEL.object_from_value,
"artist": AUDIO_SECTION.DEFAULT_ARTIST.object_from_value,
"album": AUDIO_SECTION.DEFAULT_ALBUM.object_from_value,
"song": AUDIO_SECTION.DEFAULT_SONG.object_from_value,
"album_type": AUDIO_SECTION.DEFAULT_ALBUM_TYPE.object_from_value,
"audio_format": AUDIO_FORMAT
}
TOR: bool = CONNECTION_SECTION.USE_TOR.object_from_value
PROXIES_LIST: List[Dict[str, str]] = CONNECTION_SECTION.PROXIES.object_from_value
proxies = {}
if len(CONNECTION_SECTION.PROXIES) > 0:
"""
@@ -89,6 +82,11 @@ if TOR:
'http': f'socks5h://127.0.0.1:{CONNECTION_SECTION.TOR_PORT.object_from_value}',
'https': f'socks5h://127.0.0.1:{CONNECTION_SECTION.TOR_PORT.object_from_value}'
}
INVIDIOUS_INSTANCE: ParseResult = CONNECTION_SECTION.INVIDIOUS_INSTANCE.object_from_value
PIPED_INSTANCE: ParseResult = CONNECTION_SECTION.PIPED_INSTANCE.object_from_value
ALL_YOUTUBE_URLS: List[ParseResult] = CONNECTION_SECTION.ALL_YOUTUBE_URLS.object_from_value
ENABLE_SPONSOR_BLOCK: bool = CONNECTION_SECTION.SPONSOR_BLOCK.object_from_value
# size of the chunks that are streamed
CHUNK_SIZE = CONNECTION_SECTION.CHUNK_SIZE.object_from_value
@@ -102,3 +100,23 @@ SORT_BY_DATE = AUDIO_SECTION.SORT_BY_DATE.object_from_value
SORT_BY_ALBUM_TYPE = AUDIO_SECTION.SORT_BY_ALBUM_TYPE.object_from_value
ALBUM_TYPE_BLACKLIST: Set[AlbumType] = set(AUDIO_SECTION.ALBUM_TYPE_BLACKLIST.object_from_value)
THREADED = False
ENABLE_RESULT_HISTORY: bool = MISC_SECTION.ENABLE_RESULT_HISTORY.object_from_value
HISTORY_LENGTH: int = MISC_SECTION.HISTORY_LENGTH.object_from_value
HELP_MESSAGE = """
to search:
> s: {query or url}
> s: https://musify.club/release/some-random-release-183028492
> s: #a {artist} #r {release} #t {track}
to download:
> d: {option ids or direct url}
> d: 0, 3, 4
> d: 1
> d: https://musify.club/release/some-random-release-183028492
have fun :3
""".strip()

View File

@@ -0,0 +1,3 @@
from .download_result import DownloadResult
from .query import Query
from .thread_classes import EndThread, FinishedSearch

View File

@@ -12,8 +12,10 @@ UNIT_DIVISOR = 1024
class DownloadResult:
total: int = 0
fail: int = 0
sponsor_segments: int = 0
error_message: str = None
total_size = 0
found_on_disk: int = 0
_error_message_list: List[str] = field(default_factory=list)
@@ -71,15 +73,19 @@ class DownloadResult:
self.fail += other.fail
self._error_message_list.extend(other._error_message_list)
self.sponsor_segments += other.sponsor_segments
self.total_size += other.total_size
self.found_on_disk += other.found_on_disk
def __str__(self):
if self.is_fatal_error:
return self.error_message
head = f"{self.fail} from {self.total} downloads failed:\n" \
f"successrate:\t{int(self.success_percentage * 100)}%\n" \
f"failrate:\t{int(self.failure_percentage * 100)}%\n" \
f"total size:\t{self.formated_size}"
head = f"{self.fail} from {self.total} downloads failed:\n" \
f"successrate:\t{int(self.success_percentage * 100)}%\n" \
f"failrate:\t{int(self.failure_percentage * 100)}%\n" \
f"total size:\t{self.formated_size}\n" \
f"skipped segments:\t{self.sponsor_segments}\n" \
f"found on disc:\t{self.found_on_disk}"
if not self.is_mild_failure:
return head

View File

@@ -0,0 +1,32 @@
from typing import Optional, List
from ...objects import DatabaseObject, Artist, Album, Song
class Query:
    """
    Wraps a user query: either raw search text, or an already resolved
    music object (Artist/Album/Song), or both.
    """
    def __init__(
        self,
        raw_query: str = "",
        music_object: DatabaseObject = None
    ) -> None:
        self.raw_query: str = raw_query
        self.music_object: Optional[DatabaseObject] = music_object

    @property
    def is_raw(self) -> bool:
        # True when only the raw text is available (nothing resolved yet)
        return self.music_object is None

    @property
    def default_search(self) -> List[str]:
        """
        Build the search strings to feed to a page: the artist name for an
        Artist, one "<artist> - <object>" string per artist for a Song or
        Album, and the raw query otherwise.
        """
        if self.music_object is None:
            return [self.raw_query]
        if isinstance(self.music_object, Artist):
            return [self.music_object.name]
        # NOTE(review): Song is tested before Album — keep this order in case
        # the object types are related by inheritance; confirm before reordering
        if isinstance(self.music_object, Song):
            return [f"{artist.name} - {self.music_object}" for artist in self.music_object.main_artist_collection]
        if isinstance(self.music_object, Album):
            return [f"{artist.name} - {self.music_object}" for artist in self.music_object.artist_collection]
        return [self.raw_query]

View File

@@ -0,0 +1,12 @@
class EndThread:
    """
    Signal object shared between threads: truthy once :meth:`exit` was called.
    """
    # flag flipped exactly once by exit()
    _has_ended: bool = False

    def __bool__(self):
        return self._has_ended

    def exit(self):
        # BUG FIX: the original body was the bare expression `self._has_ended`
        # (no assignment), so the flag never flipped and __bool__ stayed False.
        self._has_ended = True
class FinishedSearch:
    """Marker object signalling that a search has finished (presumably passed
    between threads like EndThread — confirm against the consumers)."""
    pass