feat: build

This commit is contained in:
2024-04-09 10:32:17 +02:00
parent 5ba38916d6
commit fc2414dc68
93 changed files with 42 additions and 26 deletions

65
music_kraken/__init__.py Normal file
View File

@@ -0,0 +1,65 @@
import logging
import gc
import sys
from pathlib import Path
from rich.logging import RichHandler
from rich.console import Console
from .utils.shared import DEBUG, DEBUG_LOGGING
from .utils.config import logging_settings, main_settings, read_config
read_config()
console: Console = Console()
def init_logging():
log_file = main_settings['log_file']
if log_file.is_file():
last_log_file = Path(log_file.parent, "prev." + log_file.name)
with log_file.open("r", encoding="utf-8") as current_file:
with last_log_file.open("w", encoding="utf-8") as last_file:
last_file.write(current_file.read())
rich_handler = RichHandler(rich_tracebacks=True, console=console)
rich_handler.setLevel(logging_settings['log_level'] if not DEBUG_LOGGING else logging.DEBUG)
file_handler = logging.FileHandler(log_file)
file_handler.setLevel(logging.DEBUG)
# configure logger default
logging.basicConfig(
level=logging.DEBUG,
format=logging_settings['logging_format'],
datefmt="%Y-%m-%d %H:%M:%S",
handlers=[
file_handler,
rich_handler,
]
)
init_logging()
from . import cli
if DEBUG:
sys.setrecursionlimit(100)
if main_settings['modify_gc']:
"""
At the start I tune the garbage collector to run less often.
This should increase speed:
https://mkennedy.codes/posts/python-gc-settings-change-this-and-make-your-app-go-20pc-faster/
"""
# Clean up what might be garbage so far.
gc.collect(2)
allocs, gen1, gen2 = gc.get_threshold()
allocs = 50_000 # Start the GC sequence every 50K not 700 allocations.
gen1 = gen1 * 2
gen2 = gen2 * 2
gc.set_threshold(allocs, gen1, gen2)

152
music_kraken/__main__.py Normal file
View File

@@ -0,0 +1,152 @@
def cli():
import argparse
parser = argparse.ArgumentParser(
description="A simple yet powerful cli to download music with music-kraken.",
epilog="This is a cli for the developers, and it is shipped with music-krakens core.\n"
"While it is a nice and solid cli it will lack some features.\n"
"The proper cli and other frontends will be made or already have been made.\n"
"To see all current frontends check the docs at: https://github.com/HeIIow2/music-downloader"
)
# arguments for debug purposes
parser.add_argument(
'-v', '--verbose',
action="store_true",
help="Sets the logging level to debug."
)
parser.add_argument(
'-m', '--force-post-process',
action="store_true",
help="If a to downloaded thing is skipped due to being found on disc,\nit will still update the metadata accordingly."
)
parser.add_argument(
'-t', '--test',
action="store_true",
help="For the sake of testing. Equals: '-vp -g test'"
)
# general arguments
parser.add_argument(
'-a', '--all',
action="store_true",
help="If set it will download EVERYTHING the music downloader can find.\n"
"For example weird compilations from musify."
)
parser.add_argument(
'-g', '--genre',
help="Specifies the genre. (Will be overwritten by -t)"
)
parser.add_argument(
'-u', '--url',
help="Downloads the content of given url."
)
parser.add_argument(
'--settings',
help="Opens a menu to modify the settings",
action="store_true"
)
parser.add_argument(
'-s',
'--setting',
help="Modifies a setting directly.",
nargs=2
)
parser.add_argument(
"--paths",
"-p",
help="Prints an overview over all music-kraken paths.",
action="store_true"
)
parser.add_argument(
"-r",
help="Resets the config file to the default one.",
action="store_true"
)
parser.add_argument(
"--frontend",
"-f",
help="Set a good and fast invidious/piped instance from your homecountry, to reduce the latency.",
action="store_true"
)
parser.add_argument(
"--clear-cache",
help="Deletes the cache.",
action="store_true"
)
parser.add_argument(
"--clean-cache",
help="Deletes the outdated cache. (all expired cached files, and not indexed files)",
action="store_true"
)
arguments = parser.parse_args()
if arguments.verbose or arguments.test:
import logging
print("Setting logging-level to DEBUG")
logging.getLogger().setLevel(logging.DEBUG)
from . import cli
from .utils.config import read_config
from .utils import shared
if arguments.r:
import os
for file in shared.CONFIG_DIRECTORY.iterdir():
if file.is_file():
print(f"Deleting {file}....")
file.unlink()
read_config()
exit()
read_config()
if arguments.setting is not None:
cli.settings(*arguments.setting)
if arguments.settings:
cli.settings()
if arguments.paths:
cli.print_paths()
if arguments.frontend:
cli.set_frontend(silent=False)
if arguments.clear_cache:
from .cli.options import cache
cache.clear_cache()
if arguments.clean_cache:
from .cli.options import cache
cache.clean_cache()
# getting the genre
genre: str = arguments.genre
if arguments.test:
genre = "test"
cli.download(
genre=genre,
download_all=arguments.all,
direct_download_url=arguments.url,
process_metadata_anyway=arguments.force_post_process or arguments.test
)
if __name__ == "__main__":
cli()

View File

@@ -0,0 +1,9 @@
from . import metadata
from . import codec
AudioMetadata = metadata.AudioMetadata
write_many_metadata = metadata.write_many_metadata
write_metadata = metadata.write_metadata
write_metadata_to_target = metadata.write_metadata_to_target
correct_codec = codec.correct_codec

View File

@@ -0,0 +1,57 @@
from pathlib import Path
from typing import List, Tuple
from tqdm import tqdm
from ffmpeg_progress_yield import FfmpegProgress
from ..utils.config import main_settings, logging_settings
from ..objects import Target
LOGGER = logging_settings["codex_logger"]
def correct_codec(target: Target, bitrate_kb: int = main_settings["bitrate"], audio_format: str = main_settings["audio_format"], interval_list: List[Tuple[float, float]] = None):
if not target.exists:
LOGGER.warning(f"Target doesn't exist: {target.file_path}")
return
interval_list = interval_list or []
bitrate_b = int(bitrate_kb * 1024)  # convert kilobits to bits for ffmpeg's bitrate flag
output_target = Target(
file_path=Path(str(target.file_path) + "." + audio_format)
)
# get the select thingie
# https://stackoverflow.com/questions/50594412/cut-multiple-parts-of-a-video-with-ffmpeg
aselect_list: List[str] = []
start = 0
next_start = 0
for end, next_start in interval_list:
aselect_list.append(f"between(t,{start},{end})")
start = next_start
aselect_list.append(f"gte(t,{next_start})")
select = f"aselect='{'+'.join(aselect_list)}',asetpts=N/SR/TB"
# build the ffmpeg command
ffmpeg_command = [
str(main_settings["ffmpeg_binary"]),
"-i", str(target.file_path),
"-af", select,
"-b", str(bitrate_b),
str(output_target.file_path)
]
# run the ffmpeg command with a progressbar
ff = FfmpegProgress(ffmpeg_command)
with tqdm(total=100, desc=f"removing {len(interval_list)} segments") as pbar:
for progress in ff.run_command_with_progress():
pbar.update(progress-pbar.n)
LOGGER.debug(ff.stderr)
output_target.copy_content(target)
output_target.delete()

View File

@@ -0,0 +1,88 @@
import mutagen
from mutagen.id3 import ID3, Frame
from pathlib import Path
from typing import List
import logging
from ..utils.config import logging_settings
from ..objects import Song, Target, Metadata
LOGGER = logging_settings["tagging_logger"]
class AudioMetadata:
def __init__(self, file_location: str = None) -> None:
self._file_location = None
self.frames: ID3 = ID3()
if file_location is not None:
self.file_location = file_location
def add_metadata(self, metadata: Metadata):
for value in metadata:
"""
https://www.programcreek.com/python/example/84797/mutagen.id3.ID3
"""
self.frames.add(value)
def add_song_metadata(self, song: Song):
self.add_metadata(song.metadata)
def save(self, file_location: Path = None):
LOGGER.debug(f"saving following frames: {self.frames.pprint()}")
if file_location is not None:
self.file_location = file_location
if self.file_location is None:
raise Exception("no file target provided to save the data to")
self.frames.save(self.file_location, v2_version=4)
def set_file_location(self, file_location: Path):
# try loading the data from the given file. if it doesn't succeed the frame remains empty
try:
self.frames.load(file_location, v2_version=4)
LOGGER.debug(f"loaded following from \"{file_location}\"\n{self.frames.pprint()}")
except mutagen.MutagenError:
LOGGER.warning(f"couldn't find any metadata at: \"{self.file_location}\"")
self._file_location = file_location
file_location = property(fget=lambda self: self._file_location, fset=set_file_location)
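# Note: assigning to `file_location` (as __init__ does) routes through
# set_file_location, so ID3 frames already present in the file are loaded
# before any new metadata is added on top of them.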
def write_metadata_to_target(metadata: Metadata, target: Target):
if not target.exists:
return
id3_object = AudioMetadata(file_location=target.file_path)
id3_object.add_metadata(metadata)
id3_object.save()
def write_metadata(song: Song, ignore_file_not_found: bool = True):
target: Target
for target in song.target:
if not target.exists:
if ignore_file_not_found:
continue
else:
raise ValueError(f"{song.target.file} not found")
id3_object = AudioMetadata(file_location=target.file_path)
id3_object.add_song_metadata(song=song)
id3_object.save()
def write_many_metadata(song_list: List[Song]):
for song in song_list:
write_metadata(song=song, ignore_file_not_found=True)
if __name__ == "__main__":
print("called directly")
filepath = "/home/lars/Music/deathcore/Archspire/Bleed the Future/Bleed the Future.mp3"
audio_metadata = AudioMetadata(file_location=filepath)
print(audio_metadata.frames.pprint())

View File

@@ -0,0 +1,5 @@
from .informations import print_paths
from .main_downloader import download
from .options.settings import settings
from .options.frontend import set_frontend

View File

@@ -0,0 +1 @@
from .paths import print_paths

View File

@@ -0,0 +1,22 @@
from ..utils import cli_function
from ...utils.path_manager import LOCATIONS
from ...utils.config import main_settings
def all_paths():
return {
"Temp dir": main_settings["temp_directory"],
"Music dir": main_settings["music_directory"],
"Conf dir": LOCATIONS.CONFIG_DIRECTORY,
"Conf file": LOCATIONS.CONFIG_FILE,
"logging file": main_settings["log_file"],
"FFMPEG bin": main_settings["ffmpeg_binary"],
"Cache Dir": main_settings["cache_directory"],
}
@cli_function
def print_paths():
for name, path in all_paths().items():
print(f"{name}:\t{path}")

View File

@@ -0,0 +1,420 @@
import random
from typing import Set, Type, Dict, List
from pathlib import Path
import re
from .utils import cli_function
from .options.first_config import initial_config
from ..utils.config import write_config, main_settings
from ..utils.regex import URL_PATTERN
from ..utils.string_processing import fit_to_file_system
from ..utils.support_classes.query import Query
from ..utils.support_classes.download_result import DownloadResult
from ..utils.exception.download import UrlNotFoundException
from ..utils.enums.colors import BColors
from ..download.results import Results, Option, PageResults
from ..download.page_attributes import Pages
from ..pages import Page
from ..objects import Song, Album, Artist, DatabaseObject
"""
This is the implementation of the Shell
# Behaviour
## Searching
```mkshell
> s: {query or url}
# examples
> s: https://musify.club/release/some-random-release-183028492
> s: r: #a an Artist #r some random Release
```
Searches for a URL or a query
### Query Syntax
```
#a {artist} #r {release} #t {track}
```
You can escape characters like `#` by writing: `\#`
## Downloading
To download something, you either need a direct link, or you need to have already searched for options
```mkshell
> d: {option ids or direct url}
# examples
> d: 0, 3, 4
> d: 1
> d: https://musify.club/release/some-random-release-183028492
```
## Misc
### Exit
```mkshell
> q
> quit
> exit
> abort
```
### Current Options
```mkshell
> .
```
### Previous Options
```
> ..
```
"""
EXIT_COMMANDS = {"q", "quit", "exit", "abort"}
ALPHABET = "abcdefghijklmnopqrstuvwxyz"
PAGE_NAME_FILL = "-"
MAX_PAGE_LEN = 21
def get_existing_genre() -> List[str]:
"""
gets the name of all subdirectories of shared.MUSIC_DIR,
but filters out all directories whose name matches any pattern
from shared.NOT_A_GENRE_REGEX.
"""
existing_genres: List[str] = []
# get all subdirectories of MUSIC_DIR, not the files in the dir.
existing_subdirectories: List[Path] = [f for f in main_settings["music_directory"].iterdir() if f.is_dir()]
for subdirectory in existing_subdirectories:
name: str = subdirectory.name
if not any(re.match(regex_pattern, name) for regex_pattern in main_settings["not_a_genre_regex"]):
existing_genres.append(name)
existing_genres.sort()
return existing_genres
def get_genre():
existing_genres = get_existing_genre()
for i, genre_option in enumerate(existing_genres):
print(f"{i + 1:0>2}: {genre_option}")
while True:
genre = input("Id or new genre: ")
if genre.isdigit():
genre_id = int(genre) - 1
if genre_id >= len(existing_genres):
print(f"No genre under the id {genre_id + 1}.")
continue
return existing_genres[genre_id]
new_genre = fit_to_file_system(genre)
agree_inputs = {"y", "yes", "ok"}
verification = input(f"create new genre \"{new_genre}\"? (Y/N): ").lower()
if verification in agree_inputs:
return new_genre
def help_message():
print()
print(random.choice(main_settings["happy_messages"]))
print()
class Downloader:
def __init__(
self,
exclude_pages: Set[Type[Page]] = None,
exclude_shady: bool = False,
max_displayed_options: int = 10,
option_digits: int = 3,
genre: str = None,
process_metadata_anyway: bool = False,
) -> None:
self.pages: Pages = Pages(exclude_pages=exclude_pages, exclude_shady=exclude_shady)
self.page_dict: Dict[str, Type[Page]] = dict()
self.max_displayed_options = max_displayed_options
self.option_digits: int = option_digits
self.current_results: Results = None
self._result_history: List[Results] = []
self.genre = genre or get_genre()
self.process_metadata_anyway = process_metadata_anyway
print()
print(f"Downloading to: \"{self.genre}\"")
print()
def print_current_options(self):
self.page_dict = dict()
print()
page_count = 0
for option in self.current_results.formated_generator(max_items_per_page=self.max_displayed_options):
if isinstance(option, Option):
color = BColors.BOLD.value if self.pages.is_downloadable(option.music_object) else BColors.GREY.value
print(f"{color}{option.index:0{self.option_digits}} {option.music_object.option_string}{BColors.ENDC.value}")
else:
prefix = ALPHABET[page_count % len(ALPHABET)]
print(
f"{BColors.HEADER.value}({prefix}) ------------------------{option.__name__:{PAGE_NAME_FILL}<{MAX_PAGE_LEN}}------------{BColors.ENDC.value}")
self.page_dict[prefix] = option
self.page_dict[option.__name__] = option
page_count += 1
print()
def set_current_options(self, current_options: Results):
if main_settings["result_history"]:
self._result_history.append(current_options)
if main_settings["history_length"] != -1:
if len(self._result_history) > main_settings["history_length"]:
self._result_history.pop(0)
self.current_results = current_options
def previous_option(self) -> bool:
if not main_settings["result_history"]:
print("History is turned of.\nGo to main_settings, and change the value at 'result_history' to 'true'.")
return False
if len(self._result_history) <= 1:
print(f"No results in history.")
return False
self._result_history.pop()
self.current_results = self._result_history[-1]
return True
def _process_parsed(self, key_text: Dict[str, str], query: str) -> Query:
song = None if "t" not in key_text else Song(title=key_text["t"], dynamic=True)
album = None if "r" not in key_text else Album(title=key_text["r"], dynamic=True)
artist = None if "a" not in key_text else Artist(name=key_text["a"], dynamic=True)
if song is not None:
if album is not None:
song.album_collection.append(album)
if artist is not None:
song.main_artist_collection.append(artist)
return Query(raw_query=query, music_object=song)
if album is not None:
if artist is not None:
album.artist_collection.append(artist)
return Query(raw_query=query, music_object=album)
if artist is not None:
return Query(raw_query=query, music_object=artist)
return Query(raw_query=query)
def search(self, query: str):
if re.match(URL_PATTERN, query) is not None:
try:
page, data_object = self.pages.fetch_url(query)
except UrlNotFoundException as e:
print(f"{e.url} could not be attributed/parsed to any yet implemented site.\n"
f"PR appreciated if the site isn't implemented.\n"
f"Recommendations and suggestions on sites to implement appreciated.\n"
f"But don't be a bitch if I don't end up implementing it.")
return
self.set_current_options(PageResults(page, data_object.options))
self.print_current_options()
return
special_characters = "#\\"
query = query + " "
key_text = {}
skip_next = False
escape_next = False
new_text = ""
latest_key: str = None
for i in range(len(query) - 1):
current_char = query[i]
next_char = query[i + 1]
if skip_next:
skip_next = False
continue
if escape_next:
new_text += current_char
escape_next = False
continue
# escaping
if current_char == "\\":
if next_char in special_characters:
escape_next = True
continue
if current_char == "#":
if latest_key is not None:
key_text[latest_key] = new_text
new_text = ""
latest_key = next_char
skip_next = True
continue
new_text += current_char
if latest_key is not None:
key_text[latest_key] = new_text
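# Worked example (sketch, names made up): the query '#a Psychonaut 4 #r Dipsomania'
# ends up as key_text = {"a": " Psychonaut 4 ", "r": " Dipsomania"};
# the surrounding whitespace is kept and flows into the Query objects as-is.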
parsed_query: Query = self._process_parsed(key_text, query)
self.set_current_options(self.pages.search(parsed_query))
self.print_current_options()
def goto(self, index: int):
page: Type[Page]
music_object: DatabaseObject
try:
page, music_object = self.current_results.get_music_object_by_index(index)
except KeyError:
print()
print(f"The option {index} doesn't exist.")
print()
return
self.pages.fetch_details(music_object)
print(music_object)
print(music_object.options)
self.set_current_options(PageResults(page, music_object.options))
self.print_current_options()
def download(self, download_str: str, download_all: bool = False) -> bool:
to_download: List[DatabaseObject] = []
if re.match(URL_PATTERN, download_str) is not None:
_, music_objects = self.pages.fetch_url(download_str)
to_download.append(music_objects)
else:
index: str
for index in download_str.split(", "):
if not index.strip().isdigit():
print()
print(f"Every download thingie has to be an index, not {index}.")
print()
return False
for index in download_str.split(", "):
to_download.append(self.current_results.get_music_object_by_index(int(index))[1])
print()
print("Downloading:")
for download_object in to_download:
print(download_object.option_string)
print()
_result_map: Dict[DatabaseObject, DownloadResult] = dict()
for database_object in to_download:
r = self.pages.download(music_object=database_object, genre=self.genre, download_all=download_all,
process_metadata_anyway=self.process_metadata_anyway)
_result_map[database_object] = r
for music_object, result in _result_map.items():
print()
print(music_object.option_string)
print(result)
return True
def process_input(self, input_str: str) -> bool:
input_str = input_str.strip()
processed_input: str = input_str.lower()
if processed_input in EXIT_COMMANDS:
return True
if processed_input == ".":
self.print_current_options()
return False
if processed_input == "..":
if self.previous_option():
self.print_current_options()
return False
if processed_input.startswith("s: "):
self.search(input_str[3:])
return False
if processed_input.startswith("d: "):
return self.download(input_str[3:])
if processed_input.isdigit():
self.goto(int(processed_input))
return False
if processed_input != "help":
print(f"{BColors.WARNING.value}Invalid input.{BColors.ENDC.value}")
help_message()
return False
def mainloop(self):
while True:
if self.process_input(input("> ")):
return
@cli_function
def download(
genre: str = None,
download_all: bool = False,
direct_download_url: str = None,
command_list: List[str] = None,
process_metadata_anyway: bool = False,
):
if main_settings["hasnt_yet_started"]:
code = initial_config()
if code == 0:
main_settings["hasnt_yet_started"] = False
write_config()
print(f"{BColors.OKGREEN.value}Restart the programm to use it.{BColors.ENDC.value}")
else:
print(f"{BColors.FAIL.value}Something went wrong configuring.{BColors.ENDC.value}")
shell = Downloader(genre=genre, process_metadata_anyway=process_metadata_anyway)
if command_list is not None:
for command in command_list:
shell.process_input(command)
return
if direct_download_url is not None:
if shell.download(direct_download_url, download_all=download_all):
return
shell.mainloop()

View File

View File

@@ -0,0 +1,26 @@
from logging import getLogger
from ..utils import cli_function
from ...connection.cache import Cache
@cli_function
def clear_cache():
"""
Deletes the cache.
:return:
"""
Cache("main", getLogger("cache")).clear()
print("Cleared cache")
@cli_function
def clean_cache():
"""
Deletes the outdated cache. (all expired cache files and files not in the index)
:return:
"""
Cache("main", getLogger("cache")).clean()
print("Cleaned cache")

View File

@@ -0,0 +1,6 @@
from .frontend import set_frontend
def initial_config():
code = set_frontend(no_cli=True)
return code

View File

@@ -0,0 +1,196 @@
from typing import Dict, List
from dataclasses import dataclass
from collections import defaultdict
from urllib.parse import urlparse
from ..utils import cli_function
from ...objects import Country
from ...utils import config, write_config
from ...utils.config import youtube_settings
from ...connection import Connection
@dataclass
class Instance:
"""
Attributes which influence the quality of an instance:
- users
"""
name: str
uri: str
regions: List[Country]
users: int = 0
def __str__(self) -> str:
return f"{self.name} with {self.users} users."
class FrontendInstance:
SETTING_NAME = "placeholder"
def __init__(self) -> None:
self.region_instances: Dict[Country, List[Instance]] = defaultdict(list)
self.all_instances: List[Instance] = []
def add_instance(self, instance: Instance):
self.all_instances.append(instance)
youtube_lists = youtube_settings["youtube_url"]
existing_netlocs = set(url.netloc for url in youtube_lists)
parsed_instance = urlparse(instance.uri)
instance_netloc = parsed_instance.netloc
if instance_netloc not in existing_netlocs:
youtube_lists.append(parsed_instance)
youtube_settings.__setitem__("youtube_url", youtube_lists, is_parsed=True)
for region in instance.regions:
self.region_instances[region].append(instance)
def fetch(self, silent: bool = False):
if not silent:
print(f"Downloading {type(self).__name__} instances...")
def set_instance(self, instance: Instance):
youtube_settings.__setitem__(self.SETTING_NAME, instance.uri)
def _choose_country(self) -> List[Instance]:
print("Input the country code, an example would be \"US\"")
print('\n'.join(f'{region.name} ({region.alpha_2})' for region in self.region_instances))
print()
available_instances = set(i.alpha_2 for i in self.region_instances)
chosen_region = ""
while chosen_region not in available_instances:
chosen_region = input("nearest country: ").strip().upper()
return self.region_instances[Country.by_alpha_2(chosen_region)]
def choose(self, silent: bool = False):
instances = self.all_instances if silent else self._choose_country()
instances.sort(key=lambda x: x.users, reverse=True)
if silent:
self.set_instance(instances[0])
return
# output the options
print("Choose your instance (input needs to be a digit):")
for i, instance in enumerate(instances):
print(f"{i}) {instance}")
print()
# ask for index
index = ""
while not index.isdigit() or int(index) >= len(instances):
index = input("> ").strip()
instance = instances[int(index)]
print()
print(f"Setting the instance to {instance}")
self.set_instance(instance)
class Invidious(FrontendInstance):
SETTING_NAME = "invidious_instance"
def __init__(self) -> None:
self.connection = Connection(host="https://api.invidious.io/")
self.endpoint = "https://api.invidious.io/instances.json"
super().__init__()
def _process_instance(self, all_instance_data: dict):
instance_data = all_instance_data[1]
stats = instance_data["stats"]
if not instance_data["api"]:
return
if instance_data["type"] != "https":
return
region = instance_data["region"]
instance = Instance(
name=all_instance_data[0],
uri=instance_data["uri"],
regions=[Country.by_alpha_2(region)],
users=stats["usage"]["users"]["total"]
)
self.add_instance(instance)
def fetch(self, silent: bool):
r = self.connection.get(self.endpoint)
if r is None:
return
for instance in r.json():
self._process_instance(all_instance_data=instance)
class Piped(FrontendInstance):
SETTING_NAME = "piped_instance"
def __init__(self) -> None:
self.connection = Connection(host="https://raw.githubusercontent.com")
super().__init__()
def process_instance(self, instance_data: str):
cells = instance_data.split(" | ")
instance = Instance(
name=cells[0].strip(),
uri=cells[1].strip(),
regions=[Country.by_emoji(flag) for flag in cells[2].split(", ")]
)
self.add_instance(instance)
def fetch(self, silent: bool = False):
r = self.connection.get("https://raw.githubusercontent.com/wiki/TeamPiped/Piped-Frontend/Instances.md")
if r is None:
return
process = False
for line in r.content.decode("utf-8").split("\n"):
line = line.strip()
if line != "" and process:
self.process_instance(line)
if line.startswith("---"):
process = True
class FrontendSelection:
def __init__(self):
self.invidious = Invidious()
self.piped = Piped()
def choose(self, silent: bool = False):
self.invidious.fetch(silent)
self.invidious.choose(silent)
self.piped.fetch(silent)
self.piped.choose(silent)
@cli_function
def set_frontend(silent: bool = False):
shell = FrontendSelection()
shell.choose(silent=silent)
return 0

View File

@@ -0,0 +1,71 @@
from ..utils import cli_function
from ...utils.config import config, write_config
from ...utils import exception
def modify_setting(_name: str, _value: str, invalid_ok: bool = True) -> bool:
try:
config.set_name_to_value(_name, _value)
except exception.config.SettingException as e:
if invalid_ok:
print(e)
return False
else:
raise e
write_config()
return True
def print_settings():
for i, attribute in enumerate(config):
print(f"{i:0>2}: {attribute.name}={attribute.value}")
def modify_setting_by_index(index: int) -> bool:
attribute = list(config)[index]
print()
print(attribute)
input__ = input(f"{attribute.name}=")
if not modify_setting(attribute.name, input__.strip()):
return modify_setting_by_index(index)
return True
def modify_setting_by_index(index: int) -> bool:
attribute = list(config)[index]
print()
print(attribute)
input__ = input(f"{attribute.name}=")
if not modify_setting(attribute.name, input__.strip()):
return modify_setting_by_index(index)
return True
@cli_function
def settings(
name: str = None,
value: str = None,
):
if name is not None and value is not None:
modify_setting(name, value, invalid_ok=True)
return
while True:
print_settings()
input_ = input("Id of setting to modify: ")
print()
if input_.isdigit() and int(input_) < len(config):
if modify_setting_by_index(int(input_)):
return
else:
print("Please input a valid ID.")
print()

42
music_kraken/cli/utils.py Normal file
View File

@@ -0,0 +1,42 @@
from ..utils.shared import get_random_message
def cli_function(function):
def wrapper(*args, **kwargs):
silent = kwargs.get("no_cli", False)
if "no_cli" in kwargs:
del kwargs["no_cli"]
if silent:
return function(*args, **kwargs)
code = 0
print_cute_message()
print()
try:
code = function(*args, **kwargs)
except KeyboardInterrupt:
print("\n\nRaise an issue if I fucked up:\nhttps://github.com/HeIIow2/music-downloader/issues")
finally:
print()
print_cute_message()
print("See you soon! :3")
exit()
return wrapper
def print_cute_message():
message = get_random_message()
try:
print(message)
except UnicodeEncodeError:
message = "".join(c for c in message if 0 < ord(c) < 127)
print(message)

View File

@@ -0,0 +1 @@
from .connection import Connection

View File

@@ -0,0 +1,200 @@
import json
from pathlib import Path
from dataclasses import dataclass
from datetime import datetime, timedelta
from typing import List, Optional
from functools import lru_cache
import logging
from ..utils.config import main_settings
@dataclass
class CacheAttribute:
module: str
name: str
created: datetime
expires: datetime
@property
def id(self):
return f"{self.module}_{self.name}"
@property
def is_valid(self):
if isinstance(self.expires, str):
self.expires = datetime.fromisoformat(self.expires)
return datetime.now() < self.expires
def __eq__(self, other):
return self.__dict__ == other.__dict__
class Cache:
def __init__(self, module: str, logger: logging.Logger):
self.module = module
self.logger: logging.Logger = logger
self._dir = main_settings["cache_directory"]
self.index = Path(self._dir, "index.json")
if not self.index.is_file():
with self.index.open("w") as i:
i.write(json.dumps([]))
self.cached_attributes: List[CacheAttribute] = []
self._id_to_attribute = {}
self._time_fields = {"created", "expires"}
with self.index.open("r") as i:
for c in json.loads(i.read()):
for key in self._time_fields:
c[key] = datetime.fromisoformat(c[key])
ca = CacheAttribute(**c)
self.cached_attributes.append(ca)
self._id_to_attribute[ca.id] = ca
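# The index file is a flat JSON list with one entry per cached file, mirroring
# CacheAttribute, e.g. (sketch): [{"module": "musify", "name": "artist_423.html",
# "created": "2024-04-09T10:32:17", "expires": "2024-04-19T10:32:17"}]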
@lru_cache()
def _init_module(self, module: str) -> Path:
"""
:param module:
:return: the module path
"""
r = Path(self._dir, module)
r.mkdir(exist_ok=True)
return r
def _write_index(self, indent: int = 4):
_json = []
for c in self.cached_attributes:
d = c.__dict__
for key in self._time_fields:
d[key] = d[key].isoformat()
_json.append(d)
with self.index.open("w") as f:
f.write(json.dumps(_json, indent=indent))
def _write_attribute(self, cached_attribute: CacheAttribute, write: bool = True) -> bool:
existing_attribute: Optional[CacheAttribute] = self._id_to_attribute.get(cached_attribute.id)
if existing_attribute is not None:
# the attribute exists
if existing_attribute == cached_attribute:
return True
if existing_attribute.is_valid:
return False
existing_attribute.__dict__ = cached_attribute.__dict__
else:
self.cached_attributes.append(cached_attribute)
self._id_to_attribute[cached_attribute.id] = cached_attribute
if write:
self._write_index()
return True
def set(self, content: bytes, name: str, expires_in: float = 10, module: str = ""):
"""
:param content:
:param module:
:param name:
:param expires_in: the unit is days
:return:
"""
if name == "":
return
module = self.module if module == "" else module
module_path = self._init_module(module)
cache_attribute = CacheAttribute(
module=module,
name=name,
created=datetime.now(),
expires=datetime.now() + timedelta(days=expires_in),
)
self._write_attribute(cache_attribute)
cache_path = Path(module_path, name)
with cache_path.open("wb") as content_file:
self.logger.debug(f"writing cache to {cache_path}")
content_file.write(content)
def get(self, name: str) -> Optional[bytes]:
path = Path(self._dir, self.module, name)
if not path.is_file():
return None
# check if it is outdated
existing_attribute: CacheAttribute = self._id_to_attribute[f"{self.module}_{name}"]
if not existing_attribute.is_valid:
return
with path.open("rb") as f:
return f.read()
def clean(self):
keep = set()
for ca in self.cached_attributes.copy():
if ca.name == "":
continue
file = Path(self._dir, ca.module, ca.name)
if not ca.is_valid:
self.logger.debug(f"deleting cache {ca.id}")
file.unlink()
self.cached_attributes.remove(ca)
del self._id_to_attribute[ca.id]
else:
keep.add(file)
# iterate through every module (folder)
for module_path in self._dir.iterdir():
if not module_path.is_dir():
continue
# delete all files not in keep
for path in module_path.iterdir():
if path not in keep:
self.logger.info(f"Deleting cache {path}")
path.unlink()
# delete all empty directories
for path in module_path.iterdir():
if path.is_dir() and not list(path.iterdir()):
self.logger.debug(f"Deleting cache directory {path}")
path.rmdir()
self._write_index()
def clear(self):
"""
delete every file in the cache directory
:return:
"""
for path in self._dir.iterdir():
if path.is_dir():
for file in path.iterdir():
file.unlink()
path.rmdir()
else:
path.unlink()
self.cached_attributes.clear()
self._id_to_attribute.clear()
self._write_index()
def __repr__(self):
return f"<Cache {self.module}>"

View File

@@ -0,0 +1,341 @@
from __future__ import annotations
import logging
import threading
import time
from typing import List, Dict, Optional, Set
from urllib.parse import urlparse, urlunsplit, ParseResult
import copy
import inspect
import requests
import responses
from tqdm import tqdm
from .cache import Cache
from .rotating import RotatingProxy
from ..objects import Target
from ..utils.config import main_settings
from ..utils.support_classes.download_result import DownloadResult
from ..utils.hacking import merge_args
class Connection:
def __init__(
self,
host: str,
proxies: List[dict] = None,
tries: int = (len(main_settings["proxies"]) + 1) * main_settings["tries_per_proxy"],
timeout: int = 7,
logger: logging.Logger = logging.getLogger("connection"),
header_values: Dict[str, str] = None,
accepted_response_codes: Set[int] = None,
semantic_not_found: bool = True,
sleep_after_404: float = 0.0,
heartbeat_interval=0,
module: str = "general",
cache_expiring_duration: float = 10
):
if proxies is None:
proxies = main_settings["proxies"]
self.cache: Cache = Cache(module=module, logger=logger)
self.cache_expiring_duration = cache_expiring_duration
self.HEADER_VALUES = dict() if header_values is None else header_values
self.LOGGER = logger
self.HOST = urlparse(host)
self.TRIES = tries
self.TIMEOUT = timeout
self.rotating_proxy = RotatingProxy(proxy_list=proxies)
self.ACCEPTED_RESPONSE_CODES = accepted_response_codes or {200}
self.SEMANTIC_NOT_FOUND = semantic_not_found
self.sleep_after_404 = sleep_after_404
self.session = requests.Session()
self.session.headers = self.get_header(**self.HEADER_VALUES)
self.session.proxies = self.rotating_proxy.current_proxy
self.heartbeat_thread = None
self.heartbeat_interval = heartbeat_interval
self.lock: bool = False
def start_heartbeat(self):
if self.heartbeat_interval <= 0:
self.LOGGER.warning(f"Can't start a heartbeat with {self.heartbeat_interval}s in between.")
self.heartbeat_thread = threading.Thread(target=self._heartbeat_loop, args=(self.heartbeat_interval,), daemon=True)
self.heartbeat_thread.start()
def heartbeat_failed(self):
self.LOGGER.warning(f"The hearth couldn't beat.")
def heartbeat(self):
# Your code to send heartbeat requests goes here
raise NotImplementedError("please implement the heartbeat function.")
def _heartbeat_loop(self, interval: float):
def heartbeat_wrapper():
self.LOGGER.debug(f"The hearth is beating.")
self.heartbeat()
while True:
heartbeat_wrapper()
time.sleep(interval)
def base_url(self, url: ParseResult = None):
if url is None:
url = self.HOST
return urlunsplit((url.scheme, url.netloc, "", "", ""))
def get_header(self, **header_values) -> Dict[str, str]:
return {
"user-agent": main_settings["user_agent"],
"User-Agent": main_settings["user_agent"],
"Connection": "keep-alive",
"Host": self.HOST.netloc,
"Referer": self.base_url(),
"Accept-Language": main_settings["language"],
**header_values
}
def rotate(self):
self.session.proxies = self.rotating_proxy.rotate()
def _update_headers(
self,
headers: Optional[dict],
refer_from_origin: bool,
url: ParseResult
) -> Dict[str, str]:
headers = self.get_header(**(headers or {}))
if not refer_from_origin:
headers["Referer"] = self.base_url(url=url)
return headers
def save(self, r: requests.Response, name: str, error: bool = False, **kwargs):
n_kwargs = {}
if error:
n_kwargs["module"] = "failed_requests"
self.cache.set(r.content, name, expires_in=kwargs.get("expires_in", self.cache_expiring_duration), **n_kwargs)
def request(
self,
url: str,
timeout: float = None,
headers: Optional[dict] = None,
try_count: int = 0,
accepted_response_codes: set = None,
refer_from_origin: bool = True,
raw_url: bool = False,
raw_headers: bool = False,
sleep_after_404: float = None,
is_heartbeat: bool = False,
disable_cache: bool = None,
method: str = None,
name: str = "",
**kwargs
) -> Optional[requests.Response]:
if method is None:
raise AttributeError("method is not set.")
method = method.upper()
headers = dict() if headers is None else headers
disable_cache = headers.get("Cache-Control", "").lower() == "no-cache" if disable_cache is None else disable_cache
accepted_response_codes = self.ACCEPTED_RESPONSE_CODES if accepted_response_codes is None else accepted_response_codes
current_kwargs = copy.copy(locals())
current_kwargs.pop("kwargs")
current_kwargs.update(**kwargs)
parsed_url = urlparse(url)
if not raw_headers:
_headers = copy.copy(self.HEADER_VALUES)
_headers.update(headers)
headers = self._update_headers(
headers=_headers,
refer_from_origin=refer_from_origin,
url=parsed_url
)
else:
headers = headers or {}
request_url = parsed_url.geturl() if not raw_url else url
if name != "" and not disable_cache:
cached = self.cache.get(name)
if cached is not None:
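# Cache hit: instead of returning the raw bytes, register them with the
# `responses` mock so the request below is intercepted locally and a full
# requests.Response (status, headers, .json(), ...) comes back, keeping the
# return type identical to a real network round trip.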
with responses.RequestsMock() as resp:
resp.add(
method=method,
url=request_url,
body=cached,
)
return requests.request(method=method, url=url, timeout=timeout, headers=headers, **kwargs)
if sleep_after_404 is None:
sleep_after_404 = self.sleep_after_404
if try_count >= self.TRIES:
return
if timeout is None:
timeout = self.TIMEOUT
r = None
connection_failed = False
try:
if self.lock:
self.LOGGER.info(f"Waiting for the heartbeat to finish.")
while self.lock and not is_heartbeat:
pass
self.lock = True
r: requests.Response = requests.request(method=method, url=url, timeout=timeout, headers=headers, **kwargs)
if r.status_code in accepted_response_codes:
if not disable_cache:
self.save(r, name, **kwargs)
return r
if self.SEMANTIC_NOT_FOUND and r.status_code == 404:
self.LOGGER.warning(f"Couldn't find url (404): {request_url}")
return None
# the server rejected the request, or the internet is lacking
except requests.exceptions.Timeout:
self.LOGGER.warning(f"Request timed out at \"{request_url}\": ({try_count}-{self.TRIES})")
connection_failed = True
except requests.exceptions.ConnectionError:
self.LOGGER.warning(f"Couldn't connect to \"{request_url}\": ({try_count}-{self.TRIES})")
connection_failed = True
# this is important for thread safety
finally:
self.lock = False
if not connection_failed:
self.LOGGER.warning(f"{self.HOST.netloc} responded wit {r.status_code} at {url}. ({try_count}-{self.TRIES})")
if r is not None:
self.LOGGER.debug("request headers:\n\t"+ "\n\t".join(f"{k}\t=\t{v}" for k, v in r.request.headers.items()))
self.LOGGER.debug("response headers:\n\t"+ "\n\t".join(f"{k}\t=\t{v}" for k, v in r.headers.items()))
self.LOGGER.debug(r.content)
if name != "":
self.save(r, name, error=True, **kwargs)
if sleep_after_404 != 0:
self.LOGGER.warning(f"Waiting for {sleep_after_404} seconds.")
time.sleep(sleep_after_404)
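# Failure path: rotate to the next proxy and retry by recursing with an
# incremented try_count; the recursion bottoms out at self.TRIES (checked above).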
self.rotate()
current_kwargs["try_count"] = current_kwargs.get("try_count", 0) + 1
return Connection.request(**current_kwargs)
@merge_args(request)
def get(self, *args, **kwargs) -> Optional[requests.Response]:
return self.request(
*args,
method="GET",
**kwargs
)
@merge_args(request)
def post(
self,
*args,
json: dict = None,
**kwargs
) -> Optional[requests.Response]:
r = self.request(
*args,
method="POST",
json=json,
**kwargs
)
if r is None:
self.LOGGER.warning(f"payload: {json}")
return r
@merge_args(request)
def stream_into(
self,
url: str,
target: Target,
name: str = "download",
chunk_size: int = main_settings["chunk_size"],
progress: int = 0,
method: str = "GET",
try_count: int = 0,
accepted_response_codes: set = None,
**kwargs
) -> DownloadResult:
accepted_response_codes = self.ACCEPTED_RESPONSE_CODES if accepted_response_codes is None else accepted_response_codes
stream_kwargs = copy.copy(locals())
stream_kwargs.update(stream_kwargs.pop("kwargs"))
if "description" in kwargs:
name = kwargs.pop("description")
if progress > 0:
headers = kwargs.setdefault("headers", dict())  # headers is only available via kwargs here
headers["Range"] = f"bytes={target.size}-"
r = self.request(
url=url,
name=name,
method=method,
stream=True,
accepted_response_codes=accepted_response_codes,
**kwargs
)
if r is None:
return DownloadResult(error_message=f"Could not establish a stream from: {url}")
target.create_path()
total_size = int(r.headers.get('content-length', 0))
progress = 0
retry = False
with target.open("ab") as f:
"""
https://en.wikipedia.org/wiki/Kilobyte
> The internationally recommended unit symbol for the kilobyte is kB.
"""
with tqdm(total=total_size, initial=target.size, unit='B', unit_scale=True, unit_divisor=1024, desc=name) as t:
try:
for chunk in r.iter_content(chunk_size=chunk_size):
size = f.write(chunk)
progress += size
t.update(size)
except (requests.exceptions.ConnectionError, requests.exceptions.Timeout, requests.exceptions.ChunkedEncodingError):
if try_count >= self.TRIES:
self.LOGGER.warning(f"Stream timed out at \"{url}\": to many retries, aborting.")
return DownloadResult(error_message=f"Stream timed out from {url}, reducing the chunk_size might help.")
self.LOGGER.warning(f"Stream timed out at \"{url}\": ({try_count}-{self.TRIES})")
retry = True
try_count += 1
if total_size > progress:
retry = True
if retry:
self.LOGGER.warning(f"Retrying stream...")
accepted_response_codes.add(206)
return Connection.stream_into(**stream_kwargs)
return DownloadResult()

View File

@@ -0,0 +1,43 @@
from typing import Dict, List
import requests
class RotatingObject:
"""
This will be used for RotatingProxies and invidious instances.
"""
def __init__(self, object_list: list):
self._object_list: list = object_list
if len(self._object_list) <= 0:
raise ValueError("There needs to be at least one item in a Rotating structure.")
self._current_index = 0
@property
def object(self):
return self._object_list[self._current_index]
def __len__(self):
return len(self._object_list)
@property
def next(self):
self._current_index = (self._current_index + 1) % len(self._object_list)
return self._object_list[self._current_index]
class RotatingProxy(RotatingObject):
def __init__(self, proxy_list: List[Dict[str, str]]):
super().__init__(
proxy_list if len(proxy_list) > 0 else [None]
)
def rotate(self) -> Dict[str, str]:
return self.next
@property
def current_proxy(self) -> Dict[str, str]:
return super().object
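# Hedged usage sketch (proxy URLs made up): with proxy_list =
# [{"https": "http://proxy-a:8080"}, {"https": "http://proxy-b:8080"}],
# current_proxy starts at proxy-a and every rotate() cycles to the next entry,
# wrapping around; an empty list degrades to [None], i.e. direct connections.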

View File

View File

@@ -0,0 +1,197 @@
from typing import List, Union, Type, Optional
from peewee import (
SqliteDatabase,
PostgresqlDatabase,
MySQLDatabase,
Model,
CharField,
IntegerField,
BooleanField,
ForeignKeyField,
TextField
)
"""
**IMPORTANT**:
never delete, modify the datatype of, or add constraints to ANY existing columns
between the versions that get pushed out to the users.
Otherwise the migration function can't upgrade legacy databases to new ones
while keeping the data of the old ones.
EVEN if that means, for example, keeping decimal values stored in strings.
(not in my codebase though.)
"""
class BaseModel(Model):
notes: str = CharField(null=True)
class Meta:
database = None
@classmethod
def Use(cls, database: Union[SqliteDatabase, PostgresqlDatabase, MySQLDatabase]) -> Model:
cls._meta.database = database
return cls
def use(self, database: Union[SqliteDatabase, PostgresqlDatabase, MySQLDatabase]) -> Model:
self._meta.database = database
return self
class ObjectModel(BaseModel):
id: str = CharField(primary_key=True)
class MainModel(BaseModel):
additional_arguments: str = CharField(null=True)
notes: str = CharField(null=True)
class Song(MainModel):
"""A class representing a song in the music database."""
title: str = CharField(null=True)
isrc: str = CharField(null=True)
length: int = IntegerField(null=True)
tracksort: int = IntegerField(null=True)
genre: str = CharField(null=True)
class Album(MainModel):
"""A class representing an album in the music database."""
title: str = CharField(null=True)
album_status: str = CharField(null=True)
album_type: str = CharField(null=True)
language: str = CharField(null=True)
date_string: str = CharField(null=True)
date_format: str = CharField(null=True)
barcode: str = CharField(null=True)
albumsort: int = IntegerField(null=True)
class Artist(MainModel):
"""A class representing an artist in the music database."""
name: str = CharField(null=True)
country: str = CharField(null=True)
formed_in_date: str = CharField(null=True)
formed_in_format: str = CharField(null=True)
general_genre: str = CharField(null=True)
class Label(MainModel):
name: str = CharField(null=True)
class Target(ObjectModel):
"""A class representing a target of a song in the music database."""
file: str = CharField()
path: str = CharField()
song = ForeignKeyField(Song, backref='targets')
class Lyrics(ObjectModel):
"""A class representing lyrics of a song in the music database."""
text: str = TextField()
language: str = CharField()
song = ForeignKeyField(Song, backref='lyrics')
class Source(BaseModel):
"""A class representing a source of a song in the music database."""
ContentTypes = Union[Song, Album, Artist, Lyrics]
page: str = CharField()
url: str = CharField()
content_type: str = CharField()
content_id: int = CharField()
# content: ForeignKeyField = ForeignKeyField('self', backref='content_items', null=True)
@property
def content_object(self) -> Union[Song, Album, Artist]:
"""Get the content associated with the source as an object."""
if self.content_type == 'Song':
return Song.get(Song.id == self.content_id)
if self.content_type == 'Album':
return Album.get(Album.id == self.content_id)
if self.content_type == 'Artist':
return Artist.get(Artist.id == self.content_id)
if self.content_type == 'Label':
return Label.get(Label.id == self.content_id)
if self.content_type == 'Lyrics':
return Lyrics.get(Lyrics.id == self.content_id)
@content_object.setter
def content_object(self, value: Union[Song, Album, Artist]) -> None:
"""Set the content associated with the source as an object."""
self.content_type = value.__class__.__name__
self.content_id = value.id
class SongArtist(BaseModel):
"""A class representing the relationship between a song and an artist."""
song: ForeignKeyField = ForeignKeyField(Song, backref='song_artists')
artist: ForeignKeyField = ForeignKeyField(Artist, backref='song_artists')
is_feature: bool = BooleanField(default=False)
class ArtistAlbum(BaseModel):
"""A class representing the relationship between an album and an artist."""
album: ForeignKeyField = ForeignKeyField(Album, backref='album_artists')
artist: ForeignKeyField = ForeignKeyField(Artist, backref='album_artists')
class AlbumSong(BaseModel):
"""A class representing the relationship between an album and an song."""
album: ForeignKeyField = ForeignKeyField(Album, backref='album_artists')
song: ForeignKeyField = ForeignKeyField(Song, backref='album_artists')
class LabelAlbum(BaseModel):
label: ForeignKeyField = ForeignKeyField(Label, backref='label_album')
album: ForeignKeyField = ForeignKeyField(Album, backref='label_album')
class LabelArtist(BaseModel):
label: ForeignKeyField = ForeignKeyField(Label, backref='label_artist')
artist: ForeignKeyField = ForeignKeyField(Artist, backref='label_artists')
ALL_MODELS = [
Song,
Album,
Artist,
Source,
Lyrics,
ArtistAlbum,
Target,
SongArtist
]
if __name__ == "__main__":
database_1 = SqliteDatabase(":memory:")
database_1.create_tables([Song.Use(database_1)])
database_2 = SqliteDatabase(":memory:")
database_2.create_tables([Song.Use(database_2)])
# creating songs, adding it to db_2 if i is even, else to db_1
for i in range(100):
song = Song(title=str(i) + "hs")
db_to_use = database_2 if i % 2 == 0 else database_1
song.use(db_to_use).save()
print("database 1")
for song in Song.Use(database_1).select():
print(song.name)
print("database 2")
for song in Song.Use(database_1).select():
print(song.name)

View File

@@ -0,0 +1,188 @@
# Standard library
from typing import Optional, Union, List
from enum import Enum
from playhouse.migrate import *
# third party modules
from peewee import (
SqliteDatabase,
MySQLDatabase,
PostgresqlDatabase,
)
# own modules
from . import (
data_models,
write
)
from .. import objects
class DatabaseType(Enum):
SQLITE = "sqlite"
POSTGRESQL = "postgresql"
MYSQL = "mysql"
class Database:
database: Union[SqliteDatabase, PostgresqlDatabase, MySQLDatabase]
def __init__(
self,
db_type: DatabaseType,
db_name: str,
db_user: Optional[str] = None,
db_password: Optional[str] = None,
db_host: Optional[str] = None,
db_port: Optional[int] = None
):
self.db_type = db_type
self.db_name = db_name
self.db_user = db_user
self.db_password = db_password
self.db_host = db_host
self.db_port = db_port
self.initialize_database()
def create_database(self) -> Union[SqliteDatabase, PostgresqlDatabase, MySQLDatabase]:
"""Create a database instance based on the configured database type and parameters.
Returns:
The created database instance, or None if an invalid database type was specified.
"""
# SQLITE
if self.db_type == DatabaseType.SQLITE:
return SqliteDatabase(self.db_name)
# POSTGRES
if self.db_type == DatabaseType.POSTGRESQL:
return PostgresqlDatabase(
self.db_name,
user=self.db_user,
password=self.db_password,
host=self.db_host,
port=self.db_port,
)
# MYSQL
if self.db_type == DatabaseType.MYSQL:
return MySQLDatabase(
self.db_name,
user=self.db_user,
password=self.db_password,
host=self.db_host,
port=self.db_port,
)
raise ValueError("Invalid database type specified.")
@property
def migrator(self) -> SchemaMigrator:
if self.db_type == DatabaseType.SQLITE:
return SqliteMigrator(self.database)
if self.db_type == DatabaseType.MYSQL:
return MySQLMigrator(self.database)
if self.db_type == DatabaseType.POSTGRESQL:
return PostgresqlMigrator(self.database)
raise ValueError("Invalid database type specified.")
def initialize_database(self):
"""
Connect to the database
initialize the previously defined databases
create tables if they don't exist.
"""
self.database = self.create_database()
self.database.connect()
migrator = self.migrator
for model in data_models.ALL_MODELS:
model = model.Use(self.database)
if self.database.table_exists(model):
migration_operations = [
migrator.add_column(
model._meta.table_name, field_name, field_obj
)
for field_name, field_obj in model._meta.fields.items()
]
migrate(*migration_operations)
else:
self.database.create_tables([model], safe=True)
#self.database.create_tables([model.Use(self.database) for model in data_models.ALL_MODELS], safe=True)
"""
upgrade old databases.
If a column has been added in a new version, this adds it to old tables
without deleting the data in legacy databases.
"""
for model in data_models.ALL_MODELS:
model = model.Use(self.database)
print(model._meta.fields)
def push(self, database_object: objects.DatabaseObject):
"""
Adds a new music object to the database using the corresponding method from the `write` session.
When possible, rather use the `push_many` function.
This gets even more important, when using a remote database server.
Args:
database_object (objects.MusicObject): The music object to add to the database.
Returns:
The newly added music object.
"""
with write.WritingSession(self.database) as writing_session:
if isinstance(database_object, objects.Song):
return writing_session.add_song(database_object)
if isinstance(database_object, objects.Album):
return writing_session.add_album(database_object)
if isinstance(database_object, objects.Artist):
return writing_session.add_artist(database_object)
def push_many(self, database_objects: List[objects.DatabaseObject]) -> None:
"""
Adds a list of MusicObject instances to the database.
This function only needs one query for each type of table added.
Beware that if you have for example an object like this:
- Album
- Song
- Song
you already have 3 different Tables.
Unlike the function `push`, this function doesn't return the added database objects.
Args:
database_objects: List of MusicObject instances to be added to the database.
"""
with write.WritingSession(self.database) as writing_session:
for obj in database_objects:
if isinstance(obj, objects.Song):
writing_session.add_song(obj)
continue
if isinstance(obj, objects.Album):
writing_session.add_album(obj)
continue
if isinstance(obj, objects.Artist):
writing_session.add_artist(obj)
continue
def __del__(self):
self.database.close()
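# Hedged usage sketch (connection parameters and object list are made up):
# db = Database(DatabaseType.SQLITE, db_name="music.db")
# db.push_many(fetched_objects)  # fetched_objects: a List[objects.DatabaseObject];
# one insert per table instead of one round trip per object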

View File

View File

@@ -0,0 +1,133 @@
from typing import Tuple, Type, Dict, Set
from .results import SearchResults
from ..objects import DatabaseObject, Source
from ..utils.config import youtube_settings
from ..utils.enums.source import SourcePages
from ..utils.support_classes.download_result import DownloadResult
from ..utils.support_classes.query import Query
from ..utils.exception.download import UrlNotFoundException
from ..utils.shared import DEBUG_PAGES
from ..pages import Page, EncyclopaediaMetallum, Musify, YouTube, YoutubeMusic, Bandcamp, INDEPENDENT_DB_OBJECTS
ALL_PAGES: Set[Type[Page]] = {
EncyclopaediaMetallum,
Musify,
YoutubeMusic,
Bandcamp
}
if youtube_settings["use_youtube_alongside_youtube_music"]:
ALL_PAGES.add(YouTube)
AUDIO_PAGES: Set[Type[Page]] = {
Musify,
YouTube,
YoutubeMusic,
Bandcamp
}
SHADY_PAGES: Set[Type[Page]] = {
Musify,
}
if DEBUG_PAGES:
DEBUGGING_PAGE = Bandcamp
print(f"Only downloading from page {DEBUGGING_PAGE}.")
ALL_PAGES = {DEBUGGING_PAGE}
AUDIO_PAGES = ALL_PAGES.union(AUDIO_PAGES)
class Pages:
def __init__(self, exclude_pages: Set[Type[Page]] = None, exclude_shady: bool = False) -> None:
# initialize all page instances
self._page_instances: Dict[Type[Page], Page] = dict()
self._source_to_page: Dict[SourcePages, Type[Page]] = dict()
exclude_pages = exclude_pages if exclude_pages is not None else set()
if exclude_shady:
exclude_pages = exclude_pages.union(SHADY_PAGES)
if not exclude_pages.issubset(ALL_PAGES):
raise ValueError(f"The excluded pages have to be a subset of all pages: {exclude_pages} | {ALL_PAGES}")
def _set_to_tuple(page_set: Set[Type[Page]]) -> Tuple[Type[Page], ...]:
return tuple(sorted(page_set, key=lambda page: page.__name__))
self._pages_set: Set[Type[Page]] = ALL_PAGES.difference(exclude_pages)
self.pages: Tuple[Type[Page], ...] = _set_to_tuple(self._pages_set)
self._audio_pages_set: Set[Type[Page]] = self._pages_set.intersection(AUDIO_PAGES)
self.audio_pages: Tuple[Type[Page], ...] = _set_to_tuple(self._audio_pages_set)
for page_type in self.pages:
self._page_instances[page_type] = page_type()
self._source_to_page[page_type.SOURCE_TYPE] = page_type
def search(self, query: Query) -> SearchResults:
result = SearchResults()
for page_type in self.pages:
result.add(
page=page_type,
search_result=self._page_instances[page_type].search(query=query)
)
return result
def fetch_details(self, music_object: DatabaseObject, stop_at_level: int = 1) -> DatabaseObject:
if not isinstance(music_object, INDEPENDENT_DB_OBJECTS):
return music_object
for source_page in music_object.source_collection.source_pages:
if source_page not in self._source_to_page:
continue
page_type = self._source_to_page[source_page]
if page_type in self._pages_set:
music_object.merge(self._page_instances[page_type].fetch_details(music_object=music_object, stop_at_level=stop_at_level))
return music_object
def is_downloadable(self, music_object: DatabaseObject) -> bool:
_page_types = set(self._source_to_page)
for src in music_object.source_collection.source_pages:
if src in self._source_to_page:
_page_types.add(self._source_to_page[src])
audio_pages = self._audio_pages_set.intersection(_page_types)
return len(audio_pages) > 0
def download(self, music_object: DatabaseObject, genre: str, download_all: bool = False, process_metadata_anyway: bool = False) -> DownloadResult:
if not isinstance(music_object, INDEPENDENT_DB_OBJECTS):
return DownloadResult(error_message=f"{type(music_object).__name__} can't be downloaded.")
self.fetch_details(music_object)
_page_types = set(self._source_to_page)
for src in music_object.source_collection.source_pages:
if src in self._source_to_page:
_page_types.add(self._source_to_page[src])
audio_pages = self._audio_pages_set.intersection(_page_types)
for download_page in audio_pages:
return self._page_instances[download_page].download(music_object=music_object, genre=genre, download_all=download_all, process_metadata_anyway=process_metadata_anyway)
return DownloadResult(error_message=f"No audio source has been found for {music_object}.")
def fetch_url(self, url: str, stop_at_level: int = 2) -> Tuple[Type[Page], DatabaseObject]:
source = Source.match_url(url, SourcePages.MANUAL)
if source is None:
raise UrlNotFoundException(url=url)
_actual_page = self._source_to_page[source.page_enum]
return _actual_page, self._page_instances[_actual_page].fetch_object_from_source(source=source, stop_at_level=stop_at_level)

View File

@@ -0,0 +1,94 @@
from typing import Tuple, Type, Dict, List, Generator, Union
from dataclasses import dataclass
from ..objects import DatabaseObject
from ..utils.enums.source import SourcePages
from ..pages import Page, EncyclopaediaMetallum, Musify
@dataclass
class Option:
index: int
music_object: DatabaseObject
class Results:
def __init__(self) -> None:
self._by_index: Dict[int, DatabaseObject] = dict()
self._page_by_index: Dict[int, Type[Page]] = dict()
def __iter__(self) -> Generator[DatabaseObject, None, None]:
for option in self.formated_generator():
if isinstance(option, Option):
yield option.music_object
def formated_generator(self, max_items_per_page: int = 10) -> Generator[Union[Type[Page], Option], None, None]:
self._by_index = dict()
self._page_by_index = dict()
def get_music_object_by_index(self, index: int) -> Tuple[Type[Page], DatabaseObject]:
        # If this raises a KeyError, either the formatted generator needs to be iterated first, or the option doesn't exist.
return self._page_by_index[index], self._by_index[index]
class SearchResults(Results):
def __init__(
self,
pages: Tuple[Type[Page], ...] = None
) -> None:
super().__init__()
self.pages = pages or []
# this would initialize a list for every page, which I don't think I want
# self.results = Dict[Type[Page], List[DatabaseObject]] = {page: [] for page in self.pages}
self.results: Dict[Type[Page], List[DatabaseObject]] = {}
def add(self, page: Type[Page], search_result: List[DatabaseObject]):
"""
        adds a list of found music objects to the corresponding page
        WARNING: if a page already has search results, they will be overwritten
"""
self.results[page] = search_result
def get_page_results(self, page: Type[Page]) -> "PageResults":
return PageResults(page, self.results.get(page, []))
def formated_generator(self, max_items_per_page: int = 10):
super().formated_generator()
i = 0
for page in self.results:
yield page
j = 0
for option in self.results[page]:
yield Option(i, option)
self._by_index[i] = option
self._page_by_index[i] = page
i += 1
j += 1
if j >= max_items_per_page:
break
class PageResults(Results):
def __init__(self, page: Type[Page], results: List[DatabaseObject]) -> None:
super().__init__()
self.page: Type[Page] = page
self.results: List[DatabaseObject] = results
def formated_generator(self, max_items_per_page: int = 10):
super().formated_generator()
i = 0
yield self.page
for option in self.results:
yield Option(i, option)
self._by_index[i] = option
self._page_by_index[i] = self.page
i += 1
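A sketch of how the formatted generators above are meant to be consumed; `found_songs` stands in for a real list of DatabaseObject search hits.

results = SearchResults()
results.add(page=Musify, search_result=found_songs)  # found_songs: hypothetical hits

for element in results.formated_generator():
    if isinstance(element, Option):
        print(f"{element.index:02d}: {element.music_object}")
    else:
        print(f"--- results from {element.__name__} ---")

# only valid after the generator has been iterated, since iterating fills the index maps
page, chosen = results.get_music_object_by_index(0)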

View File

@@ -0,0 +1,25 @@
from typing_extensions import TypeVar
from .option import Options
from .metadata import Metadata, Mapping as ID3Mapping, ID3Timestamp
from .source import Source, SourcePages, SourceTypes
from .song import (
Song,
Album,
Artist,
Target,
Lyrics,
Label
)
from .formatted_text import FormattedText
from .collection import Collection
from .country import Country
from .contact import Contact
from .parents import OuterProxy
DatabaseObject = TypeVar('DatabaseObject', bound=OuterProxy)

View File

@@ -0,0 +1,110 @@
from collections import defaultdict
from typing import Dict, List, Optional
import weakref
from .parents import DatabaseObject
"""
This is a cache for the objects, that et pulled out of the database.
This is necessary, to not have duplicate objects with the same id.
Using a cache that maps the ojects to their id has multiple benefits:
- if you modify the object at any point, all objects with the same id get modified *(copy by reference)*
- less ram usage
- to further decrease ram usage I only store weak refs and not a strong reference, for the gc to still work
"""
class ObjectCache:
"""
ObjectCache is a cache for the objects retrieved from a database.
It maps each object to its id and uses weak references to manage its memory usage.
Using a cache for these objects provides several benefits:
- Modifying an object updates all objects with the same id (due to copy by reference)
- Reduced memory usage
    :attr object_to_id: Dictionary that maps each id to its DatabaseObject (despite the name).
    :attr weakref_map: Dictionary that uses weak references to DatabaseObjects as keys and their id as values.

    :method exists: Check if a DatabaseObject already exists in the cache.
    :method append: Add a DatabaseObject to the cache if it does not already exist.
    :method extent: Add a list of DatabaseObjects to the cache.
    :method remove: Remove a DatabaseObject from the cache by its id.
    :method get: Retrieve a DatabaseObject from the cache by its id.
    """
object_to_id: Dict[str, DatabaseObject]
weakref_map: Dict[weakref.ref, str]
def __init__(self) -> None:
self.object_to_id = dict()
self.weakref_map = defaultdict()
def exists(self, database_object: DatabaseObject) -> bool:
"""
Check if a DatabaseObject with the same id already exists in the cache.
:param database_object: The DatabaseObject to check for.
:return: True if the DatabaseObject exists, False otherwise.
"""
if database_object.dynamic:
return True
return database_object.id in self.object_to_id
def on_death(self, weakref_: weakref.ref) -> None:
"""
Callback function that gets triggered when the reference count of a DatabaseObject drops to 0.
This function removes the DatabaseObject from the cache.
:param weakref_: The weak reference of the DatabaseObject that has been garbage collected.
"""
data_id = self.weakref_map.pop(weakref_)
self.object_to_id.pop(data_id)
def get_weakref(self, database_object: DatabaseObject) -> weakref.ref:
return weakref.ref(database_object, self.on_death)
def append(self, database_object: DatabaseObject) -> bool:
"""
Add a DatabaseObject to the cache.
:param database_object: The DatabaseObject to add to the cache.
:return: True if the DatabaseObject already exists in the cache, False otherwise.
"""
if self.exists(database_object):
return True
self.weakref_map[weakref.ref(database_object, self.on_death)] = database_object.id
self.object_to_id[database_object.id] = database_object
return False
def extent(self, database_object_list: List[DatabaseObject]):
"""
        Analogous to list.extend, this appends every object of the given list to the cache.
"""
for database_object in database_object_list:
self.append(database_object)
def remove(self, _id: str):
"""
Remove a DatabaseObject from the cache.
:param _id: The id of the DatabaseObject to remove from the cache.
"""
data = self.object_to_id.get(_id)
if data:
self.weakref_map.pop(weakref.ref(data))
self.object_to_id.pop(_id)
def __getitem__(self, item) -> Optional[DatabaseObject]:
"""
this returns the data obj
:param item: the id of the music object
:return:
"""
return self.object_to_id.get(item)
def get(self, _id: str) -> Optional[DatabaseObject]:
return self.__getitem__(_id)
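A lifecycle sketch for the cache above, assuming `SomeDatabaseObject` is any DatabaseObject subclass exposing `id` and `dynamic`:

import gc

cache = ObjectCache()
obj = SomeDatabaseObject()   # hypothetical

cache.append(obj)            # False: newly inserted
cache.append(obj)            # True: already cached
assert cache.get(obj.id) is obj

del obj       # drop the last strong reference ...
gc.collect()  # ... and on_death evicts the entry from both maps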

View File

@@ -0,0 +1,331 @@
from __future__ import annotations
from collections import defaultdict
from typing import TypeVar, Generic, Dict, Optional, Iterable, List, Iterator, Tuple, Any
from .parents import OuterProxy
T = TypeVar('T', bound=OuterProxy)
class Collection(Generic[T]):
__is_collection__ = True
_data: List[T]
_indexed_values: Dict[str, set]
    _indexed_to_objects: Dict[Any, list]
shallow_list = property(fget=lambda self: self.data)
def __init__(
self,
data: Optional[Iterable[T]] = None,
sync_on_append: Dict[str, Collection] = None,
contain_given_in_attribute: Dict[str, Collection] = None,
contain_attribute_in_given: Dict[str, Collection] = None,
append_object_to_attribute: Dict[str, T] = None
) -> None:
self._contains_ids = set()
self._data = []
self.parents: List[Collection[T]] = []
self.children: List[Collection[T]] = []
# List of collection attributes that should be modified on append
# Key: collection attribute (str) of appended element
# Value: main collection to sync to
self.contain_given_in_attribute: Dict[str, Collection] = contain_given_in_attribute or {}
self.contain_attribute_in_given: Dict[str, Collection] = contain_attribute_in_given or {}
self.append_object_to_attribute: Dict[str, T] = append_object_to_attribute or {}
self.contain_self_on_append: List[str] = []
self._indexed_values = defaultdict(set)
self._indexed_to_objects = defaultdict(list)
self.extend(data)
def _map_element(self, __object: T, from_map: bool = False):
if __object.id in self._contains_ids:
return
self._contains_ids.add(__object.id)
for name, value in __object.indexing_values:
if value is None:
continue
self._indexed_values[name].add(value)
self._indexed_to_objects[value].append(__object)
if not from_map:
for attribute, new_object in self.contain_given_in_attribute.items():
__object.__getattribute__(attribute).contain_collection_inside(new_object)
for attribute, new_object in self.contain_attribute_in_given.items():
new_object.contain_collection_inside(__object.__getattribute__(attribute))
for attribute, new_object in self.append_object_to_attribute.items():
__object.__getattribute__(attribute).append(new_object)
def _unmap_element(self, __object: T):
if __object.id in self._contains_ids:
self._contains_ids.remove(__object.id)
for name, value in __object.indexing_values:
if value is None:
continue
if value not in self._indexed_values[name]:
continue
try:
self._indexed_to_objects[value].remove(__object)
except ValueError:
continue
if not len(self._indexed_to_objects[value]):
self._indexed_values[name].remove(value)
def _contained_in_self(self, __object: T) -> bool:
if __object.id in self._contains_ids:
return True
for name, value in __object.indexing_values:
if value is None:
continue
if value in self._indexed_values[name]:
return True
return False
def _contained_in_sub(self, __object: T, break_at_first: bool = True) -> List[Collection]:
"""
Gets the collection this object is found in, if it is found in any.
:param __object:
:param break_at_first:
:return:
"""
results = []
if self._contained_in_self(__object):
return [self]
for collection in self.children:
results.extend(collection._contained_in_sub(__object, break_at_first=break_at_first))
if break_at_first:
return results
return results
def _get_root_collections(self) -> List[Collection]:
if not len(self.parents):
return [self]
root_collections = []
for upper_collection in self.parents:
root_collections.extend(upper_collection._get_root_collections())
return root_collections
@property
def _is_root(self) -> bool:
return len(self.parents) <= 0
def _get_parents_of_multiple_contained_children(self, __object: T):
results = []
if len(self.children) < 2 or self._contained_in_self(__object):
return results
count = 0
for collection in self.children:
sub_results = collection._get_parents_of_multiple_contained_children(__object)
if len(sub_results) > 0:
count += 1
results.extend(sub_results)
if count >= 2:
results.append(self)
return results
def merge_into_self(self, __object: T, from_map: bool = False):
"""
1. find existing objects
2. merge into existing object
3. remap existing object
"""
if __object.id in self._contains_ids:
return
        existing_object: Optional[T] = None
for name, value in __object.indexing_values:
if value is None:
continue
if value in self._indexed_values[name]:
existing_object = self._indexed_to_objects[value][0]
if existing_object.id == __object.id:
return None
break
if existing_object is None:
return None
existing_object.merge(__object)
# just a check if it really worked
if existing_object.id != __object.id:
raise ValueError("This should NEVER happen. Merging doesn't work.")
self._map_element(existing_object, from_map=from_map)
def contains(self, __object: T) -> bool:
return len(self._contained_in_sub(__object)) > 0
def _append(self, __object: T, from_map: bool = False):
self._map_element(__object, from_map=from_map)
self._data.append(__object)
def _find_object_in_self(self, __object: T) -> Optional[T]:
for name, value in __object.indexing_values:
if value is None:
continue
if value in self._indexed_values[name]:
return self._indexed_to_objects[value][0]
def _find_object(self, __object: T) -> Tuple[Collection[T], Optional[T]]:
other_object = self._find_object_in_self(__object)
if other_object is not None:
return self, other_object
for c in self.children:
o, other_object = c._find_object(__object)
if other_object is not None:
return o, other_object
return self, None
def append(self, __object: Optional[T], already_is_parent: bool = False, from_map: bool = False):
"""
        If an object that represents the same entity exists in a relevant collection,
        merge into that object (and remap).
        Else append to this collection.
:param __object:
:param already_is_parent:
:param from_map:
:return:
"""
if __object is None or __object.id in self._contains_ids:
return
append_to, existing_object = self._find_object(__object)
if existing_object is None:
# append
# print("appending", existing_object, __object)
append_to._data.append(__object)
else:
# merge
append_to._unmap_element(existing_object)
existing_object.merge(__object)
append_to._map_element(__object, from_map=from_map)
"""
exists_in_collection = self._contained_in_sub(__object)
if len(exists_in_collection) and self is exists_in_collection[0]:
# assuming that the object already is contained in the correct collections
if not already_is_parent:
self.merge_into_self(__object, from_map=from_map)
return
if not len(exists_in_collection):
self._append(__object, from_map=from_map)
else:
exists_in_collection[0].merge_into_self(__object, from_map=from_map)
if not already_is_parent or not self._is_root:
for parent_collection in self._get_parents_of_multiple_contained_children(__object):
pass
parent_collection.append(__object, already_is_parent=True, from_map=from_map)
"""
def extend(self, __iterable: Optional[Iterable[T]], from_map: bool = False):
if __iterable is None:
return
for __object in __iterable:
self.append(__object, from_map=from_map)
def sync_with_other_collection(self, equal_collection: Collection):
"""
If two collections always need to have the same values, this can be used.
Internally:
1. import the data from other to self
- _data
- contained_collections
2. replace all refs from the other object, with refs from this object
"""
if equal_collection is self:
return
# don't add the elements from the subelements from the other collection.
# this will be done in the next step.
self.extend(equal_collection._data)
# add all submodules
for equal_sub_collection in equal_collection.children:
self.contain_collection_inside(equal_sub_collection)
def contain_collection_inside(self, sub_collection: Collection):
"""
This collection will ALWAYS contain everything from the passed in collection
"""
if self is sub_collection or sub_collection in self.children:
return
self.children.append(sub_collection)
sub_collection.parents.append(self)
@property
def data(self) -> List[T]:
return [*self._data,
*(__object for collection in self.children for __object in collection.shallow_list)]
def __len__(self) -> int:
return len(self._data) + sum(len(collection) for collection in self.children)
@property
def empty(self) -> bool:
return self.__len__() <= 0
def __iter__(self) -> Iterator[T]:
for element in self._data:
yield element
for c in self.children:
for element in c:
yield element
def __merge__(self, __other: Collection, override: bool = False):
self.extend(__other._data, from_map=True)
def __getitem__(self, item: int):
if item < len(self._data):
return self._data[item]
item = item - len(self._data)
for c in self.children:
if item < len(c):
return c.__getitem__(item)
            item = item - len(c)  # skip this child's full range, including its own children
raise IndexError
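A containment sketch for the class above; `song_a` and `song_b` stand in for OuterProxy objects exposing `id` and `indexing_values`.

main_collection = Collection([song_a])
sub_collection = Collection([song_b])
main_collection.contain_collection_inside(sub_collection)

len(main_collection)          # 2: own _data plus everything in children
main_collection[1] is song_b  # indexing walks _data first, then each child
list(main_collection)         # __iter__ yields own elements, then the children's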

View File

@@ -0,0 +1,38 @@
from typing import Optional, List, Tuple
from ..utils.enums.contact import ContactMethod
from .parents import OuterProxy
class Contact(OuterProxy):
COLLECTION_STRING_ATTRIBUTES = tuple()
SIMPLE_STRING_ATTRIBUTES = {
"contact_method": None,
"value": None,
}
@property
def indexing_values(self) -> List[Tuple[str, object]]:
return [
('id', self.id),
('value', self.value),
]
def __init__(self, contact_method: ContactMethod, value: str, **kwargs) -> None:
super().__init__(**kwargs)
self.contact_method: ContactMethod = contact_method
self.value: str = value
@classmethod
def match_url(cls, url: str) -> Optional["Contact"]:
url = url.strip()
if url.startswith("mailto:"):
return cls(ContactMethod.EMAIL, url.replace("mailto:", "", 1))
if url.startswith("tel:"):
return cls(ContactMethod.PHONE, url.replace("tel:", "", 1))
if url.startswith("fax:"):
return cls(ContactMethod.FAX, url.replace("fax:", "", 1))
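Usage of the matcher above (values are illustrative):

Contact.match_url("mailto:booking@example.com")
# -> Contact(contact_method=ContactMethod.EMAIL, value="booking@example.com")

Contact.match_url("https://example.com")
# -> None: only mailto:, tel: and fax: are recognised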

File diff suppressed because one or more lines are too long

View File

@@ -0,0 +1,78 @@
import pandoc
"""
TODO
implement a script in setup.py to install pandoc
https://pandoc.org/installing.html
!!!!!!!!!!!!!!!!!!IMPORTANT!!!!!!!!!!!!!!!!!!
"""
class FormattedText:
"""
the self.html value should be saved to the database
"""
doc = None
def __init__(
self,
plaintext: str = None,
markdown: str = None,
html: str = None
) -> None:
self.set_plaintext(plaintext)
self.set_markdown(markdown)
self.set_html(html)
def set_plaintext(self, plaintext: str):
if plaintext is None:
return
self.doc = pandoc.read(plaintext)
def set_markdown(self, markdown: str):
if markdown is None:
return
self.doc = pandoc.read(markdown, format="markdown")
def set_html(self, html: str):
if html is None:
return
self.doc = pandoc.read(html, format="html")
def get_markdown(self) -> str:
if self.doc is None:
return ""
return pandoc.write(self.doc, format="markdown").strip()
def get_html(self) -> str:
if self.doc is None:
return ""
return pandoc.write(self.doc, format="html").strip()
def get_plaintext(self) -> str:
if self.doc is None:
return ""
return pandoc.write(self.doc, format="plain").strip()
@property
def is_empty(self) -> bool:
return self.doc is None
    def __eq__(self, other) -> bool:
if type(other) != type(self):
return False
if self.is_empty and other.is_empty:
return True
return self.doc == other.doc
def __str__(self) -> str:
return self.plaintext
plaintext = property(fget=get_plaintext, fset=set_plaintext)
markdown = property(fget=get_markdown, fset=set_markdown)
html = property(fget=get_html, fset=set_html)
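A round-trip sketch for the class above; this requires both the `pandoc` Python package and a pandoc binary on the system:

note = FormattedText(markdown="# Headline\n\nSome *emphasised* text.")
note.html       # rendered by pandoc, roughly "<h1>Headline</h1> <p>Some <em>emphasised</em> text.</p>"
note.plaintext  # the same shared document written back out as plain text

FormattedText() == FormattedText()  # True: two empty texts compare equal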

View File

@@ -0,0 +1,96 @@
from typing import List, TypeVar, Type
from .country import Language
from .lyrics import Lyrics
from .parents import OuterProxy
from .song import Song, Album, Artist, Label
from .source import Source
from .target import Target
T = TypeVar('T', bound=OuterProxy)
ALL_CLASSES: List[Type[T]] = [Song, Album, Artist, Label, Source, Target, Lyrics]
def print_lint_res(missing_values: dict):
print("_default_factories = {")
for key, value in missing_values.items():
print(f'\t"{key}": {value},')
print("}")
# def __init__(self, foo: str, bar) -> None: ...
def lint_type(cls: T):
all_values: dict = {}
missing_values: dict = {}
for key, value in cls.__annotations__.items():
if value is None:
continue
if (not key.islower()) or key.startswith("_") or (key.startswith("__") and key.endswith("__")):
continue
if key in cls._default_factories:
continue
factory = "lambda: None"
if isinstance(value, str):
if value == "SourceCollection":
factory = "SourceCollection"
elif "collection" in value.lower():
factory = "Collection"
elif value.istitle():
factory = value
else:
if value is Language:
factory = 'Language.by_alpha_2("en")'
else:
try:
value()
factory = value.__name__
except TypeError:
pass
missing_values[key] = factory
if len(missing_values) > 0:
print(f"{cls.__name__}:")
print_lint_res(missing_values)
print()
else:
print(f"Everything is fine at {cls.__name__}")
p = []
s = []
for key, value in cls.__annotations__.items():
has_default = key in cls._default_factories
if not isinstance(value, str):
value = value.__name__
if key.endswith("_collection"):
key = key.replace("_collection", "_list")
if isinstance(value, str):
if value.startswith("Collection[") and value.endswith("]"):
value = value.replace("Collection", "List")
if isinstance(value, str) and has_default:
value = value + " = None"
p.append(f'{key}: {value}')
s.append(f'{key}={key}')
p.append("**kwargs")
s.append("**kwargs")
print("# This is automatically generated")
print(f"def __init__(self, {', '.join(p)}) -> None:")
print(f"\tsuper().__init__({', '.join(s)})")
print()
def lint():
for i in ALL_CLASSES:
lint_type(i)
print()
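For illustration, `print_lint_res` with one hypothetical missing attribute prints a paste-ready factory dict:

print_lint_res({"notes": "FormattedText"})
# _default_factories = {
# 	"notes": FormattedText,
# }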

View File

@@ -0,0 +1,27 @@
from typing import List
from collections import defaultdict
import pycountry
from .parents import OuterProxy
from .source import Source, SourceCollection
from .formatted_text import FormattedText
from .country import Language
class Lyrics(OuterProxy):
text: FormattedText
language: Language
source_collection: SourceCollection
_default_factories = {
"text": FormattedText,
"language": lambda: Language.by_alpha_2("en"),
"source_collection": SourceCollection,
}
# This is automatically generated
    def __init__(self, text: FormattedText = None, language: Language = None, source_list: List[Source] = None,
**kwargs) -> None:
super().__init__(text=text, language=language, source_list=source_list, **kwargs)

View File

@@ -0,0 +1,400 @@
from enum import Enum
from typing import List, Dict, Tuple
from mutagen import id3
import datetime
class Mapping(Enum):
"""
    These frames belong to the id3 standard
https://web.archive.org/web/20220830091059/https://id3.org/id3v2.4.0-frames
https://id3lib.sourceforge.net/id3/id3v2com-00.html
https://mutagen-specs.readthedocs.io/en/latest/id3/id3v2.4.0-frames.html
"""
# Textframes
TITLE = "TIT2"
ISRC = "TSRC"
LENGTH = "TLEN" # in milliseconds
# The 'Date' frame is a numeric string in the DDMM format containing the date for the recording. This field is always four characters long.
DATE = "TDAT"
# The 'Time' frame is a numeric string in the HHMM format containing the time for the recording. This field is always four characters long.
TIME = "TIME"
YEAR = "TYER"
TRACKNUMBER = "TRCK"
TOTALTRACKS = "TRCK" # Stored in the same frame with TRACKNUMBER, separated by '/': e.g. '4/9'.
TITLESORTORDER = "TSOT"
ENCODING_SETTINGS = "TSSE"
SUBTITLE = "TIT3"
SET_SUBTITLE = "TSST"
RELEASE_DATE = "TDRL"
RECORDING_DATES = "TXXX"
PUBLISHER_URL = "WPUB"
PUBLISHER = "TPUB"
RATING = "POPM"
DISCNUMBER = "TPOS"
MOVEMENT_COUNT = "MVIN"
TOTALDISCS = "TPOS"
ORIGINAL_RELEASE_DATE = "TDOR"
ORIGINAL_ARTIST = "TOPE"
ORIGINAL_ALBUM = "TOAL"
MEDIA_TYPE = "TMED"
LYRICIST = "TEXT"
WRITER = "TEXT"
ARTIST = "TPE1"
LANGUAGE = "TLAN" # https://en.wikipedia.org/wiki/ISO_639-2
ITUNESCOMPILATION = "TCMP"
REMIXED_BY = "TPE4"
RADIO_STATION_OWNER = "TRSO"
RADIO_STATION = "TRSN"
INITIAL_KEY = "TKEY"
OWNER = "TOWN"
ENCODED_BY = "TENC"
COPYRIGHT = "TCOP"
GENRE = "TCON"
GROUPING = "TIT1"
CONDUCTOR = "TPE3"
COMPOSERSORTORDER = "TSOC"
COMPOSER = "TCOM"
BPM = "TBPM"
ALBUM_ARTIST = "TPE2"
BAND = "TPE2"
ARTISTSORTORDER = "TSOP"
ALBUM = "TALB"
ALBUMSORTORDER = "TSOA"
ALBUMARTISTSORTORDER = "TSO2"
TAGGING_TIME = "TDTG"
SOURCE_WEBPAGE_URL = "WOAS"
FILE_WEBPAGE_URL = "WOAF"
INTERNET_RADIO_WEBPAGE_URL = "WORS"
ARTIST_WEBPAGE_URL = "WOAR"
COPYRIGHT_URL = "WCOP"
COMMERCIAL_INFORMATION_URL = "WCOM"
    PAYMENT_URL = "WPAY"
MOVEMENT_INDEX = "MVIN"
MOVEMENT_NAME = "MVNM"
UNSYNCED_LYRICS = "USLT"
COMMENT = "COMM"
@classmethod
def get_text_instance(cls, key: str, value: str):
return id3.Frames[key](encoding=3, text=value)
@classmethod
def get_url_instance(cls, key: str, url: str):
return id3.Frames[key](encoding=3, url=url)
@classmethod
def get_mutagen_instance(cls, attribute, value):
key = attribute.value
if key[0] == 'T':
            # a text field
return cls.get_text_instance(key, value)
if key[0] == "W":
            # a URL field
return cls.get_url_instance(key, value)
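A quick sketch of the frame factory above:

frame = Mapping.get_mutagen_instance(Mapping.TITLE, "Some Title")
# TITLE maps to "TIT2", a text frame, so this is id3.TIT2(encoding=3, text="Some Title")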
class ID3Timestamp:
def __init__(
self,
year: int = None,
month: int = None,
day: int = None,
hour: int = None,
minute: int = None,
second: int = None
):
self.year = year
self.month = month
self.day = day
self.hour = hour
self.minute = minute
self.second = second
self.has_year = year is not None
self.has_month = month is not None
self.has_day = day is not None
self.has_hour = hour is not None
self.has_minute = minute is not None
self.has_second = second is not None
if not self.has_year:
year = 1
if not self.has_month:
month = 1
if not self.has_day:
day = 1
if not self.has_hour:
hour = 1
if not self.has_minute:
minute = 1
if not self.has_second:
second = 1
self.date_obj = datetime.datetime(
year=year,
month=month,
day=day,
hour=hour,
minute=minute,
second=second
)
def __hash__(self):
return self.date_obj.__hash__()
def __lt__(self, other):
return self.date_obj < other.date_obj
def __le__(self, other):
return self.date_obj <= other.date_obj
def __gt__(self, other):
return self.date_obj > other.date_obj
def __ge__(self, other):
return self.date_obj >= other.date_obj
def __eq__(self, other):
if type(other) != type(self):
return False
return self.date_obj == other.date_obj
def get_time_format(self) -> str:
"""
https://mutagen-specs.readthedocs.io/en/latest/id3/id3v2.4.0-structure.html
The timestamp fields are based on a subset of ISO 8601. When being as precise as possible the format of a
time string is
- yyyy-MM-ddTHH:mm:ss
- (year[%Y], “-”, month[%m], “-”, day[%d], “T”, hour (out of 24)[%H], ”:”, minutes[%M], ”:”, seconds[%S])
- %Y-%m-%dT%H:%M:%S
but the precision may be reduced by removing as many time indicators as wanted. Hence valid timestamps are
- yyyy
- yyyy-MM
- yyyy-MM-dd
- yyyy-MM-ddTHH
- yyyy-MM-ddTHH:mm
- yyyy-MM-ddTHH:mm:ss
All time stamps are UTC. For durations, use the slash character as described in 8601,
and for multiple non-contiguous dates, use multiple strings, if allowed by the frame definition.
:return timestamp: as timestamp in the format of the id3 time as above described
"""
if self.has_year and self.has_month and self.has_day and self.has_hour and self.has_minute and self.has_second:
return "%Y-%m-%dT%H:%M:%S"
if self.has_year and self.has_month and self.has_day and self.has_hour and self.has_minute:
return "%Y-%m-%dT%H:%M"
if self.has_year and self.has_month and self.has_day and self.has_hour:
return "%Y-%m-%dT%H"
if self.has_year and self.has_month and self.has_day:
return "%Y-%m-%d"
if self.has_year and self.has_month:
return "%Y-%m"
if self.has_year:
return "%Y"
return ""
def get_timestamp(self) -> str:
time_format = self.get_time_format()
return self.date_obj.strftime(time_format)
def get_timestamp_w_format(self) -> Tuple[str, str]:
time_format = self.get_time_format()
return time_format, self.date_obj.strftime(time_format)
@classmethod
def fromtimestamp(cls, utc_timestamp: int):
date_obj = datetime.datetime.fromtimestamp(utc_timestamp)
return cls(
year=date_obj.year,
month=date_obj.month,
day=date_obj.day,
hour=date_obj.hour,
minute=date_obj.minute,
second=date_obj.second
)
@classmethod
def strptime(cls, time_stamp: str, format: str):
"""
day: "%d"
month: "%b", "%B", "%m"
year: "%y", "%Y"
hour: "%H", "%I"
minute: "%M"
second: "%S"
"""
date_obj = datetime.datetime.strptime(time_stamp, format)
day = None
if "%d" in format:
day = date_obj.day
month = None
if any([i in format for i in ("%b", "%B", "%m")]):
month = date_obj.month
year = None
if any([i in format for i in ("%y", "%Y")]):
year = date_obj.year
hour = None
if any([i in format for i in ("%H", "%I")]):
hour = date_obj.hour
minute = None
if "%M" in format:
minute = date_obj.minute
second = None
if "%S" in format:
second = date_obj.second
return cls(
year=year,
month=month,
day=day,
hour=hour,
minute=minute,
second=second
)
@classmethod
def now(cls):
date_obj = datetime.datetime.now()
return cls(
year=date_obj.year,
month=date_obj.month,
day=date_obj.day,
hour=date_obj.hour,
minute=date_obj.minute,
second=date_obj.second
)
def strftime(self, format: str) -> str:
return self.date_obj.strftime(format)
def __str__(self) -> str:
return self.timestamp
def __repr__(self) -> str:
return self.timestamp
timestamp: str = property(fget=get_timestamp)
timeformat: str = property(fget=get_time_format)
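The timestamp precision follows whichever fields were actually set or parsed:

ID3Timestamp(year=2024).timestamp           # "2024"
ID3Timestamp(year=2024, month=4).timestamp  # "2024-04"

ts = ID3Timestamp.strptime("2022-03-09", "%Y-%m-%d")
ts.timestamp                                # "2022-03-09"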
class Metadata:
# it's a null byte for the later concatenation of text frames
NULL_BYTE: str = "\x00"
    # the key is an enum from Mapping
    # the value is a list of values
# the mutagen object for each frame will be generated dynamically
id3_dict: Dict[Mapping, list]
    def __init__(self, id3_dict: Dict[Mapping, list] = None) -> None:
self.id3_dict = dict()
if id3_dict is not None:
self.add_metadata_dict(id3_dict)
def __setitem__(self, frame: Mapping, value_list: list, override_existing: bool = True):
if type(value_list) != list:
raise ValueError(f"can only set attribute to list, not {type(value_list)}")
new_val = [i for i in value_list if i not in {None, ''}]
if len(new_val) == 0:
return
if override_existing:
self.id3_dict[frame] = new_val
else:
if frame not in self.id3_dict:
self.id3_dict[frame] = new_val
return
self.id3_dict[frame].extend(new_val)
def __getitem__(self, key):
if key not in self.id3_dict:
return None
return self.id3_dict[key]
def delete_field(self, key: str):
if key in self.id3_dict:
return self.id3_dict.pop(key)
def add_metadata_dict(self, metadata_dict: dict, override_existing: bool = True):
for field_enum, value in metadata_dict.items():
self.__setitem__(field_enum, value, override_existing=override_existing)
def merge(self, other, override_existing: bool = False):
"""
adds the values of another metadata obj to this one
other is a value of the type MetadataAttribute.Metadata
"""
self.add_metadata_dict(other.id3_dict, override_existing=override_existing)
def merge_many(self, many_other):
"""
adds the values of many other metadata objects to this one
"""
for other in many_other:
self.merge(other)
def get_id3_value(self, field):
if field not in self.id3_dict:
return None
list_data = self.id3_dict[field]
# convert for example the time objects to timestamps
for i, element in enumerate(list_data):
            # for performance's sake, skip further checks if it is already the right type
if type(element) == str:
continue
if type(element) in {int}:
list_data[i] = str(element)
if type(element) == ID3Timestamp:
list_data[i] = element.timestamp
continue
"""
Version 2.4 of the specification prescribes that all text fields (the fields that start with a T, except for TXXX) can contain multiple values separated by a null character.
        Thus, if the above conditions are met, the list is concatenated;
        otherwise the first element is taken.
"""
if field.value[0].upper() == "T" and field.value.upper() != "TXXX":
return self.NULL_BYTE.join(list_data)
return list_data[0]
def get_mutagen_object(self, field):
return Mapping.get_mutagen_instance(field, self.get_id3_value(field))
def __str__(self) -> str:
rows = []
for key, value in self.id3_dict.items():
rows.append(f"{key} - {str(value)}")
return "\n".join(rows)
def __iter__(self):
"""
        returns a generator you can iterate through
        to directly tag a file with an id3 container.
"""
# set the tagging timestamp to the current time
self.__setitem__(Mapping.TAGGING_TIME, [ID3Timestamp.now()])
for field in self.id3_dict:
yield self.get_mutagen_object(field)
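A tagging sketch for the class above, assuming mutagen is available; the file path is a placeholder:

from mutagen.id3 import ID3

metadata = Metadata({
    Mapping.TITLE: ["Some Title"],
    Mapping.ARTIST: ["Artist A", "Artist B"],  # text frames are joined on NULL_BYTE
})

container = ID3()
for frame in metadata:  # iterating also stamps Mapping.TAGGING_TIME
    container.add(frame)
# container.save("song.mp3")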

View File

@@ -0,0 +1,257 @@
from __future__ import annotations
from collections import defaultdict
from typing import TypeVar, Generic, Dict, Optional, Iterable, List, Iterator, Any
from .parents import OuterProxy
T = TypeVar('T', bound=OuterProxy)
class Collection(Generic[T]):
_data: List[T]
_indexed_values: Dict[str, set]
    _indexed_to_objects: Dict[Any, list]
shallow_list = property(fget=lambda self: self.data)
def __init__(
self,
data: Optional[Iterable[T]] = None,
sync_on_append: Dict[str, "Collection"] = None,
contain_given_in_attribute: Dict[str, "Collection"] = None,
contain_attribute_in_given: Dict[str, "Collection"] = None,
append_object_to_attribute: Dict[str, T] = None
) -> None:
self._contains_ids = set()
self._data = []
self.upper_collections: List[Collection[T]] = []
self.contained_collections: List[Collection[T]] = []
# List of collection attributes that should be modified on append
# Key: collection attribute (str) of appended element
# Value: main collection to sync to
self.sync_on_append: Dict[str, Collection] = sync_on_append or {}
self.contain_given_in_attribute: Dict[str, Collection] = contain_given_in_attribute or {}
self.contain_attribute_in_given: Dict[str, Collection] = contain_attribute_in_given or {}
self.append_object_to_attribute: Dict[str, T] = append_object_to_attribute or {}
self.contain_self_on_append: List[str] = []
self._indexed_values = defaultdict(set)
self._indexed_to_objects = defaultdict(list)
self.extend(data)
def _map_element(self, __object: T, from_map: bool = False):
self._contains_ids.add(__object.id)
for name, value in __object.indexing_values:
if value is None:
continue
self._indexed_values[name].add(value)
self._indexed_to_objects[value].append(__object)
if not from_map:
for attribute, new_object in self.contain_given_in_attribute.items():
__object.__getattribute__(attribute).contain_collection_inside(new_object)
            for attribute, new_object in self.contain_attribute_in_given.items():
new_object.contain_collection_inside(__object.__getattribute__(attribute))
for attribute, new_object in self.append_object_to_attribute.items():
__object.__getattribute__(attribute).append(new_object, from_map=True)
def _unmap_element(self, __object: T):
self._contains_ids.remove(__object.id)
for name, value in __object.indexing_values:
if value is None:
continue
if value not in self._indexed_values[name]:
continue
try:
self._indexed_to_objects[value].remove(__object)
except ValueError:
continue
if not len(self._indexed_to_objects[value]):
self._indexed_values[name].remove(value)
def _contained_in_self(self, __object: T) -> bool:
if __object.id in self._contains_ids:
return True
for name, value in __object.indexing_values:
if value is None:
continue
if value in self._indexed_values[name]:
return True
return False
def _get_root_collections(self) -> List["Collection"]:
if not len(self.upper_collections):
return [self]
root_collections = []
for upper_collection in self.upper_collections:
root_collections.extend(upper_collection._get_root_collections())
return root_collections
@property
def _is_root(self) -> bool:
return len(self.upper_collections) <= 0
def _contained_in_sub(self, __object: T, break_at_first: bool = True) -> List["Collection"]:
results = []
if self._contained_in_self(__object):
return [self]
for collection in self.contained_collections:
results.extend(collection._contained_in_sub(__object, break_at_first=break_at_first))
if break_at_first:
return results
return results
def _get_parents_of_multiple_contained_children(self, __object: T):
results = []
if len(self.contained_collections) < 2 or self._contained_in_self(__object):
return results
count = 0
for collection in self.contained_collections:
sub_results = collection._get_parents_of_multiple_contained_children(__object)
if len(sub_results) > 0:
count += 1
results.extend(sub_results)
if count >= 2:
results.append(self)
return results
def _merge_in_self(self, __object: T, from_map: bool = False):
"""
1. find existing objects
2. merge into existing object
3. remap existing object
"""
if __object.id in self._contains_ids:
return
        existing_object: Optional[T] = None
for name, value in __object.indexing_values:
if value is None:
continue
if value in self._indexed_values[name]:
existing_object = self._indexed_to_objects[value][0]
if existing_object.id == __object.id:
return None
break
if existing_object is None:
return None
existing_object.merge(__object, replace_all_refs=True)
# just a check if it really worked
if existing_object.id != __object.id:
raise ValueError("This should NEVER happen. Merging doesn't work.")
self._map_element(existing_object, from_map=from_map)
def contains(self, __object: T) -> bool:
return len(self._contained_in_sub(__object)) > 0
def _append(self, __object: T, from_map: bool = False):
for attribute, to_sync_with in self.sync_on_append.items():
to_sync_with.sync_with_other_collection(__object.__getattribute__(attribute))
self._map_element(__object, from_map=from_map)
self._data.append(__object)
def append(self, __object: Optional[T], already_is_parent: bool = False, from_map: bool = False):
if __object is None:
return
if __object.id in self._contains_ids:
return
exists_in_collection = self._contained_in_sub(__object)
if len(exists_in_collection) and self is exists_in_collection[0]:
# assuming that the object already is contained in the correct collections
if not already_is_parent:
self._merge_in_self(__object, from_map=from_map)
return
if not len(exists_in_collection):
self._append(__object, from_map=from_map)
else:
exists_in_collection[0]._merge_in_self(__object, from_map=from_map)
if not already_is_parent or not self._is_root:
for parent_collection in self._get_parents_of_multiple_contained_children(__object):
parent_collection.append(__object, already_is_parent=True, from_map=from_map)
def extend(self, __iterable: Optional[Iterable[T]]):
if __iterable is None:
return
for __object in __iterable:
self.append(__object)
def sync_with_other_collection(self, equal_collection: "Collection"):
"""
If two collections always need to have the same values, this can be used.
Internally:
1. import the data from other to self
- _data
- contained_collections
2. replace all refs from the other object, with refs from this object
"""
if equal_collection is self:
return
# don't add the elements from the subelements from the other collection.
# this will be done in the next step.
self.extend(equal_collection._data)
# add all submodules
for equal_sub_collection in equal_collection.contained_collections:
self.contain_collection_inside(equal_sub_collection)
# now the ugly part
# replace all refs of the other element with this one
self._risky_merge(equal_collection)
def contain_collection_inside(self, sub_collection: "Collection"):
"""
This collection will ALWAYS contain everything from the passed in collection
"""
if sub_collection in self.contained_collections:
return
self.contained_collections.append(sub_collection)
sub_collection.upper_collections.append(self)
@property
def data(self) -> List[T]:
return [*self._data,
*(__object for collection in self.contained_collections for __object in collection.shallow_list)]
def __len__(self) -> int:
return len(self._data) + sum(len(collection) for collection in self.contained_collections)
def __iter__(self) -> Iterator[T]:
for element in self._data:
yield element

View File

@@ -0,0 +1,256 @@
from typing import List, Iterable, Iterator, Optional, TypeVar, Generic, Dict, Type, Any
from collections import defaultdict
from .parents import DatabaseObject
from ..utils.support_classes.hacking import MetaClass
T = TypeVar('T', bound=DatabaseObject)
class Collection(Generic[T]):
_data: List[T]
_indexed_values: Dict[str, set]
_indexed_to_objects: Dict[any, list]
shallow_list = property(fget=lambda self: self.data)
def __init__(
self, data: Optional[Iterable[T]] = None,
sync_on_append: Dict[str, "Collection"] = None,
contain_given_in_attribute: Dict[str, "Collection"] = None,
contain_attribute_in_given: Dict[str, "Collection"] = None,
append_object_to_attribute: Dict[str, DatabaseObject] = None
) -> None:
self._contains_ids = set()
self._data = []
self.upper_collections: List[Collection[T]] = []
self.contained_collections: List[Collection[T]] = []
# List of collection attributes that should be modified on append
# Key: collection attribute (str) of appended element
# Value: main collection to sync to
self.sync_on_append: Dict[str, Collection] = sync_on_append or {}
self.contain_given_in_attribute: Dict[str, Collection] = contain_given_in_attribute or {}
self.contain_attribute_in_given: Dict[str, Collection] = contain_attribute_in_given or {}
self.append_object_to_attribute: Dict[str, DatabaseObject] = append_object_to_attribute or {}
self.contain_self_on_append: List[str] = []
self._indexed_values = defaultdict(set)
self._indexed_to_objects = defaultdict(list)
self.extend(data)
def _map_element(self, __object: T, from_map: bool = False):
self._contains_ids.add(__object.id)
for name, value in __object.indexing_values:
if value is None:
continue
self._indexed_values[name].add(value)
self._indexed_to_objects[value].append(__object)
if not from_map:
for attribute, new_object in self.contain_given_in_attribute.items():
__object.__getattribute__(attribute).contain_collection_inside(new_object)
            for attribute, new_object in self.contain_attribute_in_given.items():
new_object.contain_collection_inside(__object.__getattribute__(attribute))
for attribute, new_object in self.append_object_to_attribute.items():
__object.__getattribute__(attribute).append(new_object, from_map=True)
def _unmap_element(self, __object: T):
self._contains_ids.remove(__object.id)
for name, value in __object.indexing_values:
if value is None:
continue
if value not in self._indexed_values[name]:
continue
try:
self._indexed_to_objects[value].remove(__object)
except ValueError:
continue
if not len(self._indexed_to_objects[value]):
self._indexed_values[name].remove(value)
def _contained_in_self(self, __object: T) -> bool:
if __object.id in self._contains_ids:
return True
for name, value in __object.indexing_values:
if value is None:
continue
if value in self._indexed_values[name]:
return True
return False
def _get_root_collections(self) -> List["Collection"]:
if not len(self.upper_collections):
return [self]
root_collections = []
for upper_collection in self.upper_collections:
root_collections.extend(upper_collection._get_root_collections())
return root_collections
@property
def _is_root(self) -> bool:
return len(self.upper_collections) <= 0
def _contained_in_sub(self, __object: T, break_at_first: bool = True) -> List["Collection"]:
results = []
if self._contained_in_self(__object):
return [self]
for collection in self.contained_collections:
results.extend(collection._contained_in_sub(__object, break_at_first=break_at_first))
if break_at_first:
return results
return results
def _get_parents_of_multiple_contained_children(self, __object: T):
results = []
if len(self.contained_collections) < 2 or self._contained_in_self(__object):
return results
count = 0
for collection in self.contained_collections:
sub_results = collection._get_parents_of_multiple_contained_children(__object)
if len(sub_results) > 0:
count += 1
results.extend(sub_results)
if count >= 2:
results.append(self)
return results
def _merge_in_self(self, __object: T, from_map: bool = False):
"""
1. find existing objects
2. merge into existing object
3. remap existing object
"""
if __object.id in self._contains_ids:
return
        existing_object: Optional[T] = None
for name, value in __object.indexing_values:
if value is None:
continue
if value in self._indexed_values[name]:
existing_object = self._indexed_to_objects[value][0]
if existing_object.id == __object.id:
return None
break
if existing_object is None:
return None
existing_object.merge(__object, replace_all_refs=True)
# just a check if it really worked
if existing_object.id != __object.id:
raise ValueError("This should NEVER happen. Merging doesn't work.")
self._map_element(existing_object, from_map=from_map)
def contains(self, __object: T) -> bool:
return len(self._contained_in_sub(__object)) > 0
def _append(self, __object: T, from_map: bool = False):
for attribute, to_sync_with in self.sync_on_append.items():
to_sync_with.sync_with_other_collection(__object.__getattribute__(attribute))
self._map_element(__object, from_map=from_map)
self._data.append(__object)
def append(self, __object: Optional[T], already_is_parent: bool = False, from_map: bool = False):
if __object is None:
return
if __object.id in self._contains_ids:
return
exists_in_collection = self._contained_in_sub(__object)
if len(exists_in_collection) and self is exists_in_collection[0]:
# assuming that the object already is contained in the correct collections
if not already_is_parent:
self._merge_in_self(__object, from_map=from_map)
return
if not len(exists_in_collection):
self._append(__object, from_map=from_map)
else:
exists_in_collection[0]._merge_in_self(__object, from_map=from_map)
if not already_is_parent or not self._is_root:
for parent_collection in self._get_parents_of_multiple_contained_children(__object):
parent_collection.append(__object, already_is_parent=True, from_map=from_map)
def extend(self, __iterable: Optional[Iterable[T]]):
if __iterable is None:
return
for __object in __iterable:
self.append(__object)
def sync_with_other_collection(self, equal_collection: "Collection"):
"""
If two collections always need to have the same values, this can be used.
Internally:
1. import the data from other to self
- _data
- contained_collections
2. replace all refs from the other object, with refs from this object
"""
if equal_collection is self:
return
# don't add the elements from the subelements from the other collection.
# this will be done in the next step.
self.extend(equal_collection._data)
# add all submodules
for equal_sub_collection in equal_collection.contained_collections:
self.contain_collection_inside(equal_sub_collection)
# now the ugly part
# replace all refs of the other element with this one
self._risky_merge(equal_collection)
def contain_collection_inside(self, sub_collection: "Collection"):
"""
This collection will ALWAYS contain everything from the passed in collection
"""
if sub_collection in self.contained_collections:
return
self.contained_collections.append(sub_collection)
sub_collection.upper_collections.append(self)
@property
def data(self) -> List[T]:
return [*self._data,
*(__object for collection in self.contained_collections for __object in collection.shallow_list)]
def __len__(self) -> int:
return len(self._data) + sum(len(collection) for collection in self.contained_collections)
def __iter__(self) -> Iterator[T]:
for element in self._data:
yield element

View File

@@ -0,0 +1,40 @@
from typing import TYPE_CHECKING, List, Iterable
if TYPE_CHECKING:
from .parents import DatabaseObject
class Options:
def __init__(self, option_list: List['DatabaseObject'] = None):
self._data: List['DatabaseObject'] = option_list or list()
def __str__(self):
return "\n".join(f"{i:02d}: {database_object.option_string}" for i, database_object in enumerate(self._data))
def __iter__(self):
for database_object in self._data:
yield database_object
def append(self, element: 'DatabaseObject'):
self._data.append(element)
def extend(self, iterable: Iterable['DatabaseObject']):
for element in iterable:
self.append(element)
def get_next_options(self, index: int) -> 'Options':
if index >= len(self._data):
raise ValueError("Index out of bounds")
return self._data[index].options
def __getitem__(self, item: int) -> 'DatabaseObject':
if type(item) != int:
raise TypeError("Key needs to be an Integer")
if item >= len(self._data):
raise ValueError("Index out of bounds")
return self._data[item]
def __len__(self) -> int:
return len(self._data)
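Intended use of the wrapper above; `artist` and `album` stand in for DatabaseObjects with an `option_string` and an `options` property:

options = Options([artist, album])
print(options)
# 00: <artist option string>
# 01: <album option string>

next_level = options.get_next_options(1)  # the selected object's own .options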

View File

@@ -0,0 +1,220 @@
from __future__ import annotations
import random
from collections import defaultdict
from functools import lru_cache
from typing import Optional, Dict, Tuple, List, Type, Generic, Any, TypeVar, Set
from .metadata import Metadata
from ..utils.config import logging_settings
from ..utils.shared import HIGHEST_ID
from ..utils.support_classes.hacking import MetaClass
LOGGER = logging_settings["object_logger"]
P = TypeVar("P", bound="OuterProxy")
class InnerData:
"""
This is the core class, which is used for every Data class.
The attributes are set, and can be merged.
    The concept is that the outer class proxies this class.
If the data in the wrapper class has to be merged, then this class is just replaced and garbage collected.
"""
def __init__(self, **kwargs):
for key, value in kwargs.items():
self.__setattr__(key, value)
def __merge__(self, __other: InnerData, override: bool = False):
"""
        TODO: default values are currently ignored entirely.
:param __other:
:param override:
:return:
"""
for key, value in __other.__dict__.copy().items():
# just set the other value if self doesn't already have it
if key not in self.__dict__:
self.__setattr__(key, value)
continue
# if the object of value implemented __merge__, it merges
existing = self.__getattribute__(key)
if hasattr(type(existing), "__merge__"):
existing.__merge__(value, override)
continue
# override the existing value if requested
if override:
self.__setattr__(key, value)
class OuterProxy:
"""
Wraps the inner data, and provides apis, to naturally access those values.
"""
_default_factories: dict = {}
_outer_attribute: Set[str] = {"options", "metadata", "indexing_values", "option_string"}
DOWNWARDS_COLLECTION_STRING_ATTRIBUTES = tuple()
UPWARDS_COLLECTION_STRING_ATTRIBUTES = tuple()
def __init__(self, _id: int = None, dynamic: bool = False, **kwargs):
_automatic_id: bool = False
if _id is None and not dynamic:
"""
generates a random integer id
the range is defined in the config
"""
_id = random.randint(0, HIGHEST_ID)
_automatic_id = True
kwargs["automatic_id"] = _automatic_id
kwargs["id"] = _id
kwargs["dynamic"] = dynamic
for name, factory in type(self)._default_factories.items():
if kwargs.get(name, None) is None:
kwargs[name] = factory()
collection_data: Dict[str, list] = {}
for name, value in kwargs.copy().items():
if isinstance(value, list) and name.endswith("_list"):
collection_name = name.replace("_list", "_collection")
collection_data[collection_name] = value
del kwargs[name]
self._inner: InnerData = InnerData(**kwargs)
self.__init_collections__()
for name, data_list in collection_data.items():
collection = self._inner.__getattribute__(name)
collection.extend(data_list)
self._inner.__setattr__(name, collection)
def __init_collections__(self):
pass
def __getattribute__(self, __name: str) -> Any:
"""
Returns the attribute of _inner if the attribute exists,
else it returns the attribute of self.
That the _inner gets checked first is essential for the type hints.
:param __name:
:return:
"""
if __name.startswith("_") or __name in self._outer_attribute or __name.isupper():
return object.__getattribute__(self, __name)
_inner: InnerData = super().__getattribute__("_inner")
try:
return _inner.__getattribute__(__name)
except AttributeError:
return super().__getattribute__(__name)
def __setattr__(self, __name, __value):
if not __name.startswith("_") and hasattr(self, "_inner"):
_inner: InnerData = super().__getattribute__("_inner")
return _inner.__setattr__(__name, __value)
return super().__setattr__(__name, __value)
def _add_other_db_objects(self, object_type: Type[OuterProxy], object_list: List[OuterProxy]):
pass
def add_list_of_other_objects(self, object_list: List[OuterProxy]):
d: Dict[Type[OuterProxy], List[OuterProxy]] = defaultdict(list)
for db_object in object_list:
d[type(db_object)].append(db_object)
for key, value in d.items():
self._add_other_db_objects(key, value)
def __hash__(self):
"""
:raise: IsDynamicException
:return:
"""
if self.dynamic:
return id(self._inner)
return self.id
def __eq__(self, other: Any):
return self.__hash__() == other.__hash__()
def merge(self, __other: Optional[OuterProxy], override: bool = False):
"""
1. merges the data of __other in self
2. replaces the data of __other with the data of self
:param __other:
:param override:
:return:
"""
if __other is None:
_ = "debug"
return
self._inner.__merge__(__other._inner, override=override)
__other._inner = self._inner
@property
def metadata(self) -> Metadata:
"""
This is an interface.
:return:
"""
return Metadata()
@property
def options(self) -> List[P]:
return [self]
@property
def indexing_values(self) -> List[Tuple[str, object]]:
"""
This is an interface.
It is supposed to return a map of the name and values for all important attributes.
This helps in comparing classes for equal data (e.g. being the same song but different attributes)
TODO
        Rewrite this approach into one that is centered around statistics instead of binary matches.
        Instead of "one of these matches, so it is the same",
        use "if enough attributes are similar enough, they are the same".
Returns:
List[Tuple[str, object]]: the first element in the tuple is the name of the attribute, the second the value.
"""
return []
@property
@lru_cache()
def all_collections(self):
r = []
for key in self._default_factories:
val = self._inner.__getattribute__(key)
if hasattr(val, "__is_collection__"):
r.append(val)
return r
def __repr__(self):
return f"{type(self).__name__}({', '.join(key + ': ' + str(val) for key, val in self.indexing_values)})"

View File

@@ -0,0 +1,688 @@
from __future__ import annotations
import random
from collections import defaultdict
from typing import List, Optional, Dict, Tuple, Type, Union
import pycountry
from ..utils.enums.album import AlbumType, AlbumStatus
from .collection import Collection
from .formatted_text import FormattedText
from .lyrics import Lyrics
from .contact import Contact
from .metadata import (
Mapping as id3Mapping,
ID3Timestamp,
Metadata
)
from .option import Options
from .parents import OuterProxy, P
from .source import Source, SourceCollection
from .target import Target
from .country import Language, Country
from ..utils.string_processing import unify
from .parents import OuterProxy as Base
from ..utils.config import main_settings
"""
All Objects dependent
"""
CountryTyping = type(list(pycountry.countries)[0])
OPTION_STRING_DELIMITER = " | "
class Song(Base):
title: str
unified_title: str
isrc: str
length: int
genre: str
note: FormattedText
tracksort: int
source_collection: SourceCollection
target_collection: Collection[Target]
lyrics_collection: Collection[Lyrics]
main_artist_collection: Collection[Artist]
feature_artist_collection: Collection[Artist]
album_collection: Collection[Album]
_default_factories = {
"note": FormattedText,
"length": lambda: 0,
"source_collection": SourceCollection,
"target_collection": Collection,
"lyrics_collection": Collection,
"main_artist_collection": Collection,
"album_collection": Collection,
"feature_artist_collection": Collection,
"title": lambda: "",
"unified_title": lambda: None,
"isrc": lambda: None,
"genre": lambda: None,
"tracksort": lambda: 0,
}
def __init__(self, title: str = "", unified_title: str = None, isrc: str = None, length: int = None,
genre: str = None, note: FormattedText = None, source_list: List[Source] = None,
target_list: List[Target] = None, lyrics_list: List[Lyrics] = None,
main_artist_list: List[Artist] = None, feature_artist_list: List[Artist] = None,
album_list: List[Album] = None, tracksort: int = 0, **kwargs) -> None:
Base.__init__(**locals())
UPWARDS_COLLECTION_STRING_ATTRIBUTES = ("album_collection", "main_artist_collection", "feature_artist_collection")
def __init_collections__(self) -> None:
self.album_collection.contain_given_in_attribute = {
"artist_collection": self.main_artist_collection,
}
self.album_collection.append_object_to_attribute = {
"song_collection": self,
}
self.main_artist_collection.contain_given_in_attribute = {
"main_album_collection": self.album_collection
}
self.feature_artist_collection.append_object_to_attribute = {
"feature_song_collection": self
}
def _add_other_db_objects(self, object_type: Type[OuterProxy], object_list: List[OuterProxy]):
if object_type is Song:
return
        if object_type is Lyrics:
            self.lyrics_collection.extend(object_list)
            return

        if object_type is Artist:
            self.main_artist_collection.extend(object_list)
            return

        if object_type is Album:
            self.album_collection.extend(object_list)
            return
@property
def indexing_values(self) -> List[Tuple[str, object]]:
return [
('id', self.id),
('title', self.unified_title),
('isrc', self.isrc),
*[('url', source.url) for source in self.source_collection]
]
@property
def metadata(self) -> Metadata:
metadata = Metadata({
id3Mapping.TITLE: [self.title],
id3Mapping.ISRC: [self.isrc],
id3Mapping.LENGTH: [self.length],
id3Mapping.GENRE: [self.genre],
id3Mapping.TRACKNUMBER: [self.tracksort_str]
})
        # album sources have no relevant metadata for id3:
        # metadata.merge_many([s.get_song_metadata() for s in self.source_collection])
metadata.merge_many([a.metadata for a in self.album_collection])
metadata.merge_many([a.metadata for a in self.main_artist_collection])
metadata.merge_many([a.metadata for a in self.feature_artist_collection])
metadata.merge_many([lyrics.metadata for lyrics in self.lyrics_collection])
return metadata
def get_artist_credits(self) -> str:
main_artists = ", ".join([artist.name for artist in self.main_artist_collection])
feature_artists = ", ".join([artist.name for artist in self.feature_artist_collection])
if len(feature_artists) == 0:
return main_artists
return f"{main_artists} feat. {feature_artists}"
"""
def __str__(self) -> str:
artist_credit_str = ""
artist_credits = self.get_artist_credits()
if artist_credits != "":
artist_credit_str = f" by {artist_credits}"
return f"\"{self.title}\"{artist_credit_str}"
"""
def __repr__(self) -> str:
return f"Song(\"{self.title}\")"
@property
def option_string(self) -> str:
return f"{self.__repr__()} " \
f"from Album({OPTION_STRING_DELIMITER.join(album.title for album in self.album_collection)}) " \
f"by Artist({OPTION_STRING_DELIMITER.join(artist.name for artist in self.main_artist_collection)}) " \
f"feat. Artist({OPTION_STRING_DELIMITER.join(artist.name for artist in self.feature_artist_collection)})"
@property
def options(self) -> List[P]:
options = self.main_artist_collection.shallow_list
options.extend(self.feature_artist_collection)
options.extend(self.album_collection)
options.append(self)
return options
@property
def tracksort_str(self) -> str:
"""
        If the album tracklist is empty, its length is treated as 1, since this song has to be on the album.
:returns id3_tracksort: {song_position}/{album.length_of_tracklist}
"""
if len(self.album_collection) == 0:
return f"{self.tracksort}"
return f"{self.tracksort}/{len(self.album_collection[0].song_collection) or 1}"
"""
All objects dependent on Album
"""
class Album(Base):
title: str
unified_title: str
album_status: AlbumStatus
album_type: AlbumType
language: Language
date: ID3Timestamp
barcode: str
albumsort: int
notes: FormattedText
source_collection: SourceCollection
artist_collection: Collection[Artist]
song_collection: Collection[Song]
label_collection: Collection[Label]
_default_factories = {
"title": lambda: None,
"unified_title": lambda: None,
"album_status": lambda: None,
"barcode": lambda: None,
"albumsort": lambda: None,
"album_type": lambda: AlbumType.OTHER,
"language": lambda: Language.by_alpha_2("en"),
"date": ID3Timestamp,
"notes": FormattedText,
"source_collection": SourceCollection,
"artist_collection": Collection,
"song_collection": Collection,
"label_collection": Collection,
}
# This is automatically generated
def __init__(self, title: str = None, unified_title: str = None, album_status: AlbumStatus = None,
album_type: AlbumType = None, language: Language = None, date: ID3Timestamp = None,
barcode: str = None, albumsort: int = None, notes: FormattedText = None,
source_list: List[Source] = None, artist_list: List[Artist] = None, song_list: List[Song] = None,
label_list: List[Label] = None, **kwargs) -> None:
super().__init__(title=title, unified_title=unified_title, album_status=album_status, album_type=album_type,
language=language, date=date, barcode=barcode, albumsort=albumsort, notes=notes,
source_list=source_list, artist_list=artist_list, song_list=song_list, label_list=label_list,
**kwargs)
DOWNWARDS_COLLECTION_STRING_ATTRIBUTES = ("song_collection",)
UPWARDS_COLLECTION_STRING_ATTRIBUTES = ("artist_collection", "label_collection")
def __init_collections__(self):
self.song_collection.contain_attribute_in_given = {
"main_artist_collection": self.artist_collection
}
def _add_other_db_objects(self, object_type: Type[OuterProxy], object_list: List[OuterProxy]):
if object_type is Song:
self.song_collection.extend(object_list)
return
if object_type is Artist:
self.artist_collection.extend(object_list)
return
if object_type is Album:
return
if object_type is Label:
self.label_collection.extend(object_list)
return
@property
def indexing_values(self) -> List[Tuple[str, object]]:
return [
('id', self.id),
('title', self.unified_title),
('barcode', self.barcode),
*[('url', source.url) for source in self.source_collection]
]
@property
def metadata(self) -> Metadata:
"""
TODO
- barcode
:return:
"""
return Metadata({
id3Mapping.ALBUM: [self.title],
id3Mapping.COPYRIGHT: [self.copyright],
id3Mapping.LANGUAGE: [self.iso_639_2_lang],
id3Mapping.ALBUM_ARTIST: [a.name for a in self.artist_collection],
id3Mapping.DATE: [self.date.strftime("%d%m")] if self.date.has_year and self.date.has_month else [],
id3Mapping.TIME: [self.date.strftime("%H%M")] if self.date.has_hour and self.date.has_minute else [],
id3Mapping.YEAR: [str(self.date.year).zfill(4)] if self.date.has_year else [],
id3Mapping.RELEASE_DATE: [self.date.timestamp],
id3Mapping.ORIGINAL_RELEASE_DATE: [self.date.timestamp],
id3Mapping.ALBUMSORTORDER: [str(self.albumsort)] if self.albumsort is not None else []
})
def __repr__(self):
return f"Album(\"{self.title}\")"
@property
def option_string(self) -> str:
return f"{self.__repr__()} " \
f"by Artist({OPTION_STRING_DELIMITER.join([artist.name for artist in self.artist_collection])}) " \
f"under Label({OPTION_STRING_DELIMITER.join([label.name for label in self.label_collection])})"
@property
def options(self) -> List[P]:
options = [*self.artist_collection, self, *self.song_collection]
return options
def update_tracksort(self):
"""
This updates the tracksort attributes of the songs in
`self.song_collection`, and sorts the songs, if possible.
It is advised to only call this function once all the tracks
have been added to the album.
:return:
"""
if self.song_collection.empty:
return
tracksort_map: Dict[int, Song] = {
song.tracksort: song for song in self.song_collection if song.tracksort is not None
}
# place the songs, with set tracksort attribute according to it
for tracksort, song in tracksort_map.items():
index = tracksort - 1
"""
I ONLY modify the `Collection._data` attribute directly,
to bypass the mapping of the attributes, because I will add the item in the next step
"""
"""
but for some reason, neither
`self.song_collection._data.index(song)`
`self.song_collection._data.remove(song)`
get the right object.
I have NO FUCKING CLUE why xD
But I just implemented it myself.
"""
for old_index, temp_song in enumerate(self.song_collection._data):
if song is temp_song:
break
# old_index is always bound here, since song is guaranteed to be in _data
del self.song_collection._data[old_index]
self.song_collection._data.insert(index, song)
# fill in the empty tracksort attributes
for i, song in enumerate(self.song_collection):
if song.tracksort is not None:
continue
song.tracksort = i + 1
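"""
Sketch of what `update_tracksort` does to a tracklist (hypothetical data):

    album = Album(title="demo", song_list=[
        Song(title="b"),               # no tracksort yet
        Song(title="a", tracksort=1),  # explicitly track 1
    ])
    album.update_tracksort()
    # "a" is moved to index 0 according to its tracksort,
    # "b" keeps its relative position and is filled in as tracksort 2
"""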
def compile(self, merge_into: bool = False):
"""
Compiles the recursive structures and, depending on the object,
does some other things.
There is no need to override this if only the recursive structure
should be built; override self._build_recursive_structures() instead.
"""
self.update_tracksort()
self._build_recursive_structures(build_version=random.randint(0, 99999), merge=merge_into)
@property
def copyright(self) -> str:
if self.date is None:
return ""
if not self.date.has_year or len(self.label_collection) == 0:
return ""
return f"{self.date.year} {self.label_collection[0].name}"
@property
def iso_639_2_lang(self) -> Optional[str]:
if self.language is None:
return None
return self.language.alpha_3
@property
def is_split(self) -> bool:
"""
A split Album is an Album by more than one Artist;
usually half the songs are made by one Artist and the other half by the other.
In this context "split" means either that, or one artist being featured on all songs.
:return:
"""
return len(self.artist_collection) > 1
@property
def album_type_string(self) -> str:
return self.album_type.value
"""
All objects dependent on Artist
"""
class Artist(Base):
name: str
unified_name: str
country: Country
formed_in: ID3Timestamp
notes: FormattedText
lyrical_themes: List[str]
general_genre: str
unformated_location: str
source_collection: SourceCollection
contact_collection: Collection[Contact]
feature_song_collection: Collection[Song]
main_album_collection: Collection[Album]
label_collection: Collection[Label]
_default_factories = {
"name": str,
"unified_name": lambda: None,
"country": lambda: None,
"unformated_location": lambda: None,
"formed_in": ID3Timestamp,
"notes": FormattedText,
"lyrical_themes": list,
"general_genre": lambda: "",
"source_collection": SourceCollection,
"feature_song_collection": Collection,
"main_album_collection": Collection,
"contact_collection": Collection,
"label_collection": Collection,
}
# This is automatically generated
def __init__(self, name: str = "", unified_name: str = None, country: Country = None,
formed_in: ID3Timestamp = None, notes: FormattedText = None, lyrical_themes: List[str] = None,
general_genre: str = None, unformated_location: str = None, source_list: List[Source] = None,
contact_list: List[Contact] = None, feature_song_list: List[Song] = None,
main_album_list: List[Album] = None, label_list: List[Label] = None, **kwargs) -> None:
super().__init__(name=name, unified_name=unified_name, country=country, formed_in=formed_in, notes=notes,
lyrical_themes=lyrical_themes, general_genre=general_genre,
unformated_location=unformated_location, source_list=source_list, contact_list=contact_list,
feature_song_list=feature_song_list, main_album_list=main_album_list, label_list=label_list,
**kwargs)
DOWNWARDS_COLLECTION_STRING_ATTRIBUTES = ("feature_song_collection", "main_album_collection")
UPWARDS_COLLECTION_STRING_ATTRIBUTES = ("label_collection",)
def __init_collections__(self):
self.feature_song_collection.append_object_to_attribute = {
"feature_artist_collection": self
}
self.main_album_collection.append_object_to_attribute = {
"artist_collection": self
}
self.label_collection.append_object_to_attribute = {
"current_artist_collection": self
}
def _add_other_db_objects(self, object_type: Type[OuterProxy], object_list: List[OuterProxy]):
if object_type is Song:
# this doesn't really make sense
# self.feature_song_collection.extend(object_list)
return
if object_type is Artist:
return
if object_type is Album:
self.main_album_collection.extend(object_list)
return
if object_type is Label:
self.label_collection.extend(object_list)
return
@property
def options(self) -> List[P]:
options = [self, *self.main_album_collection.shallow_list, *self.feature_album]
return options
def update_albumsort(self):
"""
This updates the albumsort attributes of the albums in
`self.main_album_collection`, and sorts the albums, if possible.
It is advised to only call this function once all the albums
have been added to the artist.
:return:
"""
if len(self.main_album_collection) <= 0:
return
type_section: Dict[AlbumType, int] = defaultdict(lambda: 2, {
AlbumType.OTHER: 0, # if I don't know it, I add it to the first section
AlbumType.STUDIO_ALBUM: 0,
AlbumType.EP: 0,
AlbumType.SINGLE: 1
}) if main_settings["sort_album_by_type"] else defaultdict(lambda: 0)
sections = defaultdict(list)
# order albums in the previously defined section
album: Album
for album in self.main_album_collection:
sections[type_section[album.album_type]].append(album)
def sort_section(_section: List[Album], last_albumsort: int) -> int:
# `album` is reused as the loop variable below; nonlocal keeps it bound to the outer scope
nonlocal album
if main_settings["sort_by_date"]:
_section.sort(key=lambda _album: _album.date, reverse=True)
new_last_albumsort = last_albumsort
for album_index, album in enumerate(_section):
if album.albumsort is None:
album.albumsort = new_last_albumsort = album_index + 1 + last_albumsort
_section.sort(key=lambda _album: _album.albumsort)
return new_last_albumsort
# sort the sections individually
_last_albumsort = 1
for section_index in sorted(sections):
_last_albumsort = sort_section(sections[section_index], _last_albumsort)
# merge all sections again
album_list = []
for section_index in sorted(sections):
album_list.extend(sections[section_index])
# replace the old collection with the new one
self.main_album_collection: Collection = Collection(data=album_list, element_type=Album)
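"""
Sketch of the resulting order (hypothetical albums, with
main_settings["sort_album_by_type"] enabled):

    artist = Artist(name="demo", main_album_list=[
        Album(title="single", album_type=AlbumType.SINGLE),
        Album(title="ep", album_type=AlbumType.EP),
    ])
    artist.update_albumsort()
    # the EP lands in section 0 and is ordered before the single in section 1;
    # missing albumsort values are then filled in section by section
"""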
@property
def indexing_values(self) -> List[Tuple[str, object]]:
return [
('id', self.id),
('name', self.unified_name),
*[('url', source.url) for source in self.source_collection],
*[('contact', contact.value) for contact in self.contact_collection]
]
@property
def metadata(self) -> Metadata:
metadata = Metadata({
id3Mapping.ARTIST: [self.name]
})
metadata.merge_many([s.get_artist_metadata() for s in self.source_collection])
return metadata
"""
def __str__(self, include_notes: bool = False):
string = self.name or ""
if include_notes:
plaintext_notes = self.notes.get_plaintext()
if plaintext_notes is not None:
string += "\n" + plaintext_notes
return string
"""
def __repr__(self):
return f"Artist(\"{self.name}\")"
@property
def option_string(self) -> str:
return f"{self.__repr__()} " \
f"under Label({OPTION_STRING_DELIMITER.join([label.name for label in self.label_collection])})"
@property
def options(self) -> List[P]:
options = [self]
options.extend(self.main_album_collection)
options.extend(self.feature_song_collection)
return options
@property
def feature_album(self) -> Album:
return Album(
title="features",
album_status=AlbumStatus.UNRELEASED,
album_type=AlbumType.COMPILATION_ALBUM,
is_split=True,
albumsort=666,
dynamic=True,
song_list=self.feature_song_collection.shallow_list
)
def get_all_songs(self) -> List[Song]:
"""
returns a list of all Songs.
probably not that useful, because it is unsorted
"""
collection = self.feature_song_collection.copy()
for album in self.discography:
collection.extend(album.song_collection)
return collection
@property
def discography(self) -> List[Album]:
flat_copy_discography = self.main_album_collection.copy()
flat_copy_discography.append(self.feature_album)
return flat_copy_discography
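"""
Sketch: `feature_album` is a dynamic compilation built from
`feature_song_collection`, so `discography` always ends with it:

    artist.discography  # -> [*artist.main_album_collection, the dynamic Album("features")]
"""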
"""
Label
"""
class Label(Base):
COLLECTION_STRING_ATTRIBUTES = ("album_collection", "current_artist_collection")
DOWNWARDS_COLLECTION_STRING_ATTRIBUTES = COLLECTION_STRING_ATTRIBUTES
name: str
unified_name: str
notes: FormattedText
source_collection: SourceCollection
contact_collection: Collection[Contact]
album_collection: Collection[Album]
current_artist_collection: Collection[Artist]
_default_factories = {
"notes": FormattedText,
"album_collection": Collection,
"current_artist_collection": Collection,
"source_collection": SourceCollection,
"contact_collection": Collection,
"name": lambda: None,
"unified_name": lambda: None,
}
def __init__(self, name: str = None, unified_name: str = None, notes: FormattedText = None,
source_list: List[Source] = None, contact_list: List[Contact] = None,
album_list: List[Album] = None, current_artist_list: List[Artist] = None, **kwargs) -> None:
super().__init__(name=name, unified_name=unified_name, notes=notes, source_list=source_list,
contact_list=contact_list, album_list=album_list, current_artist_list=current_artist_list,
**kwargs)
@property
def indexing_values(self) -> List[Tuple[str, object]]:
return [
('id', self.id),
('name', self.unified_name),
*[('url', source.url) for source in self.source_collection]
]
def _add_other_db_objects(self, object_type: Type[OuterProxy], object_list: List[OuterProxy]):
if object_type is Song:
return
if object_type is Artist:
self.current_artist_collection.extend(object_list)
return
if object_type is Album:
self.album_collection.extend(object_list)
return
@property
def options(self) -> List[P]:
options = [self]
options.extend(self.current_artist_collection.shallow_list)
options.extend(self.album_collection.shallow_list)
return options

View File

@@ -0,0 +1,132 @@
from collections import defaultdict
from enum import Enum
from typing import List, Dict, Set, Tuple, Optional, Iterable
from urllib.parse import urlparse
from ..utils.enums.source import SourcePages, SourceTypes
from ..utils.config import youtube_settings
from .metadata import Mapping, Metadata
from .parents import OuterProxy
from .collection import Collection
class Source(OuterProxy):
url: str
page_enum: SourcePages
referer_page: SourcePages
audio_url: str
_default_factories = {
"audio_url": lambda: None,
}
# This is automatically generated
def __init__(self, page_enum: SourcePages, url: str, referer_page: SourcePages = None, audio_url: str = None,
**kwargs) -> None:
if referer_page is None:
referer_page = page_enum
super().__init__(url=url, page_enum=page_enum, referer_page=referer_page, audio_url=audio_url, **kwargs)
@classmethod
def match_url(cls, url: str, referer_page: SourcePages) -> Optional["Source"]:
"""
this shouldn't be used unless you aren't certain which page the source belongs to;
the reason is that matching the url this way is less efficient than constructing the Source directly
"""
parsed = urlparse(url)
url = parsed.geturl()
if "musify" in parsed.netloc:
return cls(SourcePages.MUSIFY, url, referer_page=referer_page)
if parsed.netloc in [_url.netloc for _url in youtube_settings['youtube_url']]:
return cls(SourcePages.YOUTUBE, url, referer_page=referer_page)
if url.startswith("https://www.deezer"):
return cls(SourcePages.DEEZER, url, referer_page=referer_page)
if url.startswith("https://open.spotify.com"):
return cls(SourcePages.SPOTIFY, url, referer_page=referer_page)
if "bandcamp" in url:
return cls(SourcePages.BANDCAMP, url, referer_page=referer_page)
if "wikipedia" in parsed.netloc:
return cls(SourcePages.WIKIPEDIA, url, referer_page=referer_page)
if url.startswith("https://www.metal-archives.com/"):
return cls(SourcePages.ENCYCLOPAEDIA_METALLUM, url, referer_page=referer_page)
# the less important ones
if url.startswith("https://www.facebook"):
return cls(SourcePages.FACEBOOK, url, referer_page=referer_page)
if url.startswith("https://www.instagram"):
return cls(SourcePages.INSTAGRAM, url, referer_page=referer_page)
if url.startswith("https://twitter"):
return cls(SourcePages.TWITTER, url, referer_page=referer_page)
if url.startswith("https://myspace.com"):
return cls(SourcePages.MYSPACE, url, referer_page=referer_page)
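"""
Usage sketch of `match_url` (url is made up):

    source = Source.match_url("https://open.spotify.com/track/xyz", referer_page=SourcePages.SPOTIFY)
    # -> Source(SourcePages.SPOTIFY, "https://open.spotify.com/track/xyz");
    # an unrecognised host falls through all the checks and yields None
"""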
def get_song_metadata(self) -> Metadata:
return Metadata({
Mapping.FILE_WEBPAGE_URL: [self.url],
Mapping.SOURCE_WEBPAGE_URL: [self.homepage]
})
def get_artist_metadata(self) -> Metadata:
return Metadata({
Mapping.ARTIST_WEBPAGE_URL: [self.url]
})
@property
def metadata(self) -> Metadata:
return self.get_song_metadata()
@property
def indexing_values(self) -> List[Tuple[str, object]]:
return [
('id', self.id),
('url', self.url),
('audio_url', self.audio_url),
]
def __str__(self):
return self.__repr__()
def __repr__(self) -> str:
return f"Src({self.page_enum.value}: {self.url}, {self.audio_url})"
page_str = property(fget=lambda self: self.page_enum.value)
type_str = property(fget=lambda self: self.type_enum.value)
homepage = property(fget=lambda self: SourcePages.get_homepage(self.page_enum))
class SourceCollection(Collection):
def __init__(self, data: Optional[Iterable[Source]] = None, **kwargs):
self._page_to_source_list: Dict[SourcePages, List[Source]] = defaultdict(list)
super().__init__(data=data, **kwargs)
def _map_element(self, __object: Source, **kwargs):
super()._map_element(__object, **kwargs)
self._page_to_source_list[__object.page_enum].append(__object)
@property
def source_pages(self) -> Set[SourcePages]:
return set(source.page_enum for source in self._data)
def get_sources_from_page(self, source_page: SourcePages) -> List[Source]:
"""
getting the sources for a specific page like
YouTube or musify
"""
return self._page_to_source_list[source_page].copy()
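"""
Usage sketch of the page mapping (hypothetical sources):

    collection = SourceCollection([
        Source(SourcePages.YOUTUBE, "https://youtu.be/xyz"),
        Source(SourcePages.MUSIFY, "https://musify.club/xyz"),
    ])
    collection.source_pages                                # -> {YOUTUBE, MUSIFY}
    collection.get_sources_from_page(SourcePages.YOUTUBE)  # -> [the youtube source]
"""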

View File

@@ -0,0 +1,108 @@
from __future__ import annotations
from pathlib import Path
from typing import List, Tuple, TextIO, Union
import logging
import requests
from tqdm import tqdm
from .parents import OuterProxy
from ..utils.config import main_settings, logging_settings
from ..utils.string_processing import fit_to_file_system
LOGGER = logging.getLogger("target")
class Target(OuterProxy):
"""
Created somewhat like this:
```python
# I know path is pointless, and I will change that (don't worry about backwards compatibility there)
Target(file="song.mp3", path="~/Music/genre/artist/album")
```
"""
file_path: Path
_default_factories = {
}
# This is automatically generated
def __init__(self, file_path: Union[Path, str], relative_to_music_dir: bool = False, **kwargs) -> None:
if not isinstance(file_path, Path):
file_path = Path(file_path)
if relative_to_music_dir:
file_path = Path(main_settings["music_directory"], file_path)
super().__init__(file_path=fit_to_file_system(file_path), **kwargs)
self.is_relative_to_music_dir: bool = relative_to_music_dir
def __repr__(self) -> str:
return str(self.file_path)
@property
def indexing_values(self) -> List[Tuple[str, object]]:
return [('filepath', self.file_path)]
@property
def exists(self) -> bool:
return self.file_path.is_file()
@property
def size(self) -> int:
"""
returns the size the downloaded audio takes up in bytes
returns 0 if the file doesn't exist
"""
if not self.exists:
return 0
return self.file_path.stat().st_size
def create_path(self):
self.file_path.parent.mkdir(parents=True, exist_ok=True)
def copy_content(self, copy_to: Target):
if not self.exists:
LOGGER.warning(f"No file exists at: {self.file_path}")
return
with open(self.file_path, "rb") as read_from:
copy_to.create_path()
with open(copy_to.file_path, "wb") as write_to:
write_to.write(read_from.read())
def stream_into(self, r: requests.Response, desc: str = None) -> bool:
if r is None:
return False
self.create_path()
total_size = int(r.headers.get('content-length', 0))
with open(self.file_path, 'wb') as f:
try:
"""
https://en.wikipedia.org/wiki/Kilobyte
> The internationally recommended unit symbol for the kilobyte is kB.
"""
with tqdm(total=total_size, unit='B', unit_scale=True, unit_divisor=1024, desc=desc) as t:
for chunk in r.iter_content(chunk_size=main_settings["chunk_size"]):
size = f.write(chunk)
t.update(size)
return True
except requests.exceptions.Timeout:
logging_settings["download_logger"].error("Stream timed out.")
return False
def open(self, file_mode: str, **kwargs) -> TextIO:
return self.file_path.open(file_mode, **kwargs)
def delete(self):
self.file_path.unlink(missing_ok=True)
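"""
Usage sketch (paths and url are made up):

    target = Target(file_path="genre/artist/album/song.mp3", relative_to_music_dir=True)
    response = requests.get("https://example.com/audio.mp3", stream=True)
    target.stream_into(response, desc="song.mp3")  # creates the parent directories itself
"""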

View File

@@ -0,0 +1,7 @@
from .encyclopaedia_metallum import EncyclopaediaMetallum
from .musify import Musify
from .youtube import YouTube
from .youtube_music import YoutubeMusic
from .bandcamp import Bandcamp
from .abstract import Page, INDEPENDENT_DB_OBJECTS

View File

@@ -0,0 +1,453 @@
import logging
import random
import re
from copy import copy
from pathlib import Path
from typing import Optional, Union, Type, Dict, Set, List, Tuple
from string import Formatter
import requests
from bs4 import BeautifulSoup
from ..connection import Connection
from ..objects import (
Song,
Source,
Album,
Artist,
Target,
DatabaseObject,
Options,
Collection,
Label,
)
from ..utils.enums.source import SourcePages
from ..utils.enums.album import AlbumType
from ..audio import write_metadata_to_target, correct_codec
from ..utils.config import main_settings
from ..utils.support_classes.query import Query
from ..utils.support_classes.download_result import DownloadResult
from ..utils.string_processing import fit_to_file_system
INDEPENDENT_DB_OBJECTS = Union[Label, Album, Artist, Song]
INDEPENDENT_DB_TYPES = Union[Type[Song], Type[Album], Type[Artist], Type[Label]]
class NamingDict(dict):
CUSTOM_KEYS: Dict[str, str] = {
"label": "label.name",
"artist": "artist.name",
"song": "song.title",
"isrc": "song.isrc",
"album": "album.title",
"album_type": "album.album_type_string"
}
def __init__(self, values: dict, object_mappings: Dict[str, DatabaseObject] = None):
self.object_mappings: Dict[str, DatabaseObject] = object_mappings or dict()
super().__init__(values)
self["audio_format"] = main_settings["audio_format"]
def add_object(self, music_object: DatabaseObject):
self.object_mappings[type(music_object).__name__.lower()] = music_object
def copy(self) -> dict:
return type(self)(super().copy(), self.object_mappings.copy())
def __getitem__(self, key: str) -> str:
return fit_to_file_system(super().__getitem__(key))
def default_value_for_name(self, name: str) -> str:
return f'Various {name.replace("_", " ").title()}'
def __missing__(self, key: str) -> str:
if "." not in key:
if key not in self.CUSTOM_KEYS:
return self.default_value_for_name(key)
key = self.CUSTOM_KEYS[key]
frag_list = key.split(".")
object_name = frag_list[0].strip().lower()
attribute_name = frag_list[-1].strip().lower()
if object_name not in self.object_mappings:
return self.default_value_for_name(attribute_name)
music_object = self.object_mappings[object_name]
try:
value = getattr(music_object, attribute_name)
if value is None:
return self.default_value_for_name(attribute_name)
return str(value)
except AttributeError:
return self.default_value_for_name(attribute_name)
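"""
Usage sketch of the fallback logic (hypothetical values):

    naming = NamingDict({"genre": "metal"})
    naming["genre"]   # -> "metal"
    naming["album"]   # no Album mapped yet -> falls back to "Various Title"
    naming.add_object(Album(title="demo"))
    naming["album"]   # -> "demo", resolved via the CUSTOM_KEYS mapping "album.title"
"""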
def _clean_music_object(music_object: INDEPENDENT_DB_OBJECTS, collections: Dict[INDEPENDENT_DB_TYPES, Collection]):
if type(music_object) == Label:
return _clean_label(label=music_object, collections=collections)
if type(music_object) == Artist:
return _clean_artist(artist=music_object, collections=collections)
if type(music_object) == Album:
return _clean_album(album=music_object, collections=collections)
if type(music_object) == Song:
return _clean_song(song=music_object, collections=collections)
def _clean_collection(collection: Collection, collection_dict: Dict[INDEPENDENT_DB_TYPES, Collection]):
if collection.element_type not in collection_dict:
return
for i, element in enumerate(collection):
r = collection_dict[collection.element_type].append(element, merge_into_existing=True)
collection[i] = r.current_element
if not r.was_the_same:
_clean_music_object(r.current_element, collection_dict)
def _clean_label(label: Label, collections: Dict[INDEPENDENT_DB_TYPES, Collection]):
_clean_collection(label.current_artist_collection, collections)
_clean_collection(label.album_collection, collections)
def _clean_artist(artist: Artist, collections: Dict[INDEPENDENT_DB_TYPES, Collection]):
_clean_collection(artist.main_album_collection, collections)
_clean_collection(artist.feature_song_collection, collections)
_clean_collection(artist.label_collection, collections)
def _clean_album(album: Album, collections: Dict[INDEPENDENT_DB_TYPES, Collection]):
_clean_collection(album.label_collection, collections)
_clean_collection(album.song_collection, collections)
_clean_collection(album.artist_collection, collections)
def _clean_song(song: Song, collections: Dict[INDEPENDENT_DB_TYPES, Collection]):
_clean_collection(song.album_collection, collections)
_clean_collection(song.feature_artist_collection, collections)
_clean_collection(song.main_artist_collection, collections)
class Page:
"""
This is an abstract class, laying out the
functionality for every other class fetching something
"""
SOURCE_TYPE: SourcePages
LOGGER = logging.getLogger("this shouldn't be used")
# set this to true, if all song details can also be fetched by fetching album details
NO_ADDITIONAL_DATA_FROM_SONG = False
def _search_regex(self, pattern, string, default=None, fatal=True, flags=0, group=None):
"""
Perform a regex search on the given string, using a single or a list of
patterns returning the first matching group.
In case of failure the default value is returned (the `fatal` flag is
currently not acted upon).
"""
if isinstance(pattern, str):
mobj = re.search(pattern, string, flags)
else:
for p in pattern:
mobj = re.search(p, string, flags)
if mobj:
break
if mobj:
if group is None:
# return the first matching group
return next(g for g in mobj.groups() if g is not None)
elif isinstance(group, (list, tuple)):
return tuple(mobj.group(g) for g in group)
else:
return mobj.group(group)
return default
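"""
Usage sketch (pattern and string are made up, `page` is a hypothetical Page instance):

    page._search_regex(r"id=(\d+)", "https://example.com/?id=42")
    # -> "42", the first matching group; on no match the default is returned
"""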
def get_source_type(self, source: Source) -> Optional[Type[DatabaseObject]]:
return None
def get_soup_from_response(self, r: requests.Response) -> BeautifulSoup:
return BeautifulSoup(r.content, "html.parser")
# to search stuff
def search(self, query: Query) -> List[DatabaseObject]:
music_object = query.music_object
search_functions = {
Song: self.song_search,
Album: self.album_search,
Artist: self.artist_search,
Label: self.label_search
}
if type(music_object) in search_functions:
r = search_functions[type(music_object)](music_object)
if r is not None and len(r) > 0:
return r
r = []
for default_query in query.default_search:
for single_option in self.general_search(default_query):
r.append(single_option)
return r
def general_search(self, search_query: str) -> List[DatabaseObject]:
return []
def label_search(self, label: Label) -> List[Label]:
return []
def artist_search(self, artist: Artist) -> List[Artist]:
return []
def album_search(self, album: Album) -> List[Album]:
return []
def song_search(self, song: Song) -> List[Song]:
return []
def fetch_details(self, music_object: DatabaseObject, stop_at_level: int = 1,
post_process: bool = True) -> DatabaseObject:
"""
when a music object with lacking data is passed in, it returns
the SAME object **(no copy)** with more detailed data.
If, for example, you put in an album, it fetches the tracklist
:param music_object:
:param stop_at_level:
This says the depth of the level the scraper will recurse to.
If this is for example set to 2, then the levels could be:
1. Level: the album
2. Level: every song of the album + every artist of the album
If no additional requests are needed to get the data one level below the supposed stop level
this gets ignored
:return detailed_music_object: IT MODIFIES THE INPUT OBJ
"""
# creating a new object, of the same type
new_music_object: Optional[DatabaseObject] = None
# only certain database objects, have a source list
if isinstance(music_object, INDEPENDENT_DB_OBJECTS):
source: Source
for source in music_object.source_collection.get_sources_from_page(self.SOURCE_TYPE):
tmp = self.fetch_object_from_source(
source=source,
enforce_type=type(music_object),
stop_at_level=stop_at_level,
post_process=False
)
if new_music_object is None:
new_music_object = tmp
else:
new_music_object.merge(tmp)
if new_music_object is not None:
music_object.merge(new_music_object)
return music_object
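"""
Usage sketch (hypothetical page instance and source):

    album = Album(source_list=[Source(SourcePages.BANDCAMP, "https://demo.bandcamp.com/album/demo")])
    page.fetch_details(album, stop_at_level=2)
    # `album` itself is enriched (tracklist, artists); no copy is returned
"""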
def fetch_object_from_source(self, source: Source, stop_at_level: int = 2,
enforce_type: Type[DatabaseObject] = None, post_process: bool = True) -> Optional[
DatabaseObject]:
obj_type = self.get_source_type(source)
if obj_type is None:
return None
if enforce_type != obj_type and enforce_type is not None:
self.LOGGER.warning(f"Object type isn't type to enforce: {enforce_type}, {obj_type}")
return None
music_object: DatabaseObject = None
fetch_map = {
Song: self.fetch_song,
Album: self.fetch_album,
Artist: self.fetch_artist,
Label: self.fetch_label
}
if obj_type in fetch_map:
music_object = fetch_map[obj_type](source, stop_at_level)
else:
self.LOGGER.warning(f"Can't fetch details of type: {obj_type}")
return None
if stop_at_level > 1:
collection: Collection
for collection_str in music_object.DOWNWARDS_COLLECTION_STRING_ATTRIBUTES:
collection = music_object.__getattribute__(collection_str)
for sub_element in collection:
sub_element.merge(
self.fetch_details(sub_element, stop_at_level=stop_at_level - 1, post_process=False))
return music_object
def fetch_song(self, source: Source, stop_at_level: int = 1) -> Song:
return Song()
def fetch_album(self, source: Source, stop_at_level: int = 1) -> Album:
return Album()
def fetch_artist(self, source: Source, stop_at_level: int = 1) -> Artist:
return Artist()
def fetch_label(self, source: Source, stop_at_level: int = 1) -> Label:
return Label()
def download(self, music_object: DatabaseObject, genre: str, download_all: bool = False,
process_metadata_anyway: bool = False) -> DownloadResult:
naming_dict: NamingDict = NamingDict({"genre": genre})
def fill_naming_objects(naming_music_object: DatabaseObject):
nonlocal naming_dict
for collection_name in naming_music_object.UPWARDS_COLLECTION_STRING_ATTRIBUTES:
collection: Collection = getattr(naming_music_object, collection_name)
if collection.empty:
continue
dom_ordered_music_object: DatabaseObject = collection[0]
naming_dict.add_object(dom_ordered_music_object)
return fill_naming_objects(dom_ordered_music_object)
fill_naming_objects(music_object)
return self._download(music_object, naming_dict, download_all, process_metadata_anyway=process_metadata_anyway)
def _download(self, music_object: DatabaseObject, naming_dict: NamingDict, download_all: bool = False,
skip_details: bool = False, process_metadata_anyway: bool = False) -> DownloadResult:
skip_next_details = skip_details
# Skips all releases, that are defined in shared.ALBUM_TYPE_BLACKLIST, if download_all is False
if isinstance(music_object, Album):
if self.NO_ADDITIONAL_DATA_FROM_SONG:
skip_next_details = True
if not download_all and music_object.album_type.value in main_settings["album_type_blacklist"]:
return DownloadResult()
if not isinstance(music_object, Song) or not self.NO_ADDITIONAL_DATA_FROM_SONG:
self.fetch_details(music_object=music_object, stop_at_level=2)
naming_dict.add_object(music_object)
if isinstance(music_object, Song):
return self._download_song(music_object, naming_dict, process_metadata_anyway=process_metadata_anyway)
download_result: DownloadResult = DownloadResult()
for collection_name in music_object.DOWNWARDS_COLLECTION_STRING_ATTRIBUTES:
collection: Collection = getattr(music_object, collection_name)
sub_ordered_music_object: DatabaseObject
for sub_ordered_music_object in collection:
download_result.merge(self._download(sub_ordered_music_object, naming_dict.copy(), download_all,
skip_details=skip_next_details,
process_metadata_anyway=process_metadata_anyway))
return download_result
def _download_song(self, song: Song, naming_dict: NamingDict, process_metadata_anyway: bool = False):
if "genre" not in naming_dict and song.genre is not None:
naming_dict["genre"] = song.genre
if song.genre is None:
song.genre = naming_dict["genre"]
path_parts = Formatter().parse(main_settings["download_path"])
file_parts = Formatter().parse(main_settings["download_file"])
new_target = Target(
relative_to_music_dir=True,
file_path=Path(
main_settings["download_path"].format(**{part[1]: naming_dict[part[1]] for part in path_parts}),
main_settings["download_file"].format(**{part[1]: naming_dict[part[1]] for part in file_parts})
)
)
if song.target_collection.empty:
song.target_collection.append(new_target)
sources = song.source_collection.get_sources_from_page(self.SOURCE_TYPE)
if len(sources) == 0:
return DownloadResult(error_message=f"No source found for {song.title} as {self.__class__.__name__}.")
temp_target: Target = Target(
relative_to_music_dir=False,
file_path=Path(
main_settings["temp_directory"],
str(song.id)
)
)
r = DownloadResult(1)
found_on_disc = False
target: Target
for target in song.target_collection:
if target.exists:
if process_metadata_anyway:
target.copy_content(temp_target)
found_on_disc = True
r.found_on_disk += 1
r.add_target(target)
if found_on_disc and not process_metadata_anyway:
self.LOGGER.info(f"{song.option_string} already exists, thus not downloading again.")
return r
source = sources[0]
if not found_on_disc:
r = self.download_song_to_target(source=source, target=temp_target, desc=song.title)
if not r.is_fatal_error:
r.merge(self._post_process_targets(song, temp_target,
[] if found_on_disc else self.get_skip_intervals(song, source)))
return r
def _post_process_targets(self, song: Song, temp_target: Target, interval_list: List) -> DownloadResult:
correct_codec(temp_target, interval_list=interval_list)
self.post_process_hook(song, temp_target)
write_metadata_to_target(song.metadata, temp_target)
r = DownloadResult()
target: Target
for target in song.target_collection:
if temp_target is not target:
temp_target.copy_content(target)
r.add_target(target)
temp_target.delete()
r.sponsor_segments += len(interval_list)
return r
def get_skip_intervals(self, song: Song, source: Source) -> List[Tuple[float, float]]:
return []
def post_process_hook(self, song: Song, temp_target: Target, **kwargs):
pass
def download_song_to_target(self, source: Source, target: Target, desc: str = None) -> DownloadResult:
return DownloadResult()

View File

@@ -0,0 +1,361 @@
from typing import List, Optional, Type
from urllib.parse import urlparse, urlunparse
import json
from enum import Enum
from bs4 import BeautifulSoup
import pycountry
from ..objects import Source, DatabaseObject
from .abstract import Page
from ..objects import (
Artist,
Source,
SourcePages,
Song,
Album,
Label,
Target,
Contact,
ID3Timestamp,
Lyrics,
FormattedText
)
from ..connection import Connection
from ..utils.support_classes.download_result import DownloadResult
from ..utils.config import main_settings, logging_settings
from ..utils.shared import DEBUG
if DEBUG:
from ..utils.debug_utils import dump_to_file
def _parse_artist_url(url: str) -> str:
parsed = urlparse(url)
return urlunparse((parsed.scheme, parsed.netloc, "/music/", "", "", ""))
def _get_host(source: Source) -> str:
parsed = urlparse(source.url)
return urlunparse((parsed.scheme, parsed.netloc, "", "", "", ""))
class BandcampTypes(Enum):
ARTIST = "b"
ALBUM = "a"
SONG = "t"
class Bandcamp(Page):
# CHANGE
SOURCE_TYPE = SourcePages.BANDCAMP
LOGGER = logging_settings["bandcamp_logger"]
def __init__(self, *args, **kwargs):
self.connection: Connection = Connection(
host="https://bandcamp.com/",
logger=self.LOGGER,
module="bandcamp",
)
super().__init__(*args, **kwargs)
def get_source_type(self, source: Source) -> Optional[Type[DatabaseObject]]:
parsed_url = urlparse(source.url)
path = parsed_url.path.replace("/", "")
if path == "" or path.startswith("music"):
return Artist
if path.startswith("album"):
return Album
if path.startswith("track"):
return Song
return super().get_source_type(source)
def _parse_autocomplete_api_result(self, data: dict) -> DatabaseObject:
try:
object_type = BandcampTypes(data["type"])
except ValueError:
return
url = data["item_url_root"]
if "item_url_path" in data:
url = data["item_url_path"]
source_list = [Source(self.SOURCE_TYPE, url)]
name = data["name"]
if data.get("is_label", False):
return Label(
name=name,
source_list=source_list
)
if object_type is BandcampTypes.ARTIST:
source_list = [Source(self.SOURCE_TYPE, _parse_artist_url(url))]
return Artist(
name=name,
source_list=source_list
)
if object_type is BandcampTypes.ALBUM:
return Album(
title=name,
source_list=source_list,
artist_list=[
Artist(
name=data["band_name"].strip(),
source_list=[
Source(self.SOURCE_TYPE, data["item_url_root"])
]
)
]
)
if object_type is BandcampTypes.SONG:
return Song(
title=name.strip(),
source_list=source_list,
main_artist_list=[
Artist(
name=data["band_name"],
source_list=[
Source(self.SOURCE_TYPE, data["item_url_root"])
]
)
]
)
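"""
Sketch of an autocomplete payload this method expects (field names taken
from the code above, values made up):

    {"type": "t", "name": "Demo Song", "band_name": "Demo Artist",
     "item_url_root": "https://demo.bandcamp.com",
     "item_url_path": "https://demo.bandcamp.com/track/demo-song"}
    # -> Song("Demo Song") with Artist("Demo Artist") as main artist
"""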
def general_search(self, search_query: str, filter_string: str = "") -> List[DatabaseObject]:
results = []
r = self.connection.post("https://bandcamp.com/api/bcsearch_public_api/1/autocomplete_elastic", json={
"fan_id": None,
"full_page": True,
"search_filter": filter_string,
"search_text": search_query,
})
if r is None:
return results
if DEBUG:
dump_to_file("bandcamp_search_response.json", r.text, is_json=True, exit_after_dump=False)
data = r.json()
for element in data.get("auto", {}).get("results", []):
r = self._parse_autocomplete_api_result(element)
if r is not None:
results.append(r)
return results
def label_search(self, label: Label) -> List[Label]:
return self.general_search(label.name, filter_string="b")
def artist_search(self, artist: Artist) -> List[Artist]:
return self.general_search(artist.name, filter_string="b")
def album_search(self, album: Album) -> List[Album]:
return self.general_search(album.title, filter_string="a")
def song_search(self, song: Song) -> List[Song]:
return self.general_search(song.title, filter_string="t")
def fetch_label(self, source: Source, stop_at_level: int = 1) -> Label:
return Label()
def _parse_artist_details(self, soup: BeautifulSoup) -> Artist:
name: str = None
source_list: List[Source] = []
contact_list: List[Contact] = []
band_name_location: BeautifulSoup = soup.find("p", {"id": "band-name-location"})
if band_name_location is not None:
title_span = band_name_location.find("span", {"class": "title"})
if title_span is not None:
name = title_span.text.strip()
link_container: BeautifulSoup = soup.find("ol", {"id": "band-links"})
if link_container is not None:
li: BeautifulSoup
for li in link_container.find_all("a"):
if li is None or li.get('href') is None:
continue
source_list.append(Source.match_url(_parse_artist_url(li['href']), referer_page=self.SOURCE_TYPE))
return Artist(
name=name,
source_list=source_list
)
def _parse_album(self, soup: BeautifulSoup, initial_source: Source) -> List[Album]:
title = None
source_list: List[Source] = []
a = soup.find("a")
if a is not None and a["href"] is not None:
source_list.append(Source(self.SOURCE_TYPE, _get_host(initial_source) + a["href"]))
title_p = soup.find("p", {"class": "title"})
if title_p is not None:
title = title_p.text.strip()
return Album(title=title, source_list=source_list)
def _parse_artist_data_blob(self, data_blob: dict, artist_url: str):
parsed_artist_url = urlparse(artist_url)
album_list: List[Album] = []
for album_json in data_blob.get("buyfulldisco", {}).get("tralbums", []):
album_list.append(Album(
title=album_json["title"].strip(),
source_list=[Source(
self.SOURCE_TYPE,
urlunparse((parsed_artist_url.scheme, parsed_artist_url.netloc, album_json["page_url"], "", "", ""))
)]
))
return album_list
def fetch_artist(self, source: Source, stop_at_level: int = 1) -> Artist:
artist = Artist()
r = self.connection.get(_parse_artist_url(source.url))
if r is None:
return artist
soup = self.get_soup_from_response(r)
if DEBUG:
dump_to_file("artist_page.html", r.text, exit_after_dump=False)
artist = self._parse_artist_details(soup=soup.find("div", {"id": "bio-container"}))
html_music_grid = soup.find("ol", {"id": "music-grid"})
if html_music_grid is not None:
for subsoup in html_music_grid.find_all("li"):
artist.main_album_collection.append(self._parse_album(soup=subsoup, initial_source=source))
for i, data_blob_soup in enumerate(soup.find_all("div", {"id": ["pagedata", "collectors-data"]})):
data_blob = data_blob_soup["data-blob"]
if DEBUG:
dump_to_file(f"bandcamp_artist_data_blob_{i}.json", data_blob, is_json=True, exit_after_dump=False)
if data_blob is not None:
artist.main_album_collection.extend(
self._parse_artist_data_blob(json.loads(data_blob), source.url)
)
artist.source_collection.append(source)
return artist
def _parse_track_element(self, track: dict) -> Optional[Song]:
return Song(
title=track["item"]["name"].strip(),
source_list=[Source(self.SOURCE_TYPE, track["item"]["mainEntityOfPage"])],
tracksort=int(track["position"])
)
def fetch_album(self, source: Source, stop_at_level: int = 1) -> Album:
album = Album()
r = self.connection.get(source.url)
if r is None:
return album
soup = self.get_soup_from_response(r)
data_container = soup.find("script", {"type": "application/ld+json"})
if DEBUG:
dump_to_file("album_data.json", data_container.text, is_json=True, exit_after_dump=False)
data = json.loads(data_container.text)
artist_data = data["byArtist"]
artist_source_list = []
if "@id" in artist_data:
artist_source_list = [Source(self.SOURCE_TYPE, _parse_artist_url(artist_data["@id"]))]
album = Album(
title=data["name"].strip(),
source_list=[Source(self.SOURCE_TYPE, data.get("mainEntityOfPage", data["@id"]))],
date=ID3Timestamp.strptime(data["datePublished"], "%d %b %Y %H:%M:%S %Z"),
artist_list=[Artist(
name=artist_data["name"].strip(),
source_list=artist_source_list
)]
)
for i, track_json in enumerate(data.get("track", {}).get("itemListElement", [])):
if DEBUG:
dump_to_file(f"album_track_{i}.json", json.dumps(track_json), is_json=True, exit_after_dump=False)
try:
album.song_collection.append(self._parse_track_element(track_json))
except KeyError:
continue
album.source_collection.append(source)
return album
def _fetch_lyrics(self, soup: BeautifulSoup) -> List[Lyrics]:
track_lyrics = soup.find("div", {"class": "lyricsText"})
if track_lyrics:
self.LOGGER.debug(" Lyrics retrieved..")
return [Lyrics(text=FormattedText(html=track_lyrics.prettify()))]
return []
def fetch_song(self, source: Source, stop_at_level: int = 1) -> Song:
r = self.connection.get(source.url)
if r is None:
return Song()
soup = self.get_soup_from_response(r)
data_container = soup.find("script", {"type": "application/ld+json"})
other_data = {}
other_data_list = soup.select("script[data-tralbum]")
if len(other_data_list) > 0:
other_data = json.loads(other_data_list[0]["data-tralbum"])
if DEBUG:
dump_to_file("bandcamp_song_data.json", data_container.text, is_json=True, exit_after_dump=False)
dump_to_file("bandcamp_song_data_other.json", json.dumps(other_data), is_json=True, exit_after_dump=False)
dump_to_file("bandcamp_song_page.html", r.text, exit_after_dump=False)
data = json.loads(data_container.text)
album_data = data["inAlbum"]
artist_data = data["byArtist"]
mp3_url = None
for key, value in other_data.get("trackinfo", [{}])[0].get("file", {"": None}).items():
mp3_url = value
song = Song(
title=data["name"].strip(),
source_list=[Source(self.SOURCE_TYPE, data.get("mainEntityOfPage", data["@id"]), audio_url=mp3_url)],
album_list=[Album(
title=album_data["name"].strip(),
date=ID3Timestamp.strptime(data["datePublished"], "%d %b %Y %H:%M:%S %Z"),
source_list=[Source(self.SOURCE_TYPE, album_data["@id"])]
)],
main_artist_list=[Artist(
name=artist_data["name"].strip(),
source_list=[Source(self.SOURCE_TYPE, _parse_artist_url(artist_data["@id"]))]
)],
lyrics_list=self._fetch_lyrics(soup=soup)
)
song.source_collection.append(source)
return song
def download_song_to_target(self, source: Source, target: Target, desc: str = None) -> DownloadResult:
if source.audio_url is None:
return DownloadResult(error_message="Couldn't find download link.")
return self.connection.stream_into(url=source.audio_url, target=target, description=desc)

View File

@@ -0,0 +1,857 @@
from collections import defaultdict
from typing import List, Optional, Dict, Type, Union
from bs4 import BeautifulSoup
import pycountry
from urllib.parse import urlparse, urlencode
from ..connection import Connection
from ..utils.config import logging_settings
from .abstract import Page
from ..utils.enums.source import SourcePages
from ..utils.enums.album import AlbumType
from ..utils.support_classes.query import Query
from ..objects import (
Lyrics,
Artist,
Source,
Song,
Album,
ID3Timestamp,
FormattedText,
Label,
Options,
DatabaseObject
)
from ..utils.shared import DEBUG
if DEBUG:
from ..utils.debug_utils import dump_to_file
ALBUM_TYPE_MAP: Dict[str, AlbumType] = defaultdict(lambda: AlbumType.OTHER, {
"Full-length": AlbumType.STUDIO_ALBUM,
"Single": AlbumType.SINGLE,
"EP": AlbumType.EP,
"Demo": AlbumType.DEMO,
"Video": AlbumType.OTHER,
"Live album": AlbumType.LIVE_ALBUM,
"Compilation": AlbumType.COMPILATION_ALBUM
})
URL_SITE = 'https://www.metal-archives.com/'
URL_IMAGES = 'https://www.metal-archives.com/images/'
URL_CSS = 'https://www.metal-archives.com/css/'
def _song_from_json(artist_html=None, album_html=None, release_type=None, title=None, lyrics_html=None) -> Song:
song_id = None
if lyrics_html is not None:
soup = BeautifulSoup(lyrics_html, 'html.parser')
anchor = soup.find('a')
raw_song_id = anchor.get('id')
song_id = raw_song_id.replace("lyricsLink_", "")
return Song(
title=title,
main_artist_list=[
_artist_from_json(artist_html=artist_html)
],
album_list=[
_album_from_json(album_html=album_html, release_type=release_type, artist_html=artist_html)
],
source_list=[
Source(SourcePages.ENCYCLOPAEDIA_METALLUM, song_id)
]
)
def _artist_from_json(artist_html=None, genre=None, country=None) -> Artist:
"""
TODO parse the country to a standard
"""
# parse the html
# parse the html for the band name and link on metal-archives
soup = BeautifulSoup(artist_html, 'html.parser')
anchor = soup.find('a')
artist_name = anchor.text
artist_url = anchor.get('href')
artist_id = artist_url.split("/")[-1]
anchor.decompose()
strong = soup.find('strong')
if strong is not None:
strong.decompose()
akronyms_ = soup.text[2:-2].split(', ')
return Artist(
name=artist_name,
source_list=[
Source(SourcePages.ENCYCLOPAEDIA_METALLUM, artist_url)
]
)
def _album_from_json(album_html=None, release_type=None, artist_html=None) -> Album:
# parse the html
# <a href="https://www.metal-archives.com/albums/Ghost_Bath/Self_Loather/970834">Self Loather</a>'
soup = BeautifulSoup(album_html, 'html.parser')
anchor = soup.find('a')
album_name = anchor.text.strip()
album_url = anchor.get('href')
album_id = album_url.split("/")[-1]
album_type = ALBUM_TYPE_MAP[release_type.strip()]
return Album(
title=album_name,
album_type=album_type,
source_list=[
Source(SourcePages.ENCYCLOPAEDIA_METALLUM, album_url)
],
artist_list=[
_artist_from_json(artist_html=artist_html)
]
)
def create_grid(
tableOrId: str = "#searchResultsSong",
nbrPerPage: int = 200,
ajaxUrl: str = "search/ajax-advanced/searching/songs/?songTitle=high&bandName=&releaseTitle=&lyrics=&genre=",
extraOptions: dict = None
):
"""
function createGrid(tableOrId, nbrPerPage, ajaxUrl, extraOptions) {
var table = null;
if (typeof tableOrId == "string") {
table = $(tableOrId);
} else {
table = tableOrId;
}
if (ajaxUrl == undefined) {
ajaxUrl = null;
}
var options = {
bAutoWidth: false,
bFilter: false,
bLengthChange: false,
bProcessing: true,
bServerSide: ajaxUrl != null,
iDisplayLength: nbrPerPage,
sAjaxSource: URL_SITE + ajaxUrl,
sPaginationType: 'full_numbers',
sDom: 'ipl<"block_spacer_5"><"clear"r>f<t>rip',
oLanguage: {
sProcessing: 'Loading...',
sEmptyTable: 'No records to display.',
sZeroRecords: 'No records found.'
},
"fnDrawCallback": autoScrollUp
};
if (typeof extraOptions == "object") {
for (var key in extraOptions) {
options[key] = extraOptions[key];
if (key == 'fnDrawCallback') {
var callback = options[key];
options[key] = function(o) {
autoScrollUp(o);
callback(o);
}
}
}
}
return table.dataTable(options);
}
:return:
"""
def onDrawCallback(o):
"""
this gets executed once the ajax request is done
:param o:
:return:
"""
extraOptions = extraOptions or {
"bSort": False,
"oLanguage": {
"sProcessing": 'Searching, please wait...',
"sEmptyTable": 'No matches found. Please try with different search terms.'
}
}
options = {
"bAutoWidth": False,
"bFilter": False,
"bLengthChange": False,
"bProcessing": True,
"bServerSide": ajaxUrl is not None,
"iDisplayLength": nbrPerPage,
"sAjaxSource": URL_SITE + ajaxUrl,
"sPaginationType": 'full_numbers',
"sDom": 'ipl<"block_spacer_5"><"clear"r>f<t>rip',
"oLanguage": {
"sProcessing": 'Loading...',
"sEmptyTable": 'No records to display.',
"sZeroRecords": 'No records found.'
},
"fnDrawCallback": onDrawCallback
}
for key, value in extraOptions.items():
options[key] = value
if key == 'fnDrawCallback':
callback = options[key]
# call both the default and the extra callback, like the JS version does
def _chained_callback(o, _callback=callback):
onDrawCallback(o)
_callback(o)
options[key] = _chained_callback
# implement jquery datatable
class EncyclopaediaMetallum(Page):
SOURCE_TYPE = SourcePages.ENCYCLOPAEDIA_METALLUM
LOGGER = logging_settings["metal_archives_logger"]
def __init__(self, **kwargs):
self.connection: Connection = Connection(
host="https://www.metal-archives.com/",
logger=self.LOGGER,
module=type(self).__name__
)
super().__init__(**kwargs)
def song_search(self, song: Song) -> List[Song]:
endpoint = "https://www.metal-archives.com/search/ajax-advanced/searching/songs/?"
"""
endpoint = "https://www.metal-archives.com/search/ajax-advanced/searching/songs/?songTitle={song}&bandName={" \
"artist}&releaseTitle={album}&lyrics=&genre=&sEcho=1&iColumns=5&sColumns=&iDisplayStart=0" \
"&iDisplayLength=200&mDataProp_0=0&mDataProp_1=1&mDataProp_2=2&mDataProp_3=3&mDataProp_4=4&_" \
"=1674550595663"
"""
"""
The difficult question I am facing is that if I try every artist with every song with every album,
I end up with a quadratic runtime complexity O(n^2), where every step means one web request.
This.
Is not good.
"""
search_params = {
"songTitle": song.title,
"bandName": "*",
"releaseTitle": "*",
"lyrics": "",
"genre": "",
"sEcho": 1,
"iColumns": 5,
"sColumns": "",
"iDisplayStart": 0,
"iDisplayLength": 200,
"mDataProp_0": 0,
"mDataProp_1": 1,
"mDataProp_2": 2,
"mDataProp_3": 3,
"mDataProp_4": 4,
"_": 1705946986092
}
referer_params = {
"songTitle": song.title,
"bandName": "*",
"releaseTitle": "*",
"lyrics": "",
"genre": "",
}
song_title = song.title.strip()
album_titles = ["*"] if song.album_collection.empty else [album.title.strip() for album in song.album_collection]
artist_titles = ["*"] if song.main_artist_collection.empty else [artist.name.strip() for artist in song.main_artist_collection]
search_results = []
for artist in artist_titles:
for album in album_titles:
_search = search_params.copy()
_referer_params = referer_params.copy()
_search["bandName"] = _referer_params["bandName"] = artist
_search["releaseTitle"] = _referer_params["releaseTitle"] = album
r = self.connection.get(endpoint + urlencode(_search), headers={
"Referer": "https://www.metal-archives.com/search/advanced/searching/songs?" + urlencode(_referer_params),
"Cache-Control": "no-cache",
"Pragma": "no-cache",
"X-Requested-With": "XMLHttpRequest",
}, name="song_search")
if r is None:
return []
search_results.extend(_song_from_json(
artist_html=raw_song[0],
album_html=raw_song[1],
release_type=raw_song[2],
title=raw_song[3],
lyrics_html=raw_song[4]
) for raw_song in r.json()['aaData'])
return search_results
def album_search(self, album: Album) -> List[Album]:
endpoint = "https://www.metal-archives.com/search/ajax-advanced/searching/albums/?"
search_params = {
"bandName": "*",
"releaseTitle": album.title.strip(),
"releaseYearFrom": "",
"releaseMonthFrom": "",
"releaseYearTo": "",
"releaseMonthTo": "",
"country": "",
"location": "",
"releaseLabelName": "",
"releaseCatalogNumber": "",
"releaseIdentifiers": "",
"releaseRecordingInfo": "",
"releaseDescription": "",
"releaseNotes": "",
"genre": "",
"sEcho": 1,
"iColumns": 3,
"sColumns": "",
"iDisplayStart": 0,
"iDisplayLength": 200,
"mDataProp_0": 0,
"mDataProp_1": 1,
"mDataProp_2": 2,
"_": 1705946986092
}
referer_params = {
"bandName": "*",
"releaseTitle": album.title.strip(),
}
album_title = album.title
artist_titles = ["*"] if album.artist_collection.empty else [artist.name.strip() for artist in album.artist_collection]
search_results = []
for artist in artist_titles:
_search = search_params.copy()
_referer_params = referer_params.copy()
_search["bandName"] = _referer_params["bandName"] = artist
r = self.connection.get(endpoint + urlencode(_search), headers={
"Referer": "https://www.metal-archives.com/search/advanced/searching/albums?" + urlencode(_referer_params),
"Cache-Control": "no-cache",
"Pragma": "no-cache",
"X-Requested-With": "XMLHttpRequest",
"Accept": "application/json, text/javascript, */*; q=0.01",
})
#r = self.connection.get(endpoint.format(artist=artist, album=album_title))
if r is None:
return []
search_results.extend(_album_from_json(
artist_html=raw_album[0],
album_html=raw_album[1],
release_type=raw_album[2]
) for raw_album in r.json()['aaData'])
return search_results
def artist_search(self, artist: Artist) -> List[Artist]:
endpoint = "https://www.metal-archives.com/search/ajax-advanced/searching/bands/?"
search_params = {
"bandName": artist.name.strip(),
"genre": "",
"country": "",
"yearCreationFrom": "",
"yearCreationTo": "",
"bandNotes": "",
"status": "",
"themes": "",
"location": "",
"bandLabelName": "",
"sEcho": 1,
"iColumns": 3,
"sColumns": "",
"iDisplayStart": 0,
"iDisplayLength": 200,
"mDataProp_0": 0,
"mDataProp_1": 1,
"mDataProp_2": 2,
"_": 1705946986092
}
r = self.connection.get(endpoint + urlencode(search_params), headers={
"Referer": "https://www.metal-archives.com/search/advanced/searching/bands?" + urlencode({"bandName": artist.name.strip()}),
"Cache-Control": "no-cache",
"Pragma": "no-cache",
"X-Requested-With": "XMLHttpRequest",
"Accept": "application/json, text/javascript, */*; q=0.01",
}, name="artist_search.json")
if r is None:
return []
data_key = 'aaData'
parsed_data = r.json()
if data_key not in parsed_data:
return []
return [
_artist_from_json(artist_html=raw_artist[0], genre=raw_artist[1], country=raw_artist[2])
for raw_artist in r.json()['aaData']
]
def general_search(self, query: str) -> List[DatabaseObject]:
"""
Searches the default endpoint from metal archives, which in turn only
searches for bands; but it is the default, thus I am rolling with it
"""
endpoint = "https://www.metal-archives.com/search/ajax-band-search/?field=name&query={query}&sEcho=1&iColumns=3&sColumns=&iDisplayStart=0&iDisplayLength=200&mDataProp_0=0&mDataProp_1=1&mDataProp_2=2"
r = self.connection.get(endpoint.format(query=query))
if r is None:
return []
return [
_artist_from_json(artist_html=raw_artist[0], genre=raw_artist[1], country=raw_artist[2])
for raw_artist in r.json()['aaData']
]
def _fetch_artist_discography(self, ma_artist_id: str) -> List[Album]:
discography_url = "https://www.metal-archives.com/band/discography/id/{}/tab/all"
# make the request
r = self.connection.get(discography_url.format(ma_artist_id))
if r is None:
return []
soup = self.get_soup_from_response(r)
discography = []
tbody_soup = soup.find('tbody')
for tr_soup in tbody_soup.find_all('tr'):
td_list = tr_soup.findChildren(recursive=False)
album_soup = td_list[0]
album_name = album_soup.text
album_url = album_soup.find('a').get('href')
album_id = album_url.split('/')[-1]
raw_album_type = td_list[1].text
album_year = td_list[2].text
date_obj = None
try:
date_obj = ID3Timestamp(year=int(album_year))
except ValueError:
pass
discography.append(
Album(
title=album_name,
date=date_obj,
album_type=ALBUM_TYPE_MAP[raw_album_type],
source_list=[Source(self.SOURCE_TYPE, album_url)]
)
)
return discography
def _fetch_artist_sources(self, ma_artist_id: str) -> List[Source]:
sources_url = "https://www.metal-archives.com/link/ajax-list/type/band/id/{}"
r = self.connection.get(sources_url.format(ma_artist_id))
if r is None:
return []
soup = self.get_soup_from_response(r)
if DEBUG:
dump_to_file(f"ma_artist_sources_{ma_artist_id}.html", soup.prettify(), exit_after_dump=False)
if soup.find("span", {"id": "noLinks"}) is not None:
return []
source_list = []
link_table: BeautifulSoup = soup.find("table", {"id": "linksTablemain"})
if link_table is not None:
for tr in link_table.find_all("tr"):
anchor: BeautifulSoup = tr.find("a")
if anchor is None:
continue
href = anchor["href"]
if href is not None:
source_list.append(Source.match_url(href, referer_page=self.SOURCE_TYPE))
# The following is legacy code, which I just kept because it does no harm.
# The way ma returns sources changed.
artist_source = soup.find("div", {"id": "band_links"})
merchandice_source = soup.find("div", {"id": "band_links_Official_merchandise"})
label_source = soup.find("div", {"id": "band_links_Labels"})
if artist_source is not None:
for tr in artist_source.find_all("td"):
a = tr.find("a")
url = a.get("href")
if url is None:
continue
source_list.append(Source.match_url(url, referer_page=self.SOURCE_TYPE))
return source_list
def _parse_artist_attributes(self, artist_soup: BeautifulSoup) -> Artist:
name: str = None
country = None  # a pycountry country entry
formed_in_year: int = None
genre: str = None
lyrical_themes: List[str] = []
label_name: str = None
label_url: str = None
source_list: List[Source] = []
title_soup: BeautifulSoup = artist_soup.find("title")
if title_soup is not None:
bad_name_substring = " - Encyclopaedia Metallum: The Metal Archives"
title_text = title_soup.get_text()
if title_text.count(bad_name_substring) == 1:
name = title_text.replace(bad_name_substring, "")
else:
self.LOGGER.debug(f"the title of the page is \"{title_text}\"")
"""
TODO
Implement the bandpictures and logos that can be gotten with the elements
<a class="image" id="photo" title="Ghost Bath"...
<a class="image" id="logo" title="Ghost Bath"...
where the titles are the band name
"""
image_container_soup: BeautifulSoup = artist_soup.find(id="band_sidebar")
if image_container_soup is not None:
logo_soup = image_container_soup.find(id="logo")
if logo_soup is not None:
logo_title = logo_soup.get("title")
if logo_title is not None:
name = logo_title.strip()
band_pictures = image_container_soup.find(id="photo")
if band_pictures is not None:
band_picture_title = logo_soup.get("title")
if band_picture_title is not None:
name = band_picture_title.strip()
for h1_band_name_soup in artist_soup.find_all("h1", {"class": "band_name"}):
anchor: BeautifulSoup = h1_band_name_soup.find("a")
if anchor is None:
continue
href = anchor.get("href")
if href is not None:
source_list.append(Source(self.SOURCE_TYPE, href))
name = anchor.get_text(strip=True)
band_stat_soup = artist_soup.find("div", {"id": "band_stats"})
for dl_soup in band_stat_soup.find_all("dl"):
for title, data in zip(dl_soup.find_all("dt"), dl_soup.find_all("dd")):
title_text = title.text
if "Country of origin:" == title_text:
href = data.find('a').get('href')
country = pycountry.countries.get(alpha_2=href.split("/")[-1])
continue
# not needed: Location: Minot, North Dakota
"""
TODO
status: active
need to do enums for that and add it to object
"""
if "Formed in:" == title_text:
if not data.text.isnumeric():
continue
formed_in_year = int(data.text)
continue
if "Genre:" == title_text:
genre = data.text
continue
if "Lyrical themes:" == title_text:
lyrical_themes = data.text.split(", ")
continue
if "Current label:" == title_text:
label_name = data.text
label_anchor = data.find("a")
label_url = None
if label_anchor is not None:
label_url = label_anchor.get("href")
label_id = None
if type(label_url) is str and "/" in label_url:
label_id = label_url.split("/")[-1]
"""
TODO
years active: 2012-present
process this and add field to class
"""
return Artist(
name=name,
country=country,
formed_in=ID3Timestamp(year=formed_in_year),
general_genre=genre,
lyrical_themes=lyrical_themes,
label_list=[
Label(
name=label_name,
source_list=[
Source(self.SOURCE_TYPE, label_url)
]
)
],
source_list=source_list
)
def _fetch_artist_attributes(self, url: str) -> Artist:
r = self.connection.get(url)
if r is None:
return Artist()
soup: BeautifulSoup = self.get_soup_from_response(r)
return self._parse_artist_attributes(artist_soup=soup)
def _fetch_band_notes(self, ma_artist_id: str) -> Optional[FormattedText]:
endpoint = "https://www.metal-archives.com/band/read-more/id/{}"
# make the request
r = self.connection.get(endpoint.format(ma_artist_id))
if r is None:
return FormattedText()
return FormattedText(html=r.text)
def fetch_artist(self, source: Source, stop_at_level: int = 1) -> Artist:
"""
What it could fetch, and what is implemented:
[x] https://www.metal-archives.com/bands/Ghost_Bath/3540372489
[x] https://www.metal-archives.com/band/discography/id/3540372489/tab/all
[] reviews: https://www.metal-archives.com/review/ajax-list-band/id/3540372489/json/1?sEcho=1&iColumns=4&sColumns=&iDisplayStart=0&iDisplayLength=200&mDataProp_0=0&mDataProp_1=1&mDataProp_2=2&mDataProp_3=3&iSortCol_0=3&sSortDir_0=desc&iSortingCols=1&bSortable_0=true&bSortable_1=true&bSortable_2=true&bSortable_3=true&_=1675155257133
[] similar: https://www.metal-archives.com/band/ajax-recommendations/id/3540372489
[x] sources: https://www.metal-archives.com/link/ajax-list/type/band/id/3540372489
[x] band notes: https://www.metal-archives.com/band/read-more/id/3540372489
"""
artist = self._fetch_artist_attributes(source.url)
artist_id = source.url.split("/")[-1]
artist_sources = self._fetch_artist_sources(artist_id)
artist.source_collection.extend(artist_sources)
band_notes = self._fetch_band_notes(artist_id)
if band_notes is not None:
artist.notes = band_notes
discography: List[Album] = self._fetch_artist_discography(artist_id)
artist.main_album_collection.extend(discography)
return artist
def _parse_album_track_row(self, track_row: BeautifulSoup) -> Song:
"""
<tr class="even">
<td width="20"><a class="anchor" name="5948442"> </a>1.</td> # id and tracksort
<td class="wrapWords">Convince Me to Bleed</td> # name
<td align="right">03:40</td> # length
<td nowrap="nowrap"> 
<a href="#5948442" id="lyricsButton5948442" onclick="toggleLyrics('5948442'); return false;">Show lyrics</a>
</td>
</tr>
"""
row_list = track_row.find_all(recursive=False)
source_list: List[Source] = []
track_sort_soup = row_list[0]
track_sort = int(track_sort_soup.text[:-1])
track_id = track_sort_soup.find("a").get("name").strip()
if track_row.find("a", {"href": f"#{track_id}"}) is not None:
source_list.append(Source(self.SOURCE_TYPE, track_id))
title = row_list[1].text.strip()
length = None
duration_stamp = row_list[2].text
if ":" in duration_stamp:
minutes, seconds = duration_stamp.split(":")
length = (int(minutes) * 60 + int(seconds)) * 1000 # in milliseconds
return Song(
title=title,
length=length,
tracksort=track_sort,
source_list=source_list
)
def _parse_album_attributes(self, album_soup: BeautifulSoup, stop_at_level: int = 1) -> Album:
tracklist: List[Song] = []
artist_list = []
album_name: str = None
source_list: List[Source] = []
def _parse_album_info(album_info_soup: BeautifulSoup):
nonlocal artist_list
nonlocal album_name
nonlocal source_list
if album_info_soup is None:
return
album_soup_list = album_info_soup.find_all("h1", {"class": "album_name"})
if len(album_soup_list) == 1:
anchor: BeautifulSoup = album_soup_list[0].find("a")
href = anchor.get("href")
if href is not None:
source_list.append(Source(self.SOURCE_TYPE, href.strip()))
album_name = anchor.get_text(strip=True)
elif len(album_soup_list) > 1:
self.LOGGER.debug("there are more than 1 album soups")
artist_soup_list = album_info_soup.find_all("h2", {"class": "band_name"})
if len(artist_soup_list) == 1:
for anchor in artist_soup_list[0].find_all("a"):
artist_sources: List[Source] = []
href = anchor.get("href")
if href is not None:
artist_sources.append(Source(self.SOURCE_TYPE, href.strip()))
artist_name = anchor.get_text(strip=True)
artist_list.append(Artist(
name=artist_name,
source_list=artist_sources
))
elif len(artist_soup_list) > 1:
self.LOGGER.debug("there are more than 1 artist soups")
_parse_album_info(album_info_soup=album_soup.find(id="album_info"))
tracklist_soup = album_soup.find("table", {"class": "table_lyrics"}).find("tbody")
for track_soup in tracklist_soup.find_all("tr", {"class": ["even", "odd"]}):
tracklist.append(self._parse_album_track_row(track_row=track_soup))
return Album(
title=album_name,
source_list=source_list,
artist_list=artist_list,
song_list=tracklist
)
def fetch_album(self, source: Source, stop_at_level: int = 1) -> Album:
"""
I am preeeety sure I can get way more data than... nothing from there
:param source:
:param stop_at_level:
:return:
"""
# <table class="display table_lyrics
r = self.connection.get(source.url)
if r is None:
return Album()
soup = self.get_soup_from_response(r)
album = self._parse_album_attributes(soup, stop_at_level=stop_at_level)
return album
def _fetch_lyrics(self, song_id: str) -> Optional[Lyrics]:
"""
function toggleLyrics(songId) {
var lyricsRow = $('#song' + songId);
lyricsRow.toggle();
var lyrics = $('#lyrics_' + songId);
if (lyrics.html() == '(loading lyrics...)') {
var realId = songId;
if(!$.isNumeric(songId.substring(songId.length -1, songId.length))) {
realId = songId.substring(0, songId.length -1);
}
lyrics.load(URL_SITE + "release/ajax-view-lyrics/id/" + realId);
}
// toggle link
var linkLabel = "lyrics";
$("#lyricsButton" + songId).text(lyricsRow.css("display") == "none" ? "Show " + linkLabel : "Hide " + linkLabel);
return false;
}
"""
if song_id is None:
return None
endpoint = "https://www.metal-archives.com/release/ajax-view-lyrics/id/{id}".format(id=song_id)
r = self.connection.get(endpoint)
if r is None:
return None
return Lyrics(
text=FormattedText(html=r.text),
language=pycountry.languages.get(alpha_2="en"),
source_list=[
Source(self.SOURCE_TYPE, endpoint)
]
)
def fetch_song(self, source: Source, stop_at_level: int = 1) -> Song:
song_id = source.url
return Song(
lyrics_list=[
self._fetch_lyrics(song_id=song_id)
]
)
def get_source_type(self, source: Source):
if self.SOURCE_TYPE != source.page_enum:
return None
url = source.url
if url is None:
return None
parsed_url = urlparse(url)
path: List[str] = parsed_url.path.split("/")
if "band" in path:
return Artist
if "bands" in path:
return Artist
if "albums" in path:
return Album
if "labels" in path:
return Label
return None
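# Some examples of how the path matching above classifies urls (the band url is
# taken from the fetch_artist docstring; the other two follow the same scheme
# and are illustrative):
# https://www.metal-archives.com/bands/Ghost_Bath/3540372489 -> Artist ("bands" in path)
# https://www.metal-archives.com/albums/<band>/<album>/<id>  -> Album  ("albums" in path)
# https://www.metal-archives.com/labels/<label>/<id>         -> Label  ("labels" in path)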

1124
music_kraken/pages/musify.py Normal file

File diff suppressed because it is too large

View File

@@ -0,0 +1,65 @@
from typing import List, Optional, Type
from urllib.parse import urlparse
import logging
from ..objects import Source, DatabaseObject
from .abstract import Page
from ..objects import (
Artist,
Source,
SourcePages,
Song,
Album,
Label,
Target
)
from ..connection import Connection
from ..utils.support_classes.query import Query
from ..utils.support_classes.download_result import DownloadResult
class Preset(Page):
# CHANGE
SOURCE_TYPE = SourcePages.PRESET
LOGGER = logging.getLogger("preset")
def __init__(self, *args, **kwargs):
self.connection: Connection = Connection(
host="https://www.preset.cum/",
logger=self.LOGGER
)
super().__init__(*args, **kwargs)
def get_source_type(self, source: Source) -> Optional[Type[DatabaseObject]]:
return super().get_source_type(source)
def general_search(self, search_query: str) -> List[DatabaseObject]:
return []
def label_search(self, label: Label) -> List[Label]:
return []
def artist_search(self, artist: Artist) -> List[Artist]:
return []
def album_search(self, album: Album) -> List[Album]:
return []
def song_search(self, song: Song) -> List[Song]:
return []
def fetch_song(self, source: Source, stop_at_level: int = 1) -> Song:
return Song()
def fetch_album(self, source: Source, stop_at_level: int = 1) -> Album:
return Album()
def fetch_artist(self, source: Source, stop_at_level: int = 1) -> Artist:
return Artist()
def fetch_label(self, source: Source, stop_at_level: int = 1) -> Label:
return Label()
def download_song_to_target(self, source: Source, target: Target, desc: str = None) -> DownloadResult:
return DownloadResult()

View File

@@ -0,0 +1,353 @@
from typing import List, Optional, Type, Tuple
from urllib.parse import urlparse, urlunparse, parse_qs
from enum import Enum
import sponsorblock
from sponsorblock.errors import HTTPException, NotFoundException
from ..objects import Source, DatabaseObject, Song, Target
from .abstract import Page
from ..objects import (
Artist,
Source,
SourcePages,
Song,
Album,
Label,
Target,
FormattedText,
ID3Timestamp
)
from ..connection import Connection
from ..utils.string_processing import clean_song_title
from ..utils.support_classes.download_result import DownloadResult
from ..utils.config import youtube_settings, main_settings, logging_settings
from .youtube_music.super_youtube import SuperYouTube, YouTubeUrl, get_invidious_url, YouTubeUrlType
"""
- https://yt.artemislena.eu/api/v1/search?q=Zombiez+-+Topic&page=1&date=none&type=channel&duration=none&sort=relevance
- https://yt.artemislena.eu/api/v1/channels/playlists/UCV0Ntl3lVR7xDXKoCU6uUXA
- https://yt.artemislena.eu/api/v1/playlists/OLAK5uy_kcUBiDv5ATbl-R20OjNaZ5G28XFanQOmM
- https://yt.artemislena.eu/api/v1/videos/SULFl39UjgY
"""
def get_piped_url(path: str = "", params: str = "", query: str = "", fragment: str = "") -> str:
return urlunparse((youtube_settings["piped_instance"].scheme, youtube_settings["piped_instance"].netloc, path, params, query, fragment))
class YouTube(SuperYouTube):
# CHANGE
SOURCE_TYPE = SourcePages.YOUTUBE
LOGGER = logging_settings["youtube_logger"]
NO_ADDITIONAL_DATA_FROM_SONG = True
def __init__(self, *args, **kwargs):
self.connection: Connection = Connection(
host=get_invidious_url(),
logger=self.LOGGER
)
self.piped_connection: Connection = Connection(
host=get_piped_url(),
logger=self.LOGGER
)
self.download_connection: Connection = Connection(
host="https://www.youtube.com/",
logger=self.LOGGER,
sleep_after_404=youtube_settings["sleep_after_youtube_403"]
)
# the extra connection is there to ensure sponsorblock uses the same proxies my program does
_sponsorblock_connection: Connection = Connection(host="https://sponsor.ajay.app/")
self.sponsorblock_client = sponsorblock.Client(session=_sponsorblock_connection.session)
super().__init__(*args, **kwargs)
def general_search(self, search_query: str) -> List[DatabaseObject]:
return self.artist_search(Artist(name=search_query, dynamic=True))
def _json_to_artist(self, artist_json: dict) -> Artist:
return Artist(
name=artist_json["author"].replace(" - Topic", ""),
source_list=[
Source(self.SOURCE_TYPE, get_invidious_url(path=artist_json["authorUrl"]))
]
)
def artist_search(self, artist: Artist) -> List[Artist]:
# https://yt.artemislena.eu/api/v1/search?q=Zombiez+-+Topic&page=1&date=none&type=channel&duration=none&sort=relevance
endpoint = get_invidious_url(path="/api/v1/search", query=f"q={artist.name.replace(' ', '+')}+-+Topic&page=1&date=none&type=channel&duration=none&sort=relevance")
artist_list = []
r = self.connection.get(endpoint)
if r is None:
return []
for search_result in r.json():
if search_result["type"] != "channel":
continue
author: str = search_result["author"]
if not author.endswith(" - Topic"):
continue
artist_list.append(self._json_to_artist(search_result))
return artist_list
def _fetch_song_from_id(self, youtube_id: str) -> Tuple[Song, Optional[int]]:
# https://yt.artemislena.eu/api/v1/videos/SULFl39UjgY
r = self.connection.get(get_invidious_url(path=f"/api/v1/videos/{youtube_id}"))
if r is None:
return Song(), None
data = r.json()
if data["genre"] != "Music":
self.LOGGER.warning(f"Genre has to be music, trying anyways")
title = data["title"]
license_str = None
artist_list: List[Artist] = []
_author: str = data["author"]
if _author.endswith(" - Topic"):
artist_list.append(Artist(
name=_author.replace(" - Topic", ""),
source_list=[Source(
self.SOURCE_TYPE, get_invidious_url(path=f"/channel/{data['authorId']}")
)]
))
else:
# If the song is not from a topic channel, clean the title; titles from topic channels are clean anyway.
# If the api returns cleaned data, it will be overridden in the next step anyway.
title = clean_song_title(title, _author)
for music_track in data.get("musicTracks", []):
title = music_track["song"]
license_str = music_track["license"]
for artist_name in music_track["artist"].split(" x "):
artist_list.append(Artist(name=artist_name))
# if all attempts to get a clean artist name (mainly stripping " - Topic" or reading it from the api) fail, just add an artist with the name of the uploader channel
if len(artist_list) == 0:
artist_list.append(Artist(name=_author))
return Song(
title=title,
source_list=[Source(
self.SOURCE_TYPE, get_invidious_url(path="/watch", query=f"v={data['videoId']}")
)],
notes=FormattedText(html=data["descriptionHtml"] + f"\n<p>{license_str}</ p>" ),
main_artist_list=artist_list
), int(data["published"])
def fetch_song(self, source: Source, stop_at_level: int = 1) -> Song:
parsed = YouTubeUrl(source.url)
if parsed.url_type != YouTubeUrlType.VIDEO:
return Song()
song, _ = self._fetch_song_from_id(parsed.id)
return song
def fetch_album(self, source: Source, stop_at_level: int = 1) -> Album:
self.LOGGER.info(f"Getting the metadata of an album may take slightly longer, only panic in a couple minutes <333")
parsed = YouTubeUrl(source.url)
if parsed.url_type != YouTubeUrlType.PLAYLIST:
return Album()
title = None
source_list = [source]
notes = None
song_list = []
# https://yt.artemislena.eu/api/v1/playlists/OLAK5uy_kcUBiDv5ATbl-R20OjNaZ5G28XFanQOmM
r = self.connection.get(get_invidious_url(path=f"/api/v1/playlists/{parsed.id}"))
if r is None:
return Album()
data = r.json()
if data["type"] != "playlist":
return Album()
title = data["title"]
notes = FormattedText(html=data["descriptionHtml"])
timestamps: List[int] = []
"""
TODO
fetch the song and don't get it from there
"""
for video in data["videos"]:
other_song = Song(
source_list=[
Source(
self.SOURCE_TYPE, get_invidious_url(path="/watch", query=f"v={video['videoId']}")
)
],
tracksort=video["index"]+1
)
song, utc_timestamp = self._fetch_song_from_id(video["videoId"])
song.merge(other_song)
if utc_timestamp is not None:
timestamps.append(utc_timestamp)
song_list.append(song)
return Album(
title=title,
source_list=source_list,
notes=notes,
song_list=song_list,
# guard against an empty playlist, which would otherwise divide by zero
date=ID3Timestamp.fromtimestamp(round(sum(timestamps) / len(timestamps))) if timestamps else None
)
def fetch_invidious_album_list(self, yt_id: str):
artist_name = None
album_list = []
# playlist
# https://yt.artemislena.eu/api/v1/channels/playlists/UCV0Ntl3lVR7xDXKoCU6uUXA
r = self.connection.get(get_invidious_url(f"/api/v1/channels/playlists/{yt_id}"))
if r is None:
# this function returns (album_list, artist_name); returning an Artist here was a bug
return [], None
for playlist_json in r.json()["playlists"]:
if playlist_json["type"] != "playlist":
continue
artist_name = playlist_json["author"].replace(" - Topic", "")
# /playlist?list=OLAK5uy_nbvQeskr8nbIuzeLxoceNLuCL_KjAmzVw
album_list.append(Album(
title=playlist_json["title"],
source_list=[Source(
self.SOURCE_TYPE, get_invidious_url(path="/playlist", query=f"list={playlist_json['playlistId']}")
)],
artist_list=[Artist(
name=artist_name,
source_list=[
Source(self.SOURCE_TYPE, get_invidious_url(path=playlist_json["authorUrl"]))
]
)]
))
return album_list, artist_name
def fetch_piped_album_list(self, yt_id: str):
endpoint = get_piped_url(path=f"/channels/tabs", query='data={"originalUrl":"https://www.youtube.com/' + yt_id + '/playlists","url":"https://www.youtube.com/' + yt_id + 'playlists","id":"' + yt_id + '","contentFilters":["playlists"],"sortFilter":"","baseUrl":"https://www.youtube.com"}')
r = self.piped_connection.get(endpoint)
if r is None:
return [], None
content = r.json()["content"]
artist_name = None
album_list = []
for playlist in content:
if playlist["type"] != "playlist":
continue
artist_name = playlist["uploaderName"].replace(" - Topic", "")
album_list.append(Album(
title=playlist["name"],
source_list=[Source(
self.SOURCE_TYPE, get_invidious_url() + playlist["url"]
)],
artist_list=[Artist(
name=artist_name,
source_list=[
Source(self.SOURCE_TYPE, get_invidious_url(path=playlist["uploaderUrl"]))
]
)]
))
return album_list, artist_name
def fetch_artist(self, source: Source, stop_at_level: int = 1) -> Artist:
parsed = YouTubeUrl(source.url)
if parsed.url_type != YouTubeUrlType.CHANNEL:
return Artist(source_list=[source])
album_list, artist_name = self.fetch_piped_album_list(parsed.id)
if len(album_list) <= 0:
self.LOGGER.warning(f"didn't found any playlists with piped, falling back to invidious. (it is unusual)")
album_list, artist_name = self.fetch_invidious_album_list(parsed.id)
return Artist(name=artist_name, main_album_list=album_list, source_list=[source])
def download_song_to_target(self, source: Source, target: Target, desc: str = None) -> DownloadResult:
"""
1. getting the optimal source
Only audio sources allowed
not a bitrate that is smaller than the selected bitrate, but not one that is wayyy huger
2. download it
:param source:
:param target:
:param desc:
:return:
"""
r = self.connection.get(YouTubeUrl(source.url).api)
if r is None:
return DownloadResult(error_message="Api didn't even respond, maybe try another invidious Instance")
audio_format = None
best_bitrate = 0
for possible_format in r.json()["adaptiveFormats"]:
format_type: str = possible_format["type"]
if not format_type.startswith("audio"):
continue
bitrate = int(possible_format.get("bitrate", 0))
if bitrate >= main_settings["bitrate"]:
best_bitrate = bitrate
audio_format = possible_format
break
if bitrate > best_bitrate:
best_bitrate = bitrate
audio_format = possible_format
if audio_format is None:
return DownloadResult(error_message="Couldn't find the download link.")
endpoint = audio_format["url"]
return self.download_connection.stream_into(endpoint, target, name=desc, raw_url=True)
def get_skip_intervals(self, song: Song, source: Source) -> List[Tuple[float, float]]:
if not youtube_settings["use_sponsor_block"]:
return []
parsed = YouTubeUrl(source.url)
if parsed.url_type != YouTubeUrlType.VIDEO:
self.LOGGER.warning(f"{source.url} is no video url.")
return []
segments = []
try:
segments = self.sponsorblock_client.get_skip_segments(parsed.id)
except NotFoundException:
self.LOGGER.debug(f"No sponsor found for the video {parsed.id}.")
except HTTPException as e:
self.LOGGER.warning(f"{e}")
return [(segment.start, segment.end) for segment in segments]

View File

@@ -0,0 +1 @@
from .youtube_music import YoutubeMusic

View File

@@ -0,0 +1,112 @@
from typing import List, Optional, Dict, Type
from enum import Enum
from ...utils.config import logging_settings
from ...objects import Source, DatabaseObject
from ..abstract import Page
from ...objects import (
Artist,
Source,
SourcePages,
Song,
Album,
Label,
Target
)
from ._music_object_render import parse_run_list, parse_run_element
LOGGER = logging_settings["youtube_music_logger"]
def music_card_shelf_renderer(renderer: dict) -> List[DatabaseObject]:
results = parse_run_list(renderer.get("title", {}).get("runs", []))
for sub_renderer in renderer.get("contents", []):
results.extend(parse_renderer(sub_renderer))
return results
def music_responsive_list_item_flex_column_renderer(renderer: dict) -> List[DatabaseObject]:
return parse_run_list(renderer.get("text", {}).get("runs", []))
def music_responsive_list_item_renderer(renderer: dict) -> List[DatabaseObject]:
results = []
for i, column in enumerate(renderer.get("flexColumns", [])):
_r = parse_renderer(column)
if i == 0 and len(_r) == 0:
renderer["text"] = \
column.get("musicResponsiveListItemFlexColumnRenderer", {}).get("text", {}).get("runs", [{}])[0].get(
"text")
results.extend(_r)
_r = parse_run_element(renderer)
if _r is not None:
results.append(_r)
song_list: List[Song] = []
album_list: List[Album] = []
artist_list: List[Artist] = []
_map: Dict[Type[DatabaseObject], List[DatabaseObject]] = {Song: song_list, Album: album_list, Artist: artist_list}
for result in results:
_map[type(result)].append(result)
for song in song_list:
song.album_collection.extend(album_list)
song.main_artist_collection.extend(artist_list)
for album in album_list:
album.artist_collection.extend(artist_list)
if len(song_list) > 0:
return song_list
if len(album_list) > 0:
return album_list
if len(artist_list) > 0:
return artist_list
return results
def music_shelf_renderer(renderer: dict) -> List[DatabaseObject]:
result = []
for subrenderer in renderer.get("contents"):
result.extend(parse_renderer(subrenderer))
return result
def music_carousel_shelf_renderer(renderer: dict):
return music_shelf_renderer(renderer=renderer)
def music_two_row_item_renderer(renderer: dict):
return parse_run_list(renderer.get("title", {}).get("runs", []))
RENDERER_PARSERS = {
"musicCardShelfRenderer": music_card_shelf_renderer,
"musicResponsiveListItemRenderer": music_responsive_list_item_renderer,
"musicResponsiveListItemFlexColumnRenderer": music_responsive_list_item_flex_column_renderer,
"musicShelfRenderer": music_card_shelf_renderer,
"musicCarouselShelfRenderer": music_carousel_shelf_renderer,
"musicTwoRowItemRenderer": music_two_row_item_renderer,
"itemSectionRenderer": lambda _: [],
}
def parse_renderer(renderer: dict) -> List[DatabaseObject]:
result: List[DatabaseObject] = []
# don't shadow the dict being iterated; bind each value to its own name
for renderer_name, sub_renderer in renderer.items():
if renderer_name not in RENDERER_PARSERS:
LOGGER.warning(f"Can't parse the renderer {renderer_name}.")
continue
result.extend(RENDERER_PARSERS[renderer_name](sub_renderer))
return result

View File

@@ -0,0 +1,85 @@
from typing import List, Optional
from enum import Enum
from ...utils.config import youtube_settings, logging_settings
from ...objects import Source, DatabaseObject
from ..abstract import Page
from ...objects import (
Artist,
Source,
SourcePages,
Song,
Album,
Label,
Target
)
LOGGER = logging_settings["youtube_music_logger"]
SOURCE_PAGE = SourcePages.YOUTUBE_MUSIC
class PageType(Enum):
ARTIST = "MUSIC_PAGE_TYPE_ARTIST"
ALBUM = "MUSIC_PAGE_TYPE_ALBUM"
CHANNEL = "MUSIC_PAGE_TYPE_USER_CHANNEL"
PLAYLIST = "MUSIC_PAGE_TYPE_PLAYLIST"
SONG = "MUSIC_VIDEO_TYPE_ATV"
VIDEO = "MUSIC_VIDEO_TYPE_UGC"
OFFICIAL_MUSIC_VIDEO = "MUSIC_VIDEO_TYPE_OMV"
# returns this type if you search for the band Queen
# S = "MUSIC_VIDEO_TYPE_OFFICIAL_SOURCE_MUSIC"
def parse_run_element(run_element: dict) -> Optional[DatabaseObject]:
if "navigationEndpoint" not in run_element:
return
_temp_nav = run_element.get("navigationEndpoint", {})
is_video = "watchEndpoint" in _temp_nav
navigation_endpoint = _temp_nav.get("watchEndpoint" if is_video else "browseEndpoint", {})
element_type = PageType.SONG
page_type_string = navigation_endpoint.get("watchEndpointMusicSupportedConfigs", {}).get("watchEndpointMusicConfig", {}).get("musicVideoType", "")
if not is_video:
page_type_string = navigation_endpoint.get("browseEndpointContextSupportedConfigs", {}).get("browseEndpointContextMusicConfig", {}).get("pageType", "")
try:
element_type = PageType(page_type_string)
except ValueError:
return
element_id = navigation_endpoint.get("videoId" if is_video else "browseId")
element_text = run_element.get("text")
if element_id is None or element_text is None:
LOGGER.warning("Couldn't find either the id or text of a Youtube music element.")
return
if element_type == PageType.SONG or (element_type == PageType.VIDEO and not youtube_settings["youtube_music_clean_data"]) or (element_type == PageType.OFFICIAL_MUSIC_VIDEO and not youtube_settings["youtube_music_clean_data"]):
source = Source(SOURCE_PAGE, f"https://music.youtube.com/watch?v={element_id}")
return Song(title=element_text, source_list=[source])
if element_type == PageType.ARTIST or (element_type == PageType.CHANNEL and not youtube_settings["youtube_music_clean_data"]):
source = Source(SOURCE_PAGE, f"https://music.youtube.com/channel/{element_id}")
return Artist(name=element_text, source_list=[source])
if element_type == PageType.ALBUM or (element_type == PageType.PLAYLIST and not youtube_settings["youtube_music_clean_data"]):
source = Source(SOURCE_PAGE, f"https://music.youtube.com/playlist?list={element_id}")
return Album(title=element_text, source_list=[source])
LOGGER.debug(f"Type {page_type_string} wasn't implemented.")
def parse_run_list(run_list: List[dict]) -> List[DatabaseObject]:
music_object_list: List[DatabaseObject] = []
for run_renderer in run_list:
music_object = parse_run_element(run_renderer)
if music_object is None:
continue
music_object_list.append(music_object)
return music_object_list
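# A hedged sketch of a run element this parser accepts; the keys mirror the
# lookups in parse_run_element above, while the id and text are made up:
# {
#     "text": "Some Band",
#     "navigationEndpoint": {"browseEndpoint": {
#         "browseId": "UCxxxxxxxxxxxxxxxxxxxxxx",
#         "browseEndpointContextSupportedConfigs": {
#             "browseEndpointContextMusicConfig": {"pageType": "MUSIC_PAGE_TYPE_ARTIST"}
#         }
#     }}
# }
# parse_run_element would map this to an Artist named "Some Band" with a
# https://music.youtube.com/channel/UCxxxxxxxxxxxxxxxxxxxxxx source.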

View File

@@ -0,0 +1,222 @@
from typing import List, Optional, Type, Tuple
from urllib.parse import urlparse, urlunparse, parse_qs
from enum import Enum
import requests
import sponsorblock
from sponsorblock.errors import HTTPException, NotFoundException
from ...objects import Source, DatabaseObject, Song, Target
from ..abstract import Page
from ...objects import (
Artist,
Source,
SourcePages,
Song,
Album,
Label,
Target,
FormattedText,
ID3Timestamp
)
from ...connection import Connection
from ...utils.support_classes.download_result import DownloadResult
from ...utils.config import youtube_settings, logging_settings, main_settings
def get_invidious_url(path: str = "", params: str = "", query: str = "", fragment: str = "") -> str:
return urlunparse((youtube_settings["invidious_instance"].scheme, youtube_settings["invidious_instance"].netloc, path, params, query, fragment))
class YouTubeUrlType(Enum):
CHANNEL = "channel"
PLAYLIST = "playlist"
VIDEO = "watch"
NONE = ""
class YouTubeUrl:
"""
Artist
https://yt.artemislena.eu/channel/UCV0Ntl3lVR7xDXKoCU6uUXA
https://www.youtube.com/channel/UCV0Ntl3lVR7xDXKoCU6uUXA
Release
https://yt.artemislena.eu/playlist?list=OLAK5uy_nEg5joAyFjHBPwnS_ADHYtgSqAjFMQKLw
https://www.youtube.com/playlist?list=OLAK5uy_nEg5joAyFjHBPwnS_ADHYtgSqAjFMQKLw
Track
https://yt.artemislena.eu/watch?v=SULFl39UjgY&list=OLAK5uy_nEg5joAyFjHBPwnS_ADHYtgSqAjFMQKLw&index=1
https://www.youtube.com/watch?v=SULFl39UjgY
"""
def __init__(self, url: str) -> None:
self.SOURCE_TYPE = SourcePages.YOUTUBE
"""
Raises Index exception for wrong url, and value error for not found enum type
"""
self.id = ""
parsed = urlparse(url=url)
if parsed.netloc == "music.youtube.com":
self.SOURCE_TYPE = SourcePages.YOUTUBE_MUSIC
self.url_type: YouTubeUrlType
type_frag_list = parsed.path.split("/")
if len(type_frag_list) < 2:
self.url_type = YouTubeUrlType.NONE
else:
try:
self.url_type = YouTubeUrlType(type_frag_list[1].strip())
except ValueError:
self.url_type = YouTubeUrlType.NONE
if self.url_type == YouTubeUrlType.CHANNEL:
if len(type_frag_list) < 3:
self.couldnt_find_id(url)
else:
self.id = type_frag_list[2]
elif self.url_type == YouTubeUrlType.PLAYLIST:
query_stuff = parse_qs(parsed.query)
if "list" not in query_stuff:
self.couldnt_find_id(url)
else:
self.id = query_stuff["list"][0]
elif self.url_type == YouTubeUrlType.VIDEO:
query_stuff = parse_qs(parsed.query)
if "v" not in query_stuff:
self.couldnt_find_id(url)
else:
self.id = query_stuff["v"][0]
def couldnt_find_id(self, url: str):
logging_settings["youtube_logger"].warning(f"The id is missing: {url}")
self.url_type = YouTubeUrlType.NONE
@property
def api(self) -> str:
if self.url_type == YouTubeUrlType.CHANNEL:
return get_invidious_url(path=f"/api/v1/channels/playlists/{self.id}")
if self.url_type == YouTubeUrlType.PLAYLIST:
return get_invidious_url(path=f"/api/v1/playlists/{id}")
if self.url_type == YouTubeUrlType.VIDEO:
return get_invidious_url(path=f"/api/v1/videos/{self.id}")
return get_invidious_url()
@property
def normal(self) -> str:
if self.url_type == YouTubeUrlType.CHANNEL:
return get_invidious_url(path=f"/channel/{self.id}")
if self.url_type == YouTubeUrlType.PLAYLIST:
return get_invidious_url(path="/playlist", query=f"list={self.id}")
if self.url_type == YouTubeUrlType.VIDEO:
return get_invidious_url(path="/watch", query=f"v={self.id}")
return get_invidious_url()
class SuperYouTube(Page):
# CHANGE
SOURCE_TYPE = SourcePages.YOUTUBE
LOGGER = logging_settings["youtube_logger"]
NO_ADDITIONAL_DATA_FROM_SONG = True
def __init__(self, *args, **kwargs):
self.download_connection: Connection = Connection(
host="https://www.youtube.com/",
logger=self.LOGGER,
sleep_after_404=youtube_settings["sleep_after_youtube_403"]
)
self.connection: Connection = Connection(
host=get_invidious_url(),
logger=self.LOGGER
)
# the extra connection is there to ensure sponsorblock uses the same proxies my program does
_sponsorblock_connection: Connection = Connection(host="https://sponsor.ajay.app/")
self.sponsorblock_client = sponsorblock.Client(session=_sponsorblock_connection.session)
def get_source_type(self, source: Source) -> Optional[Type[DatabaseObject]]:
_url_type = {
YouTubeUrlType.CHANNEL: Artist,
YouTubeUrlType.PLAYLIST: Album,
YouTubeUrlType.VIDEO: Song,
}
parsed = YouTubeUrl(source.url)
if parsed.url_type in _url_type:
return _url_type[parsed.url_type]
def download_song_to_target(self, source: Source, target: Target, desc: str = None) -> DownloadResult:
"""
1. getting the optimal source
Only audio sources allowed
not a bitrate that is smaller than the selected bitrate, but not one that is wayyy huger
2. download it
:param source:
:param target:
:param desc:
:return:
"""
r: requests.Response = self.connection.get(YouTubeUrl(source.url).api)
if r is None:
return DownloadResult(error_message="Api didn't even respond, maybe try another invidious Instance")
audio_format = None
best_bitrate = 0
for possible_format in r.json()["adaptiveFormats"]:
format_type: str = possible_format["type"]
if not format_type.startswith("audio"):
continue
bitrate = int(possible_format.get("bitrate", 0))
if bitrate >= main_settings["bitrate"]:
best_bitrate = bitrate
audio_format = possible_format
break
if bitrate > best_bitrate:
best_bitrate = bitrate
audio_format = possible_format
if audio_format is None:
return DownloadResult(error_message="Couldn't find the download link.")
endpoint = audio_format["url"]
return self.download_connection.stream_into(endpoint, target, name=desc, raw_url=True)
def get_skip_intervals(self, song: Song, source: Source) -> List[Tuple[float, float]]:
if not youtube_settings["use_sponsor_block"]:
return []
parsed = YouTubeUrl(source.url)
if parsed.url_type != YouTubeUrlType.VIDEO:
self.LOGGER.warning(f"{source.url} is no video url.")
return []
segments = []
try:
segments = self.sponsorblock_client.get_skip_segments(parsed.id)
except NotFoundException:
self.LOGGER.debug(f"No sponsor found for the video {parsed.id}.")
except HTTPException as e:
self.LOGGER.warning(f"{e}")
return [(segment.start, segment.end) for segment in segments]

View File

@@ -0,0 +1,542 @@
from __future__ import unicode_literals, annotations
from typing import Dict, List, Optional, Set, Type
from urllib.parse import urlparse, urlunparse, quote, parse_qs, urlencode
import logging
import random
import json
from dataclasses import dataclass
import re
from functools import lru_cache
import youtube_dl
from youtube_dl.extractor.youtube import YoutubeIE
from ...utils.exception.config import SettingValueError
from ...utils.config import main_settings, youtube_settings, logging_settings
from ...utils.shared import DEBUG, DEBUG_YOUTUBE_INITIALIZING
from ...utils.functions import get_current_millis
if DEBUG:
from ...utils.debug_utils import dump_to_file
from ...objects import Source, DatabaseObject
from ..abstract import Page
from ...objects import (
Artist,
Source,
SourcePages,
Song,
Album,
Label,
Target
)
from ...connection import Connection
from ...utils.support_classes.download_result import DownloadResult
from ._list_render import parse_renderer
from .super_youtube import SuperYouTube
def get_youtube_url(path: str = "", params: str = "", query: str = "", fragment: str = "") -> str:
return urlunparse(("https", "music.youtube.com", path, params, query, fragment))
class YoutubeMusicConnection(Connection):
"""
===heartbeat=timings=for=YOUTUBEMUSIC===
96.27
98.16
100.04
101.93
103.82
--> average delay in between: 1.8875 min
"""
def __init__(self, logger: logging.Logger, accept_language: str):
# https://stackoverflow.com/questions/30561260/python-change-accept-language-using-requests
super().__init__(
host="https://music.youtube.com/",
logger=logger,
heartbeat_interval=113.25,
header_values={
"Accept-Language": accept_language
},
module="youtube_music",
)
# cookie consent for youtube
# https://stackoverflow.com/a/66940841/16804841 doesn't work
for cookie_key, cookie_value in youtube_settings["youtube_music_consent_cookies"].items():
self.session.cookies.set(
name=cookie_key,
value=cookie_value,
path='/', domain='.youtube.com'
)
def heartbeat(self):
r = self.get("https://music.youtube.com/verify_session")
if r is None:
self.heartbeat_failed()
return
string = r.text
data = json.loads(string[string.index("{"):])
success: bool = data["success"]
if not success:
self.heartbeat_failed()
@dataclass
class YouTubeMusicCredentials:
api_key: str
# ctoken is probably short for continue-token
# It is probably not strictly necessary, but hey :))
ctoken: str
# the context in requests
context: dict
player_url: str
@property
def player_id(self):
@lru_cache(128)
def _extract_player_info(player_url):
_PLAYER_INFO_RE = (
r'/s/player/(?P<id>[a-zA-Z0-9_-]{8,})/player',
r'/(?P<id>[a-zA-Z0-9_-]{8,})/player(?:_ias\.vflset(?:/[a-zA-Z]{2,3}_[a-zA-Z]{2,3})?|-plasma-ias-(?:phone|tablet)-[a-z]{2}_[A-Z]{2}\.vflset)/base\.js$',
r'\b(?P<id>vfl[a-zA-Z0-9_-]+)\b.*?\.js$',
)
for player_re in _PLAYER_INFO_RE:
id_m = re.search(player_re, player_url)
if id_m:
break
else:
return
return id_m.group('id')
return _extract_player_info(self.player_url)
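# For example, a (made up) player_url like
# "https://music.youtube.com/s/player/ab12cd34/player_ias.vflset/en_US/base.js"
# should match the first pattern above and yield the player_id "ab12cd34".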
class YTDLLogger:
def __init__(self, logger: logging.Logger):
self.logger = logger
def debug(self, msg):
self.logger.debug(msg)
def warning(self, msg):
self.logger.warning(msg)
def error(self, msg):
self.logger.error(msg)
class MusicKrakenYoutubeDL(youtube_dl.YoutubeDL):
def __init__(self, main_instance: YoutubeMusic, ydl_opts: dict, **kwargs):
self.main_instance = main_instance
ydl_opts = ydl_opts or {}
ydl_opts.update({
"logger": YTDLLogger(self.main_instance.LOGGER),
})
super().__init__(ydl_opts, **kwargs)
super().__enter__()
def __del__(self):
super().__exit__(None, None, None)
class MusicKrakenYoutubeIE(YoutubeIE):
def __init__(self, *args, main_instance: YoutubeMusic, **kwargs):
self.main_instance = main_instance
super().__init__(*args, **kwargs)
class YoutubeMusic(SuperYouTube):
# CHANGE
SOURCE_TYPE = SourcePages.YOUTUBE_MUSIC
LOGGER = logging_settings["youtube_music_logger"]
def __init__(self, *args, ydl_opts: dict = None, **kwargs):
self.yt_music_connection: YoutubeMusicConnection = YoutubeMusicConnection(
logger=self.LOGGER,
accept_language="en-US,en;q=0.5"
)
self.credentials: YouTubeMusicCredentials = YouTubeMusicCredentials(
api_key=youtube_settings["youtube_music_api_key"],
ctoken="",
context=youtube_settings["youtube_music_innertube_context"],
player_url=youtube_settings["player_url"],
)
self.start_millis = get_current_millis()
if self.credentials.api_key == "" or DEBUG_YOUTUBE_INITIALIZING:
self._fetch_from_main_page()
SuperYouTube.__init__(self, *args, **kwargs)
self.download_connection: Connection = Connection(
host="https://rr2---sn-cxaf0x-nugl.googlevideo.com/",
logger=self.LOGGER,
sleep_after_404=youtube_settings["sleep_after_youtube_403"],
header_values={
"Referer": "https://music.youtube.com/",
'Origin': 'https://music.youtube.com',
}
)
# https://github.com/ytdl-org/youtube-dl/blob/master/README.md#embedding-youtube-dl
self.ydl = MusicKrakenYoutubeDL(self, ydl_opts)
self.yt_ie = MusicKrakenYoutubeIE(downloader=self.ydl, main_instance=self)
def _fetch_from_main_page(self):
"""
===API=KEY===
AIzaSyC9XL3ZjWddXya6X74dJoCTL-WEYFDNX30
can be found at `view-source:https://music.youtube.com/`
search for: "innertubeApiKey"
"""
r = self.yt_music_connection.get("https://music.youtube.com/")
if r is None:
return
if urlparse(r.url).netloc == "consent.youtube.com":
self.LOGGER.info(f"Making cookie consent request for {type(self).__name__}.")
r = self.yt_music_connection.post("https://consent.youtube.com/save", data={
'gl': 'DE',
'm': '0',
'app': '0',
'pc': 'ytm',
'continue': 'https://music.youtube.com/?cbrd=1',
'x': '6',
'bl': 'boq_identityfrontenduiserver_20230905.04_p0',
'hl': 'en',
'src': '1',
'cm': '2',
'set_ytc': 'true',
'set_apyt': 'true',
'set_eom': 'false'
})
if r is None:
return
# load cookie dict from settings
cookie_dict = youtube_settings["youtube_music_consent_cookies"]
for cookie in r.cookies:
cookie_dict[cookie.name] = cookie.value
for cookie in self.yt_music_connection.session.cookies:
cookie_dict[cookie.name] = cookie.value
# save cookies in settings
youtube_settings["youtube_music_consent_cookies"] = cookie_dict
else:
self.yt_music_connection.save(r, "index.html")
r = self.yt_music_connection.get("https://music.youtube.com/", name="index.html")
if r is None:
return
content = r.text
if DEBUG:
dump_to_file(f"youtube_music_index.html", r.text, exit_after_dump=False)
# api key
api_key_pattern = (
r"(?<=\"innertubeApiKey\":\")(.*?)(?=\")",
r"(?<=\"INNERTUBE_API_KEY\":\")(.*?)(?=\")",
)
api_keys = []
for pattern in api_key_pattern:
api_keys.extend(re.findall(pattern, content))
found_a_good_api_key = False
for api_key in api_keys:
# try each found key and keep the first one the settings accept
try:
youtube_settings["youtube_music_api_key"] = api_key
except SettingValueError:
continue
found_a_good_api_key = True
break
if found_a_good_api_key:
self.LOGGER.info(f"Found a valid API-KEY for {type(self).__name__}: \"{api_key}\"")
else:
self.LOGGER.error(f"Couldn't find an API-KEY for {type(self).__name__}. :((")
# context
context_pattern = r"(?<=\"INNERTUBE_CONTEXT\":{)(.*?)(?=},\"INNERTUBE_CONTEXT_CLIENT_NAME\":)"
found_context = False
for context_string in re.findall(context_pattern, content, re.M):
try:
youtube_settings["youtube_music_innertube_context"] = json.loads("{" + context_string + "}")
found_context = True
except json.decoder.JSONDecodeError:
continue
self.credentials.context = youtube_settings["youtube_music_innertube_context"]
break
if not found_context:
self.LOGGER.warning(f"Couldn't find a context for {type(self).__name__}.")
# player url
"""
Thanks to youtube-dl <33
"""
player_pattern = [
r'(?<="jsUrl":")(.*?)(?=")',
r'(?<="PLAYER_JS_URL":")(.*?)(?=")'
]
found_player_url = False
for pattern in player_pattern:
for player_string in re.findall(pattern, content, re.M):
try:
youtube_settings["player_url"] = "https://music.youtube.com" + player_string
found_player_url = True
except SettingValueError:  # no json is parsed here; the settings write is what can fail
continue
self.credentials.player_url = youtube_settings["player_url"]
break
if found_player_url:
break
if not found_player_url:
self.LOGGER.warning(f"Couldn't find an url for the video player.")
# ytcfg
youtube_settings["ytcfg"] = json.loads(self._search_regex(
r'ytcfg\.set\s*\(\s*({.+?})\s*\)\s*;',
content,
default='{}'
)) or {}
def get_source_type(self, source: Source) -> Optional[Type[DatabaseObject]]:
return super().get_source_type(source)
def general_search(self, search_query: str) -> List[DatabaseObject]:
search_query = search_query.strip()
urlescaped_query: str = quote(search_query.strip().replace(" ", "+"))
# approximate the amount of time it would take to type the search, because google for some reason tracks that
LAST_EDITED_TIME = get_current_millis() - random.randint(0, 20)
# 50-100 ms of simulated typing per character
_estimated_time = sum(random.randint(50, 100) for _ in search_query.strip())
FIRST_EDITED_TIME = LAST_EDITED_TIME - _estimated_time if LAST_EDITED_TIME - self.start_millis > _estimated_time else random.randint(
50, 100)
query_continue = "" if self.credentials.ctoken == "" else f"&ctoken={self.credentials.ctoken}&continuation={self.credentials.ctoken}"
# construct the request
r = self.yt_music_connection.post(
url=get_youtube_url(path="/youtubei/v1/search",
query=f"key={self.credentials.api_key}&prettyPrint=false" + query_continue),
json={
"context": {**self.credentials.context, "adSignalsInfo": {"params": []}},
"query": search_query,
"suggestStats": {
"clientName": "youtube-music",
"firstEditTimeMsec": FIRST_EDITED_TIME,
"inputMethod": "KEYBOARD",
"lastEditTimeMsec": LAST_EDITED_TIME,
"originalQuery": search_query,
"parameterValidationStatus": "VALID_PARAMETERS",
"searchMethod": "ENTER_KEY",
"validationStatus": "VALID",
"zeroPrefixEnabled": True,
"availableSuggestions": []
}
},
headers={
"Referer": get_youtube_url(path=f"/search", query=f"q={urlescaped_query}")
}
)
if r is None:
return []
renderer_list = r.json().get("contents", {}).get("tabbedSearchResultsRenderer", {}).get("tabs", [{}])[0].get(
"tabRenderer").get("content", {}).get("sectionListRenderer", {}).get("contents", [])
if DEBUG:
for i, content in enumerate(renderer_list):
dump_to_file(f"{i}-renderer.json", json.dumps(content), is_json=True, exit_after_dump=False)
results = []
"""
cant use fixed indices, because if something has no entries, the list disappears
instead I have to try parse everything, and just reject community playlists and profiles.
"""
for renderer in renderer_list:
results.extend(parse_renderer(renderer))
return results
def fetch_artist(self, source: Source, stop_at_level: int = 1) -> Artist:
artist = Artist()
# construct the request
url = urlparse(source.url)
browse_id = url.path.replace("/channel/", "")
r = self.yt_music_connection.post(
url=get_youtube_url(path="/youtubei/v1/browse", query=f"key={self.credentials.api_key}&prettyPrint=false"),
json={
"browseId": browse_id,
"context": {**self.credentials.context, "adSignalsInfo": {"params": []}}
}
)
if r is None:
return artist
if DEBUG:
dump_to_file(f"{browse_id}.json", r.text, is_json=True, exit_after_dump=False)
renderer_list = r.json().get("contents", {}).get("singleColumnBrowseResultsRenderer", {}).get("tabs", [{}])[
0].get("tabRenderer", {}).get("content", {}).get("sectionListRenderer", {}).get("contents", [])
if DEBUG:
for i, content in enumerate(renderer_list):
dump_to_file(f"{i}-artists-renderer.json", json.dumps(content), is_json=True, exit_after_dump=False)
results = []
"""
cant use fixed indices, because if something has no entries, the list dissappears
instead I have to try parse everything, and just reject community playlists and profiles.
"""
for renderer in renderer_list:
results.extend(parse_renderer(renderer))
artist.add_list_of_other_objects(results)
return artist
def fetch_album(self, source: Source, stop_at_level: int = 1) -> Album:
album = Album()
parsed_url = urlparse(source.url)
list_id_list = parse_qs(parsed_url.query).get('list', [])
if len(list_id_list) <= 0:
return album
browse_id = list_id_list[0]
r = self.yt_music_connection.post(
url=get_youtube_url(path="/youtubei/v1/browse", query=f"key={self.credentials.api_key}&prettyPrint=false"),
json={
"browseId": browse_id,
"context": {**self.credentials.context, "adSignalsInfo": {"params": []}}
}
)
if r is None:
return album
if DEBUG:
dump_to_file(f"{browse_id}.json", r.text, is_json=True, exit_after_dump=False)
renderer_list = r.json().get("contents", {}).get("singleColumnBrowseResultsRenderer", {}).get("tabs", [{}])[
0].get("tabRenderer", {}).get("content", {}).get("sectionListRenderer", {}).get("contents", [])
if DEBUG:
for i, content in enumerate(renderer_list):
dump_to_file(f"{i}-album-renderer.json", json.dumps(content), is_json=True, exit_after_dump=False)
results = []
"""
cant use fixed indices, because if something has no entries, the list dissappears
instead I have to try parse everything, and just reject community playlists and profiles.
"""
for renderer in renderer_list:
results.extend(parse_renderer(renderer))
album.add_list_of_other_objects(results)
return album
def fetch_song(self, source: Source, stop_at_level: int = 1) -> Song:
song = Song()
return song
def fetch_media_url(self, source: Source) -> dict:
def _get_best_format(format_list: List[Dict]) -> dict:
def _calc_score(_f: dict):
s = 0
_url = _f.get("url", "")
if "mime=audio" in _url:
s += 100
return s
highest_score = 0
best_format = {}
for _format in format_list:
_s = _calc_score(_format)
if _s >= highest_score:
highest_score = _s
best_format = _format
return best_format
ydl_res = self.ydl.extract_info(url=source.url, download=False)
_best_format = _get_best_format(ydl_res.get("formats", [{}]))
self.LOGGER.debug(_best_format)
return {
"url": _best_format.get("url"),
"chunk_size": _best_format.get("downloader_options", {}).get("http_chunk_size", main_settings["chunk_size"]),
"headers": _best_format.get("http_headers", {}),
}
def download_song_to_target(self, source: Source, target: Target, desc: str = None) -> DownloadResult:
media = self.fetch_media_url(source)
result = self.download_connection.stream_into(
media["url"],
target,
name=desc,
raw_url=True,
raw_headers=True,
disable_cache=True,
headers=media.get("headers", {}),
# chunk_size=media.get("chunk_size", main_settings["chunk_size"]),
method="GET",
)
if result.is_fatal_error:
result.merge(super().download_song_to_target(source=source, target=target, desc=desc))
return result
def __del__(self):
self.ydl.__exit__()

View File

@@ -0,0 +1,20 @@
from .config import config, read_config, write_config
from .enums.colors import BColors
"""
Here are all global important functions.
"""
def _apply_color(msg: str, color: BColors) -> str:
if color is BColors.ENDC:
return msg
return color.value + msg + BColors.ENDC.value
def output(msg: str, color: BColors = BColors.ENDC):
print(_apply_color(msg, color))
def user_input(msg: str, color: BColors = BColors.ENDC):
return input(_apply_color(msg, color)).strip()
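# A small usage sketch (assuming BColors defines e.g. a WARNING member):
# output("this is printed in color", BColors.WARNING)
# name = user_input("band name: ")  # prints the prompt, returns the stripped input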

View File

@@ -0,0 +1,33 @@
from typing import Tuple
from .config import Config
from .config_files import (
main_config,
logging_config,
youtube_config,
)
_sections: Tuple[Config, ...] = (
main_config.config,
logging_config.config,
youtube_config.config
)
def read_config():
for section in _sections:
section.read()
# special cases
if main_settings['tor']:
main_settings['proxies'] = {
'http': f'socks5h://127.0.0.1:{main_settings["tor_port"]}',
'https': f'socks5h://127.0.0.1:{main_settings["tor_port"]}'
}
def write_config():
for section in _sections:
section.write()
main_settings: main_config.SettingsStructure = main_config.config.loaded_settings
logging_settings: logging_config.SettingsStructure = logging_config.config.loaded_settings
youtube_settings: youtube_config.SettingsStructure = youtube_config.config.loaded_settings

View File

@@ -0,0 +1,132 @@
import re
from typing import Optional, List, Union, Iterable, Callable
from dataclasses import dataclass
import logging
import toml
from copy import deepcopy, copy
from urllib.parse import urlparse, urlunparse, ParseResult
from ...exception.config import SettingValueError
from ..utils import comment
LOGGER = logging.getLogger("config")
COMMENT_PREFIX = "#"
def comment_string(uncommented: str) -> str:
unprocessed_lines = uncommented.split("\n")
processed_lines: List[str] = []
for line in unprocessed_lines:
if line.startswith(COMMENT_PREFIX) or line == "":
processed_lines.append(line)
continue
line = COMMENT_PREFIX + " " + line
processed_lines.append(line)
return "\n".join(processed_lines)
@dataclass
class Description:
description: str
@property
def toml_string(self):
return comment_string(self.description)
class EmptyLine(Description):
def __init__(self):
self.description = ""
class Attribute:
def __init__(
self,
name: str,
default_value: any,
description: Optional[str] = None,
):
self.name = name
self.value = self._recursive_parse_object(default_value, self.parse_simple_value)
self.description: Optional[str] = description
self.loaded_settings: dict = None
def initialize_from_config(self, loaded_settings: dict):
self.loaded_settings = loaded_settings
self.loaded_settings.__setitem__(self.name, self.value, True)
def unparse_simple_value(self, value: any) -> any:
return value
def parse_simple_value(self, value: any) -> any:
return value
def _recursive_parse_object(self, __object, callback: Callable):
__object = copy(__object)
if isinstance(__object, dict):
for key, value in __object.items():
__object[key] = self._recursive_parse_object(value, callback)
return __object
if isinstance(__object, list) or (isinstance(__object, tuple) and not isinstance(__object, ParseResult)):
for i, item in enumerate(__object):
__object[i] = self._recursive_parse_object(item, callback)
return __object
return callback(__object)
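# A sketch of the recursion with the base class's identity callback:
# _recursive_parse_object({"a": [1, 2]}, self.parse_simple_value)
# copies the dict, recurses into the list, and applies the callback to 1 and 2.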
def parse(self, unparsed_value):
self.value = self._recursive_parse_object(unparsed_value, self.parse_simple_value)
return self.value
def unparse(self, parsed_value):
return self._recursive_parse_object(parsed_value, self.unparse_simple_value)
def load_toml(self, loaded_toml: dict) -> bool:
"""
returns true if succesfull
"""
if self.name not in loaded_toml:
LOGGER.warning(f"No setting by the name {self.name} found in the settings file.")
self.loaded_settings.__setitem__(self.name, self.value, True)
return
try:
self.parse(loaded_toml[self.name])
except SettingValueError as settings_error:
logging.warning(settings_error)
return False
self.loaded_settings.__setitem__(self.name, self.value, True)
return True
@property
def toml_string(self) -> str:
string = ""
if self.description is not None:
string += comment(self.description) + "\n"
string += toml.dumps({self.name: self.unparse(self.value)})
# print(string)
return string
def __str__(self):
return f"{self.description}\n{self.name}={self.value}"

View File

@@ -0,0 +1,151 @@
from pathlib import Path, PosixPath
from typing import Optional, Dict, Set
from urllib.parse import urlparse, urlunparse
import logging
from .attribute import Attribute
from ...exception.config import SettingValueError
class UrlAttribute(Attribute):
def parse_simple_value(self, value: any) -> any:
return urlparse(value)
def unparse_simple_value(self, value: any) -> any:
return urlunparse((value.scheme, value.netloc, value.path, value.params, value.query, value.fragment))
class PathAttribute(Attribute):
def parse_simple_value(self, value: any) -> Path:
if isinstance(value, Path) or isinstance(value, PosixPath):
return value
return Path(value)
def unparse_simple_value(self, value: Path) -> any:
return str(value.resolve())
class SelectAttribute(Attribute):
def __init__(self, name: str, default_value: any, options: tuple, description: Optional[str] = None, ignore_options_for_description = False):
self.options: tuple = options
new_description = ""
if description is not None:
new_description += description
new_description += "\n"
if not ignore_options_for_description:
new_description += f"{{{', '.join(self.options)}}}"
super().__init__(name, default_value, new_description)
def parse_simple_value(self, value: any) -> any:
if value in self.options:
return value
raise SettingValueError(
setting_name=self.name,
setting_value=value,
rule=f"has to be in the options: {{{', '.join(self.options)}}}."
)
def unparse_simple_value(self, value: any) -> any:
return value
class IntegerSelect(Attribute):
def __init__(self, name: str, default_value: any, options: Dict[str, int], description: Optional[str] = None, ignore_options_for_description = False):
self.options: Dict[str, int] = options
self.option_values: Set[int] = set(self.options.values())
description_lines = []
if description is not None:
description_lines.append(description)
description_lines.append("The values can be either an integer or one of the following values:")
for option_name, option_value in self.options.items():
description_lines.append(f"{option_name}: {option_value}")
super().__init__(name, default_value, "\n".join(description_lines))
def parse_simple_value(self, value: any) -> any:
if isinstance(value, str):
if value not in self.options:
raise SettingValueError(
setting_name=self.name,
setting_value=value,
rule=f"has to be in the options: {{{', '.join(self.options.keys())}}}, if it is a string."
)
return self.options[value]
return value
def unparse_simple_value(self, value: int) -> any:
if value in self.option_values:
for option, v in self.options.items():
if v == value:
return option  # write back the symbolic name instead of the raw integer
return value
ID3_2_FILE_FORMATS = frozenset((
"mp3", "mp2", "mp1", # MPEG-1 ID3.2
"wav", "wave", "rmi", # RIFF (including WAV) ID3.2
"aiff", "aif", "aifc", # AIFF ID3.2
"aac", "aacp", # Raw AAC ID3.2
"tta", # True Audio ID3.2
))
_sorted_id3_2_formats = sorted(ID3_2_FILE_FORMATS)
ID3_1_FILE_FORMATS = frozenset((
"ape", # Monkey's Audio ID3.1
"mpc", "mpp", "mp+", # MusePack ID3.1
"wv", # WavPack ID3.1
"ofr", "ofs" # OptimFrog ID3.1
))
_sorted_id3_1_formats = sorted(ID3_1_FILE_FORMATS)
class AudioFormatAttribute(Attribute):
def __init__(self, name: str, default_value: any, description: Optional[str] = None, ignore_options_for_description = False):
new_description = ""
if description is not None:
new_description += description
new_description += "\n"
new_description += f"ID3.2: {{{', '.join(ID3_2_FILE_FORMATS)}}}\n"
new_description += f"ID3.1: {{{', '.join(ID3_1_FILE_FORMATS)}}}"
super().__init__(name, default_value, new_description)
def parse_simple_value(self, value: any) -> any:
value = value.strip().lower()
if value in ID3_2_FILE_FORMATS:
return value
if value in ID3_1_FILE_FORMATS:
logging.debug(f"setting audio format to a format that only supports ID3.1: {v}")
return value
raise SettingValueError(
setting_name=self.name,
setting_value=value,
rule="has to be a valid audio format, supporting id3 metadata"
)
def unparse_simple_value(self, value: any) -> any:
return value
class LoggerAttribute(Attribute):
def parse_simple_value(self, value: str) -> logging.Logger:
return logging.getLogger(value)
def unparse_simple_value(self, value: logging.Logger) -> any:
return value.name

View File

@@ -0,0 +1,85 @@
from typing import Any, Tuple, Union, List
from pathlib import Path
import logging
from datetime import datetime
import toml
from .attributes.attribute import Attribute, Description, EmptyLine
class ConfigDict(dict):
def __init__(self, config_reference: "Config", *args, **kwargs):
self.config_reference: Config = config_reference
super().__init__(*args, **kwargs)
def __getitem__(self, __name: str) -> Any:
return super().__getitem__(__name)
def __setitem__(self, __key: Any, __value: Any, from_attribute: bool = False, is_parsed: bool = False) -> None:
if not from_attribute:
attribute: Attribute = self.config_reference.attribute_map[__key]
if is_parsed:
attribute.value = __value
else:
attribute.parse(__value)
self.config_reference.write()
__value = attribute.value
return super().__setitem__(__key, __value)
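# A sketch of the flow for a plain assignment ("some_setting" is hypothetical):
# loaded_settings["some_setting"] = raw_value looks up attribute_map["some_setting"],
# runs attribute.parse(raw_value), persists the file via config_reference.write(),
# and finally stores the parsed value in the dict itself.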
class Config:
def __init__(self, component_list: Tuple[Union[Attribute, Description, EmptyLine], ...], config_file: Path) -> None:
self.config_file: Path = config_file
self.component_list: List[Union[Attribute, Description, EmptyLine]] = [
Description(f"""IMPORTANT: If you modify this file, the changes for the actual setting, will be kept as is.
The changes you make to the comments, will be discarded, next time you run music-kraken. Have fun!
Latest reset: {datetime.now()}
_____
/ ____|
| | __ __ _ _ _
| | |_ | / _` || | | |
| |__| || (_| || |_| |
\_____| \__,_| \__, |
__/ |
|___/
""")]
self.component_list.extend(component_list)
self.loaded_settings: ConfigDict = ConfigDict(self)
self.attribute_map = {}
for component in self.component_list:
if not isinstance(component, Attribute):
continue
component.initialize_from_config(self.loaded_settings)
self.attribute_map[component.name] = component
@property
def toml_string(self):
return "\n".join(component.toml_string for component in self.component_list)
def write(self):
with self.config_file.open("w") as conf_file:
conf_file.write(self.toml_string)
def read(self):
if not self.config_file.is_file():
logging.info(f"Config file at '{self.config_file}' doesn't exist => generating")
self.write()
return
toml_data = {}
with self.config_file.open("r") as conf_file:
toml_data = toml.load(conf_file)
for component in self.component_list:
if isinstance(component, Attribute):
component.load_toml(toml_data)
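# A minimal sketch of the intended usage (the attribute name and path are made up):
#
#   config = Config((
#       Attribute(name="bitrate", default_value=125, description="Bitrate in kB/s."),
#   ), Path("/tmp/example.toml"))
#   config.read()                            # loads the file, or generates it if missing
#   config.loaded_settings["bitrate"] = 320  # parses the value, then writes the file back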

View File

@@ -0,0 +1,105 @@
from typing import TypedDict, List
from urllib.parse import ParseResult
from logging import Logger
from pathlib import Path
import logging
from ...path_manager import LOCATIONS
from ..config import Config
from ..attributes.attribute import Attribute, EmptyLine
from ..attributes.special_attributes import (
IntegerSelect,
LoggerAttribute
)
config = Config([
Attribute(name="logging_format", default_value="%(levelname)s:%(name)s:%(message)s", description="""Logging settings for the actual logging:
Reference for the logging formats: https://docs.python.org/3/library/logging.html#logrecord-attributes"""),
IntegerSelect(
name="log_level",
default_value=logging.INFO,
options={
"CRITICAL": 50,
"ERROR": 40,
"WARNING": 30,
"INFO": 20,
"DEBUG": 10,
"NOTSET": 0
}
),
LoggerAttribute(
name="download_logger",
description="The logger for downloading.",
default_value="download"
),
LoggerAttribute(
name="tagging_logger",
description="The logger for tagging id3 containers.",
default_value="tagging"
),
LoggerAttribute(
name="codex_logger",
description="The logger for streaming the audio into an uniform codex.",
default_value="codex"
),
LoggerAttribute(
name="object_logger",
description="The logger for creating Data-Objects.",
default_value="object"
),
LoggerAttribute(
name="database_logger",
description="The logger for Database operations.",
default_value="database"
),
LoggerAttribute(
name="musify_logger",
description="The logger for the musify scraper.",
default_value="musify"
),
LoggerAttribute(
name="youtube_logger",
description="The logger for the youtube scraper.",
default_value="youtube"
),
LoggerAttribute(
name="youtube_music_logger",
description="The logger for the youtube music scraper.\n(The scraper is seperate to the youtube scraper)",
default_value="youtube_music"
),
LoggerAttribute(
name="metal_archives_logger",
description="The logger for the metal archives scraper.",
default_value="metal_archives"
),
LoggerAttribute(
name="genius_logger",
description="The logger for the genius scraper",
default_value="genius"
),
LoggerAttribute(
name="bandcamp_logger",
description="The logger for the bandcamp scraper",
default_value="bandcamp"
)
], LOCATIONS.get_config_file("logging"))
class SettingsStructure(TypedDict):
# logging
logging_format: str
log_level: int
download_logger: Logger
tagging_logger: Logger
codex_logger: Logger
object_logger: Logger
database_logger: Logger
musify_logger: Logger
youtube_logger: Logger
youtube_music_logger: Logger
metal_archives_logger: Logger
genius_logger: Logger
bandcamp_logger: Logger

View File

@@ -0,0 +1,153 @@
from typing import TypedDict, List
from urllib.parse import ParseResult
from logging import Logger
from pathlib import Path
from ...path_manager import LOCATIONS
from ..config import Config
from ..attributes.attribute import Attribute, EmptyLine, Description
from ..attributes.special_attributes import (
SelectAttribute,
PathAttribute,
AudioFormatAttribute
)
config = Config((
Attribute(name="hasnt_yet_started", default_value=False, description="This will be set automatically, to look if it needs to run the scripts that run on start."),
Attribute(name="bitrate", default_value=125, description="Streams the audio with given bitrate [kB/s]. Can't stream with a higher Bitrate, than the audio source provides."),
AudioFormatAttribute(name="audio_format", default_value="mp3", description="""Music Kraken will stream the audio into this format.
You can use Audio formats which support ID3.2 and ID3.1,
but you will have cleaner Metadata using ID3.2."""),
Attribute(name="result_history", default_value=False, description="""If enabled, you can go back to the previous results.
The consequence is a higher meory consumption, because every result is saved."""),
Attribute(name="history_length", default_value=8, description="""You can choose how far back you can go in the result history.
The further you choose to be able to go back, the higher the memory usage.
'-1' removes the Limit entirely."""),
EmptyLine(),
Attribute(name="sort_by_date", default_value=True, description="If this is set to true, it will set the albumsort attribute such that,\nthe albums are sorted by date"),
Attribute(name="sort_album_by_type", default_value=True, description="""If this is set to true, it will set the albumsort attribute such that,
the albums are put into categories before being sorted.
This means for example, the Studio Albums and EP's are always in front of Singles, and Compilations are in the back."""),
Attribute(name="download_path", default_value="{genre}/{artist}/{album}", description="""There are multiple fields, you can use for the path and file name:
- genre
- label
- artist
- album
- song
- album_type
The folder music kraken should put the songs into."""),
Attribute(name="download_file", default_value="{song}.{audio_format}", description="The filename of the audio file."),
SelectAttribute(name="album_type_blacklist", default_value=[
"Compilation Album",
"Live Album",
"Mixtape"
], options=("Studio Album", "EP (Extended Play)", "Single", "Live Album", "Compilation Album", "Mixtape", "Demo", "Other"), description="""Music Kraken ignores all albums of those types.
Following album types exist in the programm:"""),
EmptyLine(),
Attribute(name="proxies", default_value=[], description="This is a dictionary."),
Attribute(name="tor", default_value=False, description="""Route ALL traffic through Tor.
If you use Tor, make sure the Tor browser is installed, and running.I can't guarantee maximum security though!"""),
Attribute(name="tor_port", default_value=9150, description="The port, tor is listening. If tor is already working, don't change it."),
Attribute(name="chunk_size", default_value=1024, description="Size of the chunks that are streamed.\nHere could be some room for improvement."),
Attribute(name="show_download_errors_threshold", default_value=0.3, description="""If the percentage of failed downloads goes over this threshold,
all the error messages are shown."""),
Attribute(
name="language",
default_value="en-US,en;q=0.6",
description="The language of the program. This will be used to translate the program in the future.\n"
"Currently it just sets the Accept-Language header.\n"
"https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/Accept-Language"
),
Attribute(
name="user_agent",
default_value="Mozilla/5.0 (X11; Linux x86_64; rv:90.0) Gecko/20100101 Firefox/90.0",
description="The user agent of the program. This will be used to translate the program in the future.\n"
"Currently it just sets the User-Agent header.\n"
"https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/User-Agent"
),
Attribute(
name="tries_per_proxy",
default_value=2,
description="The retries it should do. These can be overridden by the program, at certain places, and they have to be.",
),
EmptyLine(),
PathAttribute(name="music_directory", default_value=LOCATIONS.MUSIC_DIRECTORY.resolve(), description="The directory, all the music will be downloaded to."),
PathAttribute(name="temp_directory", default_value=LOCATIONS.TEMP_DIRECTORY.resolve(), description="All temporary stuff is gonna be dumped in this directory."),
PathAttribute(name="log_file", default_value=LOCATIONS.get_log_file("download_logs.log").resolve()),
PathAttribute(name="ffmpeg_binary", default_value=LOCATIONS.FFMPEG_BIN.resolve(), description="Set the path to the ffmpeg binary."),
PathAttribute(name="cache_directory", default_value=LOCATIONS.CACHE_DIRECTORY.resolve(), description="Set the path of the cache directory."),
Attribute(
name="not_a_genre_regex",
description="These regular expressions tell music-kraken, which sub-folders of the music-directory\n"
"it should ignore, and not count to genres",
default_value=[
r'^\.' # is hidden/starts with a "."
]
),
EmptyLine(),
Attribute(name="happy_messages", default_value=[
"Support the artist.",
"Star Me: https://github.com/HeIIow2/music-downloader",
"🏳️‍⚧️🏳️‍⚧️ Trans rights are human rights. 🏳️‍⚧️🏳️‍⚧️",
"🏳️‍⚧️🏳️‍⚧️ Trans women are women, trans men are men, and enbies are enbies. 🏳️‍⚧️🏳️‍⚧️",
"🏴‍☠️🏴‍☠️ Unite under one flag, fck borders. 🏴‍☠️🏴‍☠️",
"Join my Matrix Space: https://matrix.to/#/#music-kraken:matrix.org",
"BPJM does cencorship.",
"🏳️‍⚧️🏳️‍⚧️ Protect trans youth. 🏳️‍⚧️🏳️‍⚧️",
"Klassenkampf",
"Rise Proletarians!!"
], description="""Just some nice and wholesome messages.
If your mindset has traits of a [file corruption], you might not agree.
But anyways... Freedom of thought, so go ahead and change the messages."""),
Attribute(name="modify_gc", default_value=True),
Attribute(name="id_bits", default_value=64, description="I really dunno why I even made this a setting.. Modifying this is a REALLY dumb idea."),
Description("🏳️‍⚧️🏳️‍⚧️ Protect trans youth. 🏳️‍⚧️🏳️‍⚧️\n"),
), LOCATIONS.get_config_file("main"))
class SettingsStructure(TypedDict):
hasnt_yet_started: bool
result_history: bool
history_length: int
happy_messages: List[str]
modify_gc: bool
id_bits: int
# audio
bitrate: int
audio_format: str
sort_by_date: bool
sort_album_by_type: bool
download_path: str
download_file: str
album_type_blacklist: List[str]
# connection
proxies: List[dict[str, str]]
tries_per_proxy: int
tor: bool
tor_port: int
chunk_size: int
show_download_errors_threshold: float
language: str
user_agent: str
# paths
music_directory: Path
temp_directory: Path
log_file: Path
not_a_genre_regex: List[str]
ffmpeg_binary: Path
cache_directory: Path

View File

@@ -0,0 +1,113 @@
from typing import TypedDict, List
from urllib.parse import ParseResult
from logging import Logger
from pathlib import Path
from ...path_manager import LOCATIONS
from ..config import Config
from ..attributes.attribute import Attribute
from ..attributes.special_attributes import SelectAttribute, PathAttribute, UrlAttribute
config = Config((
Attribute(name="use_youtube_alongside_youtube_music", default_value=False, description="""If set to true, it will search youtube through invidious and piped,
despite a direct wrapper for the youtube music INNERTUBE api being implemented.
I my INNERTUBE api wrapper doesn't work, set this to true."""),
UrlAttribute(name="invidious_instance", default_value="https://yt.artemislena.eu", description="""This is an attribute, where you can define the invidious instances,
the youtube downloader should use.
Here is a list of active ones: https://docs.invidious.io/instances/
Instances that use cloudflare or have source code changes could cause issues.
Hidden instances (.onion) will only work, when setting 'tor=true'."""),
UrlAttribute(name="piped_instance", default_value="https://piped-api.privacy.com.de", description="""This is an attribute, where you can define the pioed instances,
the youtube downloader should use.
Here is a list of active ones: https://github.com/TeamPiped/Piped/wiki/Instances
Instances that use cloudflare or have source code changes could cause issues.
Hidden instances (.onion) will only work, when setting 'tor=true"""),
Attribute(name="sleep_after_youtube_403", default_value=30, description="The time to wait, after youtube returned 403 (in seconds)"),
Attribute(name="youtube_music_api_key", default_value="AIzaSyC9XL3ZjWddXya6X74dJoCTL-WEYFDNX30", description="""This is the API key used by YouTube-Music internally.
Don't worry if it is empty; Rachel will fetch it automatically for you <333
(she will also update outdated api keys/those that don't work)"""),
Attribute(name="youtube_music_clean_data", default_value=True, description="If set to true, it exclusively fetches artists/albums/songs, not things like user channels etc."),
UrlAttribute(name="youtube_url", default_value=[
"https://www.youtube.com/",
"https://www.youtu.be/",
"https://music.youtube.com/",
], description="""This is used to detect, if an url is from youtube, or any alternativ frontend.
If any instance seems to be missing, run music kraken with the -f flag."""),
Attribute(name="use_sponsor_block", default_value=True, description="Use sponsor block to remove adds or simmilar from the youtube videos."),
Attribute(name="player_url", default_value="https://music.youtube.com/s/player/80b90bfd/player_ias.vflset/en_US/base.js", description="""
This is needed to fetch videos without invidious
"""),
Attribute(name="youtube_music_consent_cookies", default_value={
"CONSENT": "PENDING+258"
}, description="The cookie with the key CONSENT says to what stuff you agree. Per default you decline all cookies, but it honestly doesn't matter."),
Attribute(name="youtube_music_innertube_context", default_value={
"client": {
"hl": "en",
"gl": "DE",
"remoteHost": "87.123.241.77",
"deviceMake": "",
"deviceModel": "",
"visitorData": "CgtiTUxaTHpoXzk1Zyia59WlBg%3D%3D",
"userAgent": "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/116.0.0.0 Safari/537.36",
"clientName": "WEB_REMIX",
"clientVersion": "1.20230710.01.00",
"osName": "X11",
"osVersion": "",
"originalUrl": "https://music.youtube.com/",
"platform": "DESKTOP",
"clientFormFactor": "UNKNOWN_FORM_FACTOR",
"configInfo": {
"appInstallData": "",
"coldConfigData": "",
"coldHashData": "",
"hotHashData": ""
},
"userInterfaceTheme": "USER_INTERFACE_THEME_DARK",
"timeZone": "Atlantic/Jan_Mayen",
"browserName": "Firefox",
"browserVersion": "115.0",
"acceptHeader": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8",
"deviceExperimentId": "ChxOekkxTmpnek16UTRNVFl4TkRrek1ETTVOdz09EJrn1aUGGJrn1aUG",
"screenWidthPoints": 584,
"screenHeightPoints": 939,
"screenPixelDensity": 1,
"screenDensityFloat": 1,
"utcOffsetMinutes": 120,
"musicAppInfo": {
"pwaInstallabilityStatus": "PWA_INSTALLABILITY_STATUS_UNKNOWN",
"webDisplayMode": "WEB_DISPLAY_MODE_BROWSER",
"storeDigitalGoodsApiSupportStatus": {
"playStoreDigitalGoodsApiSupportStatus": "DIGITAL_GOODS_API_SUPPORT_STATUS_UNSUPPORTED"
}
}
},
"user": { "lockedSafetyMode": False },
"request": {
"useSsl": True,
"internalExperimentFlags": [],
"consistencyTokenJars": []
},
"adSignalsInfo": {
"params": []
}
}, description="Don't bother about this. It is something technical, but if you wanna change the innertube requests... go on."),
Attribute(name="ytcfg", description="Please... ignore it.", default_value={})
), LOCATIONS.get_config_file("youtube"))
class SettingsStructure(TypedDict):
use_youtube_alongside_youtube_music: bool
invidious_instance: ParseResult
piped_instance: ParseResult
sleep_after_youtube_403: float
youtube_music_api_key: str
youtube_music_clean_data: bool
youtube_url: List[ParseResult]
use_sponsor_block: bool
player_url: str
youtube_music_innertube_context: dict
youtube_music_consent_cookies: dict
ytcfg: dict

View File

@@ -0,0 +1,4 @@
def comment(uncommented_string: str) -> str:
_fragments = uncommented_string.split("\n")
_fragments = ["# " + frag for frag in _fragments]
return "\n".join(_fragments)

View File

@@ -0,0 +1,21 @@
from pathlib import Path
import json
from .path_manager import LOCATIONS
def dump_to_file(file_name: str, payload, is_json: bool = False, exit_after_dump: bool = True):
path = Path(LOCATIONS.TEMP_DIRECTORY, file_name)
print(f"Dumping payload to: \"{path}\"")
# dictionaries are serialized directly; string payloads are pretty-printed if they contain json
if isinstance(payload, dict):
payload = json.dumps(payload, indent=4)
elif is_json:
payload = json.dumps(json.loads(payload), indent=4)
with path.open("w") as f:
f.write(payload)
if exit_after_dump:
exit()
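# Debug usage sketch (the file name and response object are made up):
#   dump_to_file("search_response.json", response.text, is_json=True, exit_after_dump=False)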

View File

@@ -0,0 +1 @@
from .source import SourcePages

View File

@@ -0,0 +1,26 @@
from enum import Enum
class AlbumStatus(Enum):
"""
Enum class representing the possible statuses of an album.
"""
UNRELEASED = "Unreleased"
RELEASED = "Released"
LEAKED = "Leaked"
OFFICIAL = "Official"
BOOTLEG = "Bootleg"
class AlbumType(Enum):
"""
Enum class representing the possible types of an album.
"""
STUDIO_ALBUM = "Studio Album"
EP = "EP (Extended Play)"
SINGLE = "Single"
LIVE_ALBUM = "Live Album"
COMPILATION_ALBUM = "Compilation Album"
MIXTAPE = "Mixtape"
DEMO = "Demo"
OTHER = "Other"

View File

@@ -0,0 +1,19 @@
from enum import Enum
class BColors(Enum):
# https://stackoverflow.com/a/287944
HEADER = "\033[95m"
OKBLUE = "\033[94m"
OKCYAN = "\033[96m"
OKGREEN = "\033[92m"
WARNING = "\033[93m"
FAIL = "\033[91m"
ENDC = "\033[0m"
BOLD = "\033[1m"
UNDERLINE = "\033[4m"
GREY = "\x1b[38;20m"
YELLOW = "\x1b[33;20m"
RED = "\x1b[31;20m"
BOLD_RED = "\x1b[31;1m"

View File

@@ -0,0 +1,7 @@
from enum import Enum
class ContactMethod(Enum):
EMAIL = "email"
PHONE = "phone"
FAX = "fax"

View File

@@ -0,0 +1,50 @@
from enum import Enum
class SourceTypes(Enum):
SONG = "song"
ALBUM = "album"
ARTIST = "artist"
LYRICS = "lyrics"
class SourcePages(Enum):
YOUTUBE = "youtube"
MUSIFY = "musify"
YOUTUBE_MUSIC = "youtube music"
GENIUS = "genius"
MUSICBRAINZ = "musicbrainz"
ENCYCLOPAEDIA_METALLUM = "encyclopaedia metallum"
BANDCAMP = "bandcamp"
DEEZER = "deezer"
SPOTIFY = "spotify"
# This has nothing to do with audio, but bands can be here
WIKIPEDIA = "wikipedia"
INSTAGRAM = "instagram"
FACEBOOK = "facebook"
TWITTER = "twitter" # I will use nitter though lol
MYSPACE = "myspace" # Yes somehow this ancient site is linked EVERYWHERE
MANUAL = "manual"
PRESET = "preset"
@classmethod
def get_homepage(cls, attribute) -> str:
homepage_map = {
cls.YOUTUBE: "https://www.youtube.com/",
cls.MUSIFY: "https://musify.club/",
cls.MUSICBRAINZ: "https://musicbrainz.org/",
cls.ENCYCLOPAEDIA_METALLUM: "https://www.metal-archives.com/",
cls.GENIUS: "https://genius.com/",
cls.BANDCAMP: "https://bandcamp.com/",
cls.DEEZER: "https://www.deezer.com/",
cls.INSTAGRAM: "https://www.instagram.com/",
cls.FACEBOOK: "https://www.facebook.com/",
cls.SPOTIFY: "https://open.spotify.com/",
cls.TWITTER: "https://twitter.com/",
cls.MYSPACE: "https://myspace.com/",
cls.WIKIPEDIA: "https://en.wikipedia.org/wiki/Main_Page"
}
return homepage_map[attribute]
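# Example: SourcePages.get_homepage(SourcePages.BANDCAMP) -> "https://bandcamp.com/"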

View File

@@ -0,0 +1 @@
__all__ = ["config"]

View File

@@ -0,0 +1,28 @@
class SettingException(Exception):
pass
class SettingNotFound(SettingException):
def __init__(self, setting_name: str):
self.setting_name = setting_name
def __str__(self):
return f"Setting '{self.setting_name}' not found."
class SettingValueError(SettingException):
def __init__(self, setting_name: str, setting_value: str, rule: str):
"""
The rule has to be such, that the following format makes sense:
{name} {rule}, not '{value}'
:param setting_name:
:param setting_value:
:param rule:
"""
self.setting_name = setting_name
self.setting_value = setting_value
self.rule = rule
def __str__(self):
return f"{self.setting_name} {self.rule}, not '{self.setting_value}'."

View File

@@ -0,0 +1,11 @@
class DownloadException(Exception):
pass
class UrlNotFoundException(DownloadException):
def __init__(self, url: str, *args: object) -> None:
self.url = url
super().__init__(*args)
def __str__(self) -> str:
return f"Couldn't find the page of {self.url}"

View File

@@ -0,0 +1,10 @@
class ObjectException(Exception):
pass
class IsDynamicException(Exception):
"""
Gets raised, if a dynamic data object tries to perform an action,
which does not make sense for a dynamic object.
"""
pass

View File

@@ -0,0 +1,11 @@
import os
from datetime import datetime
def clear_console():
os.system('cls' if os.name in ('nt', 'dos') else 'clear')
def get_current_millis() -> int:
# note: this returns only the millisecond part of the current second (0-999),
# not a unix timestamp in milliseconds
dt = datetime.now()
return int(dt.microsecond / 1_000)

View File

@@ -0,0 +1,198 @@
# -*- encoding: utf-8 -*-
# merge_args v0.1.5
# Merge signatures of two functions with Advanced Hackery.
# Copyright © 2018-2023, Chris Warrick.
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are
# met:
#
# 1. Redistributions of source code must retain the above copyright
# notice, this list of conditions, and the following disclaimer.
#
# 2. Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions, and the following disclaimer in the
# documentation and/or other materials provided with the distribution.
#
# 3. Neither the name of the author of this software nor the names of
# contributors to this software may be used to endorse or promote
# products derived from this software without specific prior written
# consent.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
"""
Merge signatures of two functions with Advanced Hackery. Useful for wrappers.
Usage: @merge_args(old_function)
"""
import inspect
import itertools
import types
import functools
import sys
import typing
__version__ = '0.1.5'
__all__ = ('merge_args',)
PY38 = sys.version_info >= (3, 8)
PY310 = sys.version_info >= (3, 10)
PY311 = sys.version_info >= (3, 11)
def _blank(): # pragma: no cover
pass
def _merge(
source,
dest,
drop_args: typing.Optional[typing.List[str]] = None,
drop_kwonlyargs: typing.Optional[typing.List[str]] = None,
):
"""Merge the signatures of ``source`` and ``dest``.
``dest`` args go before ``source`` args in all three categories
(positional, keyword-maybe, keyword-only).
"""
if drop_args is None:
drop_args = []
if drop_kwonlyargs is None:
drop_kwonlyargs = []
source_spec = inspect.getfullargspec(source)
dest_spec = inspect.getfullargspec(dest)
if source_spec.varargs or source_spec.varkw:
return dest
source_all = source_spec.args
dest_all = dest_spec.args
if source_spec.defaults:
source_pos = source_all[:-len(source_spec.defaults)]
source_kw = source_all[-len(source_spec.defaults):]
else:
source_pos = source_all
source_kw = []
if dest_spec.defaults:
dest_pos = dest_all[:-len(dest_spec.defaults)]
dest_kw = dest_all[-len(dest_spec.defaults):]
else:
dest_pos = dest_all
dest_kw = []
args_merged = dest_pos
for a in source_pos:
if a not in args_merged and a not in drop_args:
args_merged.append(a)
defaults_merged = []
for a, default in itertools.chain(
zip(dest_kw, dest_spec.defaults or []),
zip(source_kw, source_spec.defaults or [])
):
if a not in args_merged and a not in drop_args:
args_merged.append(a)
defaults_merged.append(default)
kwonlyargs_merged = dest_spec.kwonlyargs
for a in source_spec.kwonlyargs:
if a not in kwonlyargs_merged and a not in drop_kwonlyargs:
kwonlyargs_merged.append(a)
args_all = tuple(args_merged + kwonlyargs_merged)
if PY38:
replace_kwargs = {
'co_argcount': len(args_merged),
'co_kwonlyargcount': len(kwonlyargs_merged),
'co_posonlyargcount': dest.__code__.co_posonlyargcount,
'co_nlocals': len(args_all),
'co_flags': source.__code__.co_flags,
'co_varnames': args_all,
'co_filename': dest.__code__.co_filename,
'co_name': dest.__code__.co_name,
'co_firstlineno': dest.__code__.co_firstlineno,
}
if PY310:
replace_kwargs['co_linetable'] = dest.__code__.co_linetable
else:
replace_kwargs['co_lnotab'] = dest.__code__.co_lnotab
if PY311:
replace_kwargs['co_exceptiontable'] = dest.__code__.co_exceptiontable
replace_kwargs['co_qualname'] = dest.__code__.co_qualname
passer_code = _blank.__code__.replace(**replace_kwargs)
else:
passer_args = [
len(args_merged),
len(kwonlyargs_merged),
_blank.__code__.co_nlocals,
_blank.__code__.co_stacksize,
source.__code__.co_flags,
_blank.__code__.co_code, (), (),
args_all, dest.__code__.co_filename,
dest.__code__.co_name,
dest.__code__.co_firstlineno,
dest.__code__.co_lnotab,
]
passer_code = types.CodeType(*passer_args)
passer = types.FunctionType(passer_code, globals())
dest.__wrapped__ = passer
# annotations
# ensure we take destinations return annotation
has_dest_ret = 'return' in dest.__annotations__
if has_dest_ret:
dest_ret = dest.__annotations__['return']
for v in ('__kwdefaults__', '__annotations__'):
out = getattr(source, v)
if out is None:
out = {}
if getattr(dest, v) is not None:
out = out.copy()
out.update(getattr(dest, v))
setattr(passer, v, out)
if has_dest_ret:
passer.__annotations__['return'] = dest_ret
dest.__annotations__ = passer.__annotations__
passer.__defaults__ = tuple(defaults_merged)
if not dest.__doc__:
dest.__doc__ = source.__doc__
return dest
def merge_args(
source,
drop_args: typing.Optional[typing.List[str]] = None,
drop_kwonlyargs: typing.Optional[typing.List[str]] = None,
):
"""Merge the signatures of two functions."""
try:
return functools.partial(
lambda x, y: _merge(x, y, drop_args, drop_kwonlyargs), source
)
except TypeError:
pass
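# Usage sketch, following the module docstring (function names are illustrative):
#
#   def old(foo, bar, baz=42):
#       ...
#
#   @merge_args(old)
#   def new(*args, extra=None, **kwargs):
#       return old(*args, **kwargs)
#
#   # inspect.signature(new) now reports (foo, bar, baz=42, *, extra=None)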

View File

@@ -0,0 +1,24 @@
from datetime import date
def get_elem_from_obj(current_object, keys: list, after_process=lambda x: x, return_if_none=None):
for key in keys:
if key in current_object or (isinstance(key, int) and key < len(current_object)):
current_object = current_object[key]
else:
return return_if_none
return after_process(current_object)
def parse_music_brainz_date(mb_date: str) -> date:
# musicbrainz dates come as "YYYY", "YYYY-MM" or "YYYY-MM-DD"; missing parts default to 1
year = 1
month = 1
day = 1
if mb_date.count("-") == 2:
year, month, day = [int(i) for i in mb_date.split("-")]
elif mb_date.count("-") == 1:
year, month = [int(i) for i in mb_date.split("-")]
elif mb_date.isdigit():
year = int(mb_date)
return date(year, month, day)
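# Examples:
#   parse_music_brainz_date("2006-05-12")  # -> date(2006, 5, 12)
#   parse_music_brainz_date("2006-05")     # -> date(2006, 5, 1)
#   parse_music_brainz_date("2006")        # -> date(2006, 1, 1)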

View File

@@ -0,0 +1,3 @@
from .locations import Locations
LOCATIONS = Locations()

View File

@@ -0,0 +1,7 @@
from pathlib import Path
import platformdirs
def get_config_directory(application_name: str) -> Path:
return platformdirs.user_config_path(appname=application_name)

View File

@@ -0,0 +1,88 @@
import configparser
from pathlib import Path
import os
from os.path import expandvars
import logging
from sys import platform
import tempfile
from typing import Optional
from pyffmpeg import FFmpeg
from .music_directory import get_music_directory
from .config_directory import get_config_directory
class Locations:
@staticmethod
def _get_env(key: str, default: Path, default_for_windows: bool = True) -> Optional[Path]:
res = os.environ.get(key.upper())
if res is not None:
return Path(res)
xdg_user_dirs_file = Path(os.environ.get("XDG_CONFIG_HOME") or Path(Path.home(), ".config"), "user-dirs.dirs")
xdg_user_dirs_default_file = Path("/etc/xdg/user-dirs.defaults")
def get_dir_from_xdg_file(xdg_file_path: os.PathLike) -> Optional[Path]:
nonlocal key
try:
with open(xdg_file_path, 'r') as f:
data = "[XDG_USER_DIRS]\n" + f.read()
config = configparser.ConfigParser(allow_no_value=True)
config.read_string(data)
xdg_config = config['XDG_USER_DIRS']
return Path(expandvars(xdg_config[key.lower()].strip('"')))
except (FileNotFoundError, KeyError) as e:
logging.warning(
f"Missing file or No entry found for \"{key}\" in: \"{xdg_file_path}\".\n"
)
logging.debug(str(e))
res = get_dir_from_xdg_file(xdg_user_dirs_file)
if res is not None:
return res
res = get_dir_from_xdg_file(xdg_user_dirs_default_file)
if res is not None:
return res
logging.warning(f"couldn't find a {key}, falling back to: {default}")
if not default_for_windows and platform == "linux":
return
return default
def __init__(self, application_name: os.PathLike = "music-kraken"):
self.FILE_ENCODING: str = "utf-8"
self.TEMP_DIRECTORY = Path(tempfile.gettempdir(), application_name)
self.TEMP_DIRECTORY.mkdir(exist_ok=True, parents=True)
self.MUSIC_DIRECTORY = get_music_directory()
self.CONFIG_DIRECTORY = get_config_directory(str(application_name))
self.CONFIG_DIRECTORY.mkdir(exist_ok=True, parents=True)
self.CONFIG_FILE = Path(self.CONFIG_DIRECTORY, f"{application_name}.conf")
self.LEGACY_CONFIG_FILE = Path(self.CONFIG_DIRECTORY, f"{application_name}.conf")
self.CACHE_DIRECTORY = self._get_env("XDG_CACHE_HOME", Path(Path.home(), ".cache"))
if self.CACHE_DIRECTORY is None:
logging.warning(f"Could not find a cache dir. Falling back to the temp dir: {self.TEMP_DIRECTORY}")
self.CACHE_DIRECTORY = self.TEMP_DIRECTORY
else:
self.CACHE_DIRECTORY = Path(self.CACHE_DIRECTORY, application_name)
self.CACHE_DIRECTORY.mkdir(parents=True, exist_ok=True)
self.FFMPEG_BIN = Path(FFmpeg(enable_log=False).get_ffmpeg_bin())
def get_config_file(self, config_name: str) -> Path:
return Path(self.CONFIG_DIRECTORY, f"{config_name}.toml")
def get_log_file(self, file_name: os.PathLike) -> Path:
return Path(self.TEMP_DIRECTORY, file_name)
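# Example: LOCATIONS.get_config_file("main") resolves to "main.toml" inside the
# user's config directory for music-kraken (platformdirs decides the base directory,
# see the imported get_config_directory).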

View File

@@ -0,0 +1,58 @@
import os
from pathlib import Path
from typing import Optional
from sys import platform
import logging
from os.path import expandvars
import configparser
DEFAULT_MUSIC_DIRECTORY = Path(Path.home(), "Music")
def get_xdg_music_directory() -> Path:
"""
gets the xdg music directory, for all the linux or bsd folks!
Thanks to Distant Thunder, as well as Kevin Gruber for making that pull request:
https://github.com/HeIIow2/music-downloader/pull/6
XDG_USER_DIRS_FILE reference:
https://freedesktop.org/wiki/Software/xdg-user-dirs/
https://web.archive.org/web/20230322012953/https://freedesktop.org/wiki/Software/xdg-user-dirs/
"""
xdg_user_dirs_file = Path(os.environ.get("XDG_CONFIG_HOME") or Path(Path.home(), ".config"), "user-dirs.dirs")
xdg_user_dirs_default_file = Path("/etc/xdg/user-dirs.defaults")
def get_music_dir_from_xdg_file(xdg_file_path: os.PathLike) -> Optional[Path]:
try:
with open(xdg_file_path, 'r') as f:
data = "[XDG_USER_DIRS]\n" + f.read()
config = configparser.ConfigParser(allow_no_value=True)
config.read_string(data)
xdg_config = config['XDG_USER_DIRS']
return Path(expandvars(xdg_config['xdg_music_dir'].strip('"')))
except (FileNotFoundError, KeyError) as e:
logging.warning(
f"Missing file or No entry found for \"xdg_music_dir\" in: \"{xdg_file_path}\".\n"
)
logging.debug(str(e))
music_dir = get_music_dir_from_xdg_file(xdg_user_dirs_file)
if music_dir is not None:
return music_dir
music_dir = get_music_dir_from_xdg_file(xdg_user_dirs_default_file)
if music_dir is not None:
return music_dir
logging.warning(f"couldn't find a XDG music dir, falling back to: {DEFAULT_MUSIC_DIRECTORY}")
return DEFAULT_MUSIC_DIRECTORY
def get_music_directory() -> Path:
if platform != "linux":
return DEFAULT_MUSIC_DIRECTORY
return get_xdg_music_directory()

View File

@@ -0,0 +1,57 @@
import jellyfish
import string
TITLE_THRESHOLD_LEVENSHTEIN = 1
UNIFY_TO = " "
ALLOWED_LENGTH_DISTANCE = 20
def unify_punctuation(to_unify: str) -> str:
for char in string.punctuation:
to_unify = to_unify.replace(char, UNIFY_TO)
return to_unify
def remove_feature_part_from_track(title: str) -> str:
# strip a trailing parenthesised part, e.g. "song (feat. X)" -> "song "
if not title.endswith(")"):
return title
if "(" not in title:
return title
return title[:title.index("(")]
def modify_title(to_modify: str) -> str:
to_modify = to_modify.strip()
to_modify = to_modify.lower()
to_modify = remove_feature_part_from_track(to_modify)
to_modify = unify_punctuation(to_modify)
return to_modify
def match_titles(title_1: str, title_2: str):
# the returned boolean is True when the distance exceeds the threshold,
# i.e. when the titles do NOT match
title_1, title_2 = modify_title(title_1), modify_title(title_2)
distance = jellyfish.levenshtein_distance(title_1, title_2)
return distance > TITLE_THRESHOLD_LEVENSHTEIN, distance
def match_artists(artist_1, artist_2: str):
if isinstance(artist_1, list):
distances = []
for artist_1_ in artist_1:
match, distance = match_titles(artist_1_, artist_2)
if not match:
return match, distance
distances.append(distance)
return True, min(distances)
return match_titles(artist_1, artist_2)
def match_length(length_1: int | None, length_2: int | None) -> bool:
# returning true if either one is Null, because if one value is not known,
# then it shouldn't be an attribute which could reject an audio source
if length_1 is None or length_2 is None:
return True
return abs(length_1 - length_2) <= ALLOWED_LENGTH_DISTANCE
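# Usage sketch (remember the convention: the boolean is True when the distance
# exceeds the threshold, i.e. when the titles do NOT match):
#   match_titles("Song (Official Video)", "song")  # -> (False, 1): within the threshold
#   match_length(180, None)                        # -> True, unknown lengths never reject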

View File

@@ -0,0 +1,3 @@
URL_PATTERN = r"https?://(?:[-\w.]|(?:%[\da-fA-F]{2}))+"
INT_PATTERN = r"^\d*$"
FLOAT_PATTERN = r"^[\d|\,|\.]*$"

View File

@@ -0,0 +1,34 @@
import random
from .path_manager import LOCATIONS
from .config import main_settings
DEBUG = True
DEBUG_LOGGING = DEBUG and True
DEBUG_YOUTUBE_INITIALIZING = DEBUG and False
DEBUG_PAGES = DEBUG and False
if DEBUG:
print("DEBUG ACTIVE")
def get_random_message() -> str:
return random.choice(main_settings['happy_messages'])
CONFIG_DIRECTORY = LOCATIONS.CONFIG_DIRECTORY
HIGHEST_ID = 2 ** main_settings['id_bits']
HELP_MESSAGE = """to search:
> s: {query or url}
> s: https://musify.club/release/some-random-release-183028492
> s: #a {artist} #r {release} #t {track}
to download:
> d: {option ids or direct url}
> d: 0, 3, 4
> d: 1
> d: https://musify.club/release/some-random-release-183028492
have fun :3""".strip()

View File

@@ -0,0 +1,84 @@
from typing import Tuple, Union
from pathlib import Path
from transliterate.exceptions import LanguageDetectionError
from transliterate import translit
from pathvalidate import sanitize_filename
COMMON_TITLE_APPENDIX_LIST: Tuple[str, ...] = (
"(official video)",
)
def unify(string: str) -> str:
"""
returns a unified str, to make comparisons easy.
a unified string has the following attributes:
- is lowercase
- is transliterated into latin characters, where possible
"""
try:
string = translit(string, reversed=True)
except LanguageDetectionError:
pass
return string.lower()
def fit_to_file_system(string: Union[str, Path]) -> Union[str, Path]:
def fit_string(string: str) -> str:
if string == "/":
return "/"
string = string.strip()
# strip leading dots, so no hidden files or relative paths are created by accident
while string.startswith("."):
string = string[1:]
if len(string) == 0:
return string
string = string.replace("/", "_").replace("\\", "_")
string = sanitize_filename(string)
return string
if isinstance(string, Path):
return Path(*(fit_string(part) for part in string.parts))
else:
return fit_string(string)
def clean_song_title(raw_song_title: str, artist_name: str) -> str:
"""
This function cleans common naming "conventions" for non clean song titles, like the title of youtube videos
cleans:
- `artist - song` -> `song`
- `song (Official Video)` -> `song`
- ` song` -> `song`
- `song (prod. some producer)`
"""
raw_song_title = raw_song_title.strip()
artist_name = artist_name.strip()
# Clean official Video appendix
for dirty_appendix in COMMON_TITLE_APPENDIX_LIST:
if raw_song_title.lower().endswith(dirty_appendix):
raw_song_title = raw_song_title[:-len(dirty_appendix)].strip()
# Remove artist from the start of the title
if raw_song_title.lower().startswith(artist_name.lower()):
raw_song_title = raw_song_title[len(artist_name):].strip()
if raw_song_title.startswith("-"):
raw_song_title = raw_song_title[1:].strip()
return raw_song_title.strip()
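# Examples:
#   clean_song_title("Artist - Song (Official Video)", "Artist")  # -> "Song"
#   clean_song_title("  Song  ", "Artist")                        # -> "Song"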
def comment(uncommented_string: str) -> str:
_fragments = uncommented_string.split("\n")
_fragments = ["# " + frag for frag in _fragments]
return "\n".join(_fragments)

View File

@@ -0,0 +1 @@
from .thread_classes import EndThread, FinishedSearch

View File

@@ -0,0 +1,98 @@
from dataclasses import dataclass, field
from typing import List, Optional, Tuple
from ...utils.config import main_settings, logging_settings
from ...utils.enums.colors import BColors
from ...objects import Target
UNIT_PREFIXES: List[str] = ["", "k", "m", "g", "t"]
UNIT_DIVISOR = 1024
LOGGER = logging_settings["download_logger"]
@dataclass
class DownloadResult:
total: int = 0
fail: int = 0
sponsor_segments: int = 0
error_message: Optional[str] = None
total_size: int = 0
found_on_disk: int = 0
_error_message_list: List[str] = field(default_factory=list)
@property
def success(self) -> int:
return self.total - self.fail
@property
def success_percentage(self) -> float:
if self.total == 0:
return 0
return self.success / self.total
@property
def failure_percentage(self) -> float:
if self.total == 0:
return 1
return self.fail / self.total
@property
def is_fatal_error(self) -> bool:
return self.error_message is not None
@property
def is_mild_failure(self) -> bool:
if self.is_fatal_error:
return True
return self.failure_percentage > main_settings["show_download_errors_threshold"]
def _size_val_unit_pref_ind(self, val: float, ind: int) -> Tuple[float, int]:
if val < UNIT_DIVISOR:
return val, ind
if ind >= len(UNIT_PREFIXES) - 1:
return val, ind
return self._size_val_unit_pref_ind(val=val / UNIT_DIVISOR, ind=ind + 1)
@property
def formated_size(self) -> str:
total_size, prefix_index = self._size_val_unit_pref_ind(self.total_size, 0)
return f"{total_size:.{2}f} {UNIT_PREFIXES[prefix_index]}B"
def add_target(self, target: Target):
self.total_size += target.size
def merge(self, other: "DownloadResult"):
if other.is_fatal_error:
LOGGER.debug(other.error_message)
self._error_message_list.append(other.error_message)
self.total += 1
self.fail += 1
else:
self.total += other.total
self.fail += other.fail
self._error_message_list.extend(other._error_message_list)
self.sponsor_segments += other.sponsor_segments
self.total_size += other.total_size
self.found_on_disk += other.found_on_disk
def __str__(self):
if self.is_fatal_error:
return self.error_message
head = f"{self.fail} from {self.total} downloads failed:\n" \
f"success-rate:\t{int(self.success_percentage * 100)}%\n" \
f"fail-rate:\t{int(self.failure_percentage * 100)}%\n" \
f"total size:\t{self.formated_size}\n" \
f"skipped segments:\t{self.sponsor_segments}\n" \
f"found on disc:\t{self.found_on_disk}"
if not self.is_mild_failure:
return head
_lines = [head]
_lines.extend(BColors.FAIL.value + s + BColors.ENDC.value for s in self._error_message_list)
return "\n".join(_lines)

View File

@@ -0,0 +1,104 @@
import weakref
from types import FunctionType
from functools import wraps
from typing import Dict, Set
class Lake:
def __init__(self):
self.redirects: Dict[int, int] = {}
self.id_to_object: Dict[int, object] = {}
def get_real_object(self, db_object: object) -> object:
_id = id(db_object)
while _id in self.redirects:
_id = self.redirects[_id]
try:
return self.id_to_object[_id]
except KeyError:
self.add(db_object)
return db_object
def add(self, db_object: object):
self.id_to_object[id(db_object)] = db_object
def override(self, to_override: object, new_db_object: object):
_id = id(to_override)
while _id in self.redirects:
_id = self.redirects[_id]
if id(new_db_object) not in self.id_to_object:
self.add(new_db_object)
self.redirects[_id] = id(new_db_object)
# if _id in self.id_to_object:
# del self.id_to_object[_id]
def is_same(self, __object: object, other: object) -> bool:
_self_id = id(__object)
while _self_id in self.redirects:
_self_id = self.redirects[_self_id]
_other_id = id(other)
while _other_id in self.redirects:
_other_id = self.redirects[_other_id]
return _self_id == _other_id
lake = Lake()
def wrapper(method):
@wraps(method)
def wrapped(*args, **kwargs):
return method(*(lake.get_real_object(args[0]), *args[1:]), **kwargs)
return wrapped
class BaseClass:
def __new__(cls, *args, **kwargs):
# calling cls(*args, **kwargs) here would recurse forever; allocate via object.__new__ instead
instance = super().__new__(cls)
lake.add(instance)
return instance
def __eq__(self, other):
return lake.is_same(self, other)
def _risky_merge(self, to_replace):
lake.override(to_replace, self)
class MetaClass(type):
def __new__(meta, classname, bases, classDict):
bases = (*bases, BaseClass)
newClassDict = {}
ignore_functions: Set[str] = {"__new__", "__init__"}
for attributeName, attribute in classDict.items():
if isinstance(attribute, FunctionType) and (attributeName not in ignore_functions):
"""
The funktion new and init shouldn't be accounted for because we can assume the class is
independent on initialization.
"""
attribute = wrapper(attribute)
newClassDict[attributeName] = attribute
print()
for key, value in object.__dict__.items():
# hasattr( value, '__call__' ) and
if hasattr(value, '__call__') and value not in newClassDict and key not in ("__new__", "__init__"):
newClassDict[key] = wrapper(value)
new_instance = type.__new__(meta, classname, bases, newClassDict)
lake.add(new_instance)
return new_instance
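# Usage sketch (hypothetical class): after a merge, method calls on the old
# object are transparently redirected to the object it was merged into.
#
#   class Song(metaclass=MetaClass):
#       def __init__(self, title):
#           self.title = title
#       def get_title(self):
#           return self.title
#
#   a, b = Song("a"), Song("b")
#   b._risky_merge(a)   # a now redirects to b in the lake
#   a.get_title()       # -> "b"
#   a == b              # -> True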

View File

@@ -0,0 +1,32 @@
from typing import Optional, List
from ...objects import Artist, Album, Song, DatabaseObject
class Query:
def __init__(
self,
raw_query: str = "",
music_object: DatabaseObject = None
) -> None:
self.raw_query: str = raw_query
self.music_object: Optional[DatabaseObject] = music_object
@property
def is_raw(self) -> bool:
return self.music_object is None
@property
def default_search(self) -> List[str]:
if self.music_object is None:
return [self.raw_query]
if isinstance(self.music_object, Artist):
return [self.music_object.name]
if isinstance(self.music_object, Song):
return [f"{artist.name} - {self.music_object}" for artist in self.music_object.main_artist_collection]
if isinstance(self.music_object, Album):
return [f"{artist.name} - {self.music_object}" for artist in self.music_object.artist_collection]
return [self.raw_query]
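# Usage sketch (assuming Artist objects expose `name`, as used above):
#   Query(raw_query="psychonaut 4").default_search   # -> ["psychonaut 4"]
#   Query(music_object=some_artist).default_search   # -> [some_artist.name]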

View File

@@ -0,0 +1,12 @@
class EndThread:
_has_ended: bool = False
def __bool__(self):
return self._has_ended
def exit(self):
self._has_ended = True
class FinishedSearch:
pass