diff --git a/.vscode/settings.json b/.vscode/settings.json index aca8e78..662ba25 100644 --- a/.vscode/settings.json +++ b/.vscode/settings.json @@ -20,12 +20,16 @@ "Bandcamp", "dotenv", "encyclopaedia", + "ENDC", "levenshtein", "metallum", "musify", "OKBLUE", + "pathvalidate", "Referer", + "sponsorblock", "tracksort", + "translit", "unmap", "youtube" ] diff --git a/assets/database_structure.sql b/assets/database_structure.sql deleted file mode 100644 index 293f428..0000000 --- a/assets/database_structure.sql +++ /dev/null @@ -1,66 +0,0 @@ -DROP TABLE IF EXISTS artist; -CREATE TABLE artist ( - id TEXT PRIMARY KEY NOT NULL, - name TEXT -); - -DROP TABLE IF EXISTS artist_release_group; -CREATE TABLE artist_release_group ( - artist_id TEXT NOT NULL, - release_group_id TEXT NOT NULL -); - -DROP TABLE IF EXISTS artist_track; -CREATE TABLE artist_track ( - artist_id TEXT NOT NULL, - track_id TEXT NOT NULL -); - -DROP TABLE IF EXISTS release_group; -CREATE TABLE release_group ( - id TEXT PRIMARY KEY NOT NULL, - albumartist TEXT, - albumsort INT, - musicbrainz_albumtype TEXT, - compilation TEXT, - album_artist_id TEXT -); - -DROP TABLE IF EXISTS release_; -CREATE TABLE release_ ( - id TEXT PRIMARY KEY NOT NULL, - release_group_id TEXT NOT NULL, - title TEXT, - copyright TEXT, - album_status TEXT, - language TEXT, - year TEXT, - date TEXT, - country TEXT, - barcode TEXT -); - -DROP TABLE IF EXISTS track; -CREATE TABLE track ( - id TEXT PRIMARY KEY NOT NULL, - downloaded BOOLEAN NOT NULL DEFAULT 0, - release_id TEXT NOT NULL, - track TEXT, - length INT, - tracknumber TEXT, - isrc TEXT, - genre TEXT, - lyrics TEXT, - path TEXT, - file TEXT, - url TEXT, - src TEXT -); - -DROP TABLE IF EXISTS source; -CREATE TABLE source ( - track_id TEXT NOT NULL, - src TEXT NOT NULL, - url TEXT NOT NULL, - valid BOOLEAN NOT NULL DEFAULT 1 -); diff --git a/assets/logo.svg b/assets/logo.svg index 49738fc..eb043bc 100644 --- a/assets/logo.svg +++ b/assets/logo.svg @@ -1,73 +1,35 @@ - - - - - - - - - - - - - + + + + + + + + + + + + + + diff --git a/assets/logos/.png b/assets/logos/.png deleted file mode 100644 index df21544..0000000 Binary files a/assets/logos/.png and /dev/null differ diff --git a/assets/logos/00.jpg b/assets/logos/00.jpg deleted file mode 100644 index a4f0a94..0000000 Binary files a/assets/logos/00.jpg and /dev/null differ diff --git a/assets/logos/01.png b/assets/logos/01.png deleted file mode 100644 index e0c807d..0000000 Binary files a/assets/logos/01.png and /dev/null differ diff --git a/assets/logos/01.svg b/assets/logos/01.svg deleted file mode 100644 index 13e2386..0000000 --- a/assets/logos/01.svg +++ /dev/null @@ -1,76 +0,0 @@ - - - - - - - - - - - - - - - diff --git a/assets/logos/02.png b/assets/logos/02.png deleted file mode 100644 index 1bc91de..0000000 Binary files a/assets/logos/02.png and /dev/null differ diff --git a/assets/logos/02.svg b/assets/logos/02.svg deleted file mode 100644 index 49738fc..0000000 --- a/assets/logos/02.svg +++ /dev/null @@ -1,73 +0,0 @@ - - - - - - - - - - - - - - - diff --git a/assets/logos/craiyon_142854_simplistic_black_and_white_logo_of_an_octopus_wearing_headphones__svg__cute__octupus.png b/assets/logos/craiyon_142854_simplistic_black_and_white_logo_of_an_octopus_wearing_headphones__svg__cute__octupus.png deleted file mode 100644 index 7c75bb2..0000000 Binary files a/assets/logos/craiyon_142854_simplistic_black_and_white_logo_of_an_octopus_wearing_headphones__svg__cute__octupus.png and /dev/null differ diff --git a/assets/logos/craiyon_142854_simplistic_black_and_white_logo_of_an_octopus_wearing_headphones__svg__cute__octupus.svg b/assets/logos/craiyon_142854_simplistic_black_and_white_logo_of_an_octopus_wearing_headphones__svg__cute__octupus.svg deleted file mode 100644 index bac4a90..0000000 --- a/assets/logos/craiyon_142854_simplistic_black_and_white_logo_of_an_octopus_wearing_headphones__svg__cute__octupus.svg +++ /dev/null @@ -1,137 +0,0 @@ - - - - - - - - - - - - - diff --git a/assets/logos/craiyon_144924_simplistic__black_and_white_logo__octopus_wearing_headphones__svg__cute__in_frame__ce.png b/assets/logos/craiyon_144924_simplistic__black_and_white_logo__octopus_wearing_headphones__svg__cute__in_frame__ce.png deleted file mode 100644 index df21544..0000000 Binary files a/assets/logos/craiyon_144924_simplistic__black_and_white_logo__octopus_wearing_headphones__svg__cute__in_frame__ce.png and /dev/null differ diff --git a/assets/temp_database_structure.sql b/assets/temp_database_structure.sql deleted file mode 100644 index 293f428..0000000 --- a/assets/temp_database_structure.sql +++ /dev/null @@ -1,66 +0,0 @@ -DROP TABLE IF EXISTS artist; -CREATE TABLE artist ( - id TEXT PRIMARY KEY NOT NULL, - name TEXT -); - -DROP TABLE IF EXISTS artist_release_group; -CREATE TABLE artist_release_group ( - artist_id TEXT NOT NULL, - release_group_id TEXT NOT NULL -); - -DROP TABLE IF EXISTS artist_track; -CREATE TABLE artist_track ( - artist_id TEXT NOT NULL, - track_id TEXT NOT NULL -); - -DROP TABLE IF EXISTS release_group; -CREATE TABLE release_group ( - id TEXT PRIMARY KEY NOT NULL, - albumartist TEXT, - albumsort INT, - musicbrainz_albumtype TEXT, - compilation TEXT, - album_artist_id TEXT -); - -DROP TABLE IF EXISTS release_; -CREATE TABLE release_ ( - id TEXT PRIMARY KEY NOT NULL, - release_group_id TEXT NOT NULL, - title TEXT, - copyright TEXT, - album_status TEXT, - language TEXT, - year TEXT, - date TEXT, - country TEXT, - barcode TEXT -); - -DROP TABLE IF EXISTS track; -CREATE TABLE track ( - id TEXT PRIMARY KEY NOT NULL, - downloaded BOOLEAN NOT NULL DEFAULT 0, - release_id TEXT NOT NULL, - track TEXT, - length INT, - tracknumber TEXT, - isrc TEXT, - genre TEXT, - lyrics TEXT, - path TEXT, - file TEXT, - url TEXT, - src TEXT -); - -DROP TABLE IF EXISTS source; -CREATE TABLE source ( - track_id TEXT NOT NULL, - src TEXT NOT NULL, - url TEXT NOT NULL, - valid BOOLEAN NOT NULL DEFAULT 1 -); diff --git a/development/actual_donwload.py b/development/actual_donwload.py index 333da4f..a8eb732 100644 --- a/development/actual_donwload.py +++ b/development/actual_donwload.py @@ -6,9 +6,8 @@ logging.getLogger().setLevel(logging.DEBUG) if __name__ == "__main__": commands = [ - "s: #a Ghost Bath", - "0", - "d: 1", + "s: #a Crystal F", + "d: 20", ] diff --git a/music_kraken/audio/metadata.py b/music_kraken/audio/metadata.py index a0d8386..1d37419 100644 --- a/music_kraken/audio/metadata.py +++ b/music_kraken/audio/metadata.py @@ -68,9 +68,11 @@ def write_metadata_to_target(metadata: Metadata, target: Target, song: Song): LOGGER.info(str(metadata)) if song.artwork.best_variant is not None: + best_variant = song.artwork.best_variant + r = artwork_connection.get( - url=song.artwork.best_variant["url"], - disable_cache=False, + url=best_variant["url"], + name=song.artwork.get_variant_name(best_variant), ) temp_target: Target = Target.temp() diff --git a/music_kraken/cli/main_downloader.py b/music_kraken/cli/main_downloader.py index 36541fe..dad0b5d 100644 --- a/music_kraken/cli/main_downloader.py +++ b/music_kraken/cli/main_downloader.py @@ -13,6 +13,8 @@ from ..utils.support_classes.query import Query from ..utils.support_classes.download_result import DownloadResult from ..utils.exception.download import UrlNotFoundException from ..utils.enums.colors import BColors +from .. import console + from ..download.results import Results, Option, PageResults from ..download.page_attributes import Pages from ..pages import Page @@ -174,12 +176,14 @@ class Downloader: page_count = 0 for option in self.current_results.formated_generator(max_items_per_page=self.max_displayed_options): if isinstance(option, Option): - color = BColors.BOLD.value if self.pages.is_downloadable(option.music_object) else BColors.GREY.value - print(f"{color}{option.index:0{self.option_digits}} {option.music_object.option_string}{BColors.ENDC.value}") + _downloadable = self.pages.is_downloadable(option.music_object) + + r = f"{BColors.GREY.value}{option.index:0{self.option_digits}}{BColors.ENDC.value} {option.music_object.option_string}" + print(r) else: prefix = ALPHABET[page_count % len(ALPHABET)] print( - f"{BColors.HEADER.value}({prefix}) ------------------------{option.__name__:{PAGE_NAME_FILL}<{MAX_PAGE_LEN}}------------{BColors.ENDC.value}") + f"{BColors.HEADER.value}({prefix}) --------------------------------{option.__name__:{PAGE_NAME_FILL}<{MAX_PAGE_LEN}}--------------------{BColors.ENDC.value}") self.page_dict[prefix] = option self.page_dict[option.__name__] = option @@ -211,6 +215,9 @@ class Downloader: return True def _process_parsed(self, key_text: Dict[str, str], query: str) -> Query: + # strip all the values in key_text + key_text = {key: value.strip() for key, value in key_text.items()} + song = None if not "t" in key_text else Song(title=key_text["t"], dynamic=True) album = None if not "r" in key_text else Album(title=key_text["r"], dynamic=True) artist = None if not "a" in key_text else Artist(name=key_text["a"], dynamic=True) diff --git a/music_kraken/connection/cache.py b/music_kraken/connection/cache.py index c224375..004a6ba 100644 --- a/music_kraken/connection/cache.py +++ b/music_kraken/connection/cache.py @@ -1,12 +1,13 @@ import json from pathlib import Path -from dataclasses import dataclass +from dataclasses import dataclass, field from datetime import datetime, timedelta from typing import List, Optional from functools import lru_cache import logging from ..utils.config import main_settings +from ..utils.string_processing import fit_to_file_system @dataclass @@ -17,6 +18,8 @@ class CacheAttribute: created: datetime expires: datetime + additional_info: dict = field(default_factory=dict) + @property def id(self): return f"{self.module}_{self.name}" @@ -31,6 +34,12 @@ class CacheAttribute: return self.__dict__ == other.__dict__ +@dataclass +class CacheResult: + content: bytes + attribute: CacheAttribute + + class Cache: def __init__(self, module: str, logger: logging.Logger): self.module = module @@ -48,13 +57,16 @@ class Cache: self._time_fields = {"created", "expires"} with self.index.open("r") as i: - for c in json.loads(i.read()): - for key in self._time_fields: - c[key] = datetime.fromisoformat(c[key]) + try: + for c in json.loads(i.read()): + for key in self._time_fields: + c[key] = datetime.fromisoformat(c[key]) - ca = CacheAttribute(**c) - self.cached_attributes.append(ca) - self._id_to_attribute[ca.id] = ca + ca = CacheAttribute(**c) + self.cached_attributes.append(ca) + self._id_to_attribute[ca.id] = ca + except json.JSONDecodeError: + pass @lru_cache() def _init_module(self, module: str) -> Path: @@ -63,7 +75,7 @@ class Cache: :return: the module path """ r = Path(self._dir, module) - r.mkdir(exist_ok=True) + r.mkdir(exist_ok=True, parents=True) return r def _write_index(self, indent: int = 4): @@ -99,7 +111,7 @@ class Cache: return True - def set(self, content: bytes, name: str, expires_in: float = 10, module: str = ""): + def set(self, content: bytes, name: str, expires_in: float = 10, module: str = "", additional_info: dict = None): """ :param content: :param module: @@ -110,6 +122,7 @@ class Cache: if name == "": return + additional_info = additional_info or {} module = self.module if module == "" else module module_path = self._init_module(module) @@ -119,27 +132,31 @@ class Cache: name=name, created=datetime.now(), expires=datetime.now() + timedelta(days=expires_in), + additional_info=additional_info, ) self._write_attribute(cache_attribute) - cache_path = Path(module_path, name) + cache_path = fit_to_file_system(Path(module_path, name), hidden_ok=True) with cache_path.open("wb") as content_file: self.logger.debug(f"writing cache to {cache_path}") content_file.write(content) - def get(self, name: str) -> Optional[bytes]: - path = Path(self._dir, self.module, name) + def get(self, name: str) -> Optional[CacheResult]: + path = fit_to_file_system(Path(self._dir, self.module, name), hidden_ok=True) if not path.is_file(): return None # check if it is outdated + if f"{self.module}_{name}" not in self._id_to_attribute: + path.unlink() + return existing_attribute: CacheAttribute = self._id_to_attribute[f"{self.module}_{name}"] if not existing_attribute.is_valid: return with path.open("rb") as f: - return f.read() + return CacheResult(content=f.read(), attribute=existing_attribute) def clean(self): keep = set() @@ -148,7 +165,7 @@ class Cache: if ca.name == "": continue - file = Path(self._dir, ca.module, ca.name) + file = fit_to_file_system(Path(self._dir, ca.module, ca.name), hidden_ok=True) if not ca.is_valid: self.logger.debug(f"deleting cache {ca.id}") diff --git a/music_kraken/connection/connection.py b/music_kraken/connection/connection.py index 407e21f..d15aa32 100644 --- a/music_kraken/connection/connection.py +++ b/music_kraken/connection/connection.py @@ -15,6 +15,8 @@ from tqdm import tqdm from .cache import Cache from .rotating import RotatingProxy from ..objects import Target +from ..utils import request_trace +from ..utils.string_processing import shorten_display_url from ..utils.config import main_settings from ..utils.support_classes.download_result import DownloadResult from ..utils.hacking import merge_args @@ -123,12 +125,17 @@ class Connection: return headers - def save(self, r: requests.Response, name: str, error: bool = False, **kwargs): + def save(self, r: requests.Response, name: str, error: bool = False, no_update_if_valid_exists: bool = False, **kwargs): n_kwargs = {} if error: n_kwargs["module"] = "failed_requests" - self.cache.set(r.content, name, expires_in=kwargs.get("expires_in", self.cache_expiring_duration), **n_kwargs) + if self.cache.get(name) is not None and no_update_if_valid_exists: + return + + self.cache.set(r.content, name, expires_in=kwargs.get("expires_in", self.cache_expiring_duration), additional_info={ + "encoding": r.encoding, + }, **n_kwargs) def request( self, @@ -143,6 +150,7 @@ class Connection: sleep_after_404: float = None, is_heartbeat: bool = False, disable_cache: bool = None, + enable_cache_readonly: bool = False, method: str = None, name: str = "", exclude_headers: List[str] = None, @@ -152,7 +160,7 @@ class Connection: raise AttributeError("method is not set.") method = method.upper() headers = dict() if headers is None else headers - disable_cache = headers.get("Cache-Control", "").lower() == "no-cache" if disable_cache is None else disable_cache + disable_cache = (headers.get("Cache-Control", "").lower() == "no-cache" if disable_cache is None else disable_cache) or kwargs.get("stream", False) accepted_response_codes = self.ACCEPTED_RESPONSE_CODES if accepted_response_codes is None else accepted_response_codes current_kwargs = copy.copy(locals()) @@ -160,6 +168,7 @@ class Connection: current_kwargs.update(**kwargs) parsed_url = urlparse(url) + trace_string = f"{method} {shorten_display_url(url)} \t{'[stream]' if kwargs.get('stream', False) else ''}" if not raw_headers: _headers = copy.copy(self.HEADER_VALUES) @@ -175,15 +184,23 @@ class Connection: request_url = parsed_url.geturl() if not raw_url else url - if name != "" and not disable_cache: + if name != "" and (not disable_cache or enable_cache_readonly): cached = self.cache.get(name) if cached is not None: + request_trace(f"{trace_string}\t[cached]") + with responses.RequestsMock() as resp: + additional_info = cached.attribute.additional_info + + body = cached.content + if additional_info.get("encoding", None) is not None: + body = body.decode(additional_info["encoding"]) + resp.add( method=method, url=request_url, - body=cached, + body=body, ) return requests.request(method=method, url=url, timeout=timeout, headers=headers, **kwargs) @@ -199,6 +216,9 @@ class Connection: if header in headers: del headers[header] + if try_count <= 0: + request_trace(trace_string) + r = None connection_failed = False try: @@ -228,10 +248,10 @@ class Connection: self.lock = False if r is None: - self.LOGGER.warning(f"{self.HOST.netloc} didn't respond at {url}. ({try_count}-{self.TRIES})") + self.LOGGER.warning(f"{parsed_url.netloc} didn't respond at {url}. ({try_count}-{self.TRIES})") self.LOGGER.debug("request headers:\n\t"+ "\n\t".join(f"{k}\t=\t{v}" for k, v in headers.items())) else: - self.LOGGER.warning(f"{self.HOST.netloc} responded wit {r.status_code} at {url}. ({try_count}-{self.TRIES})") + self.LOGGER.warning(f"{parsed_url.netloc} responded wit {r.status_code} at {url}. ({try_count}-{self.TRIES})") self.LOGGER.debug("request headers:\n\t"+ "\n\t".join(f"{k}\t=\t{v}" for k, v in r.request.headers.items())) self.LOGGER.debug("response headers:\n\t"+ "\n\t".join(f"{k}\t=\t{v}" for k, v in r.headers.items())) self.LOGGER.debug(r.content) diff --git a/music_kraken/objects/artwork.py b/music_kraken/objects/artwork.py index 9b9b9ce..43ea87e 100644 --- a/music_kraken/objects/artwork.py +++ b/music_kraken/objects/artwork.py @@ -50,6 +50,9 @@ class Artwork: return None return min(self._variant_mapping.values(), key=lambda x: x["deviation"]) + def get_variant_name(self, variant: ArtworkVariant) -> str: + return f"artwork_{variant['width']}x{variant['height']}_{hash_url(variant['url']).replace('/', '_')}" + def __merge__(self, other: Artwork, override: bool = False) -> None: for key, value in other._variant_mapping.items(): if key not in self._variant_mapping or override: diff --git a/music_kraken/objects/cache.py b/music_kraken/objects/cache.py deleted file mode 100644 index 181a13c..0000000 --- a/music_kraken/objects/cache.py +++ /dev/null @@ -1,110 +0,0 @@ -from collections import defaultdict -from typing import Dict, List, Optional -import weakref - -from .parents import DatabaseObject - -""" -This is a cache for the objects, that et pulled out of the database. -This is necessary, to not have duplicate objects with the same id. - -Using a cache that maps the ojects to their id has multiple benefits: - - if you modify the object at any point, all objects with the same id get modified *(copy by reference)* - - less ram usage - - to further decrease ram usage I only store weak refs and not a strong reference, for the gc to still work -""" - - -class ObjectCache: - """ - ObjectCache is a cache for the objects retrieved from a database. - It maps each object to its id and uses weak references to manage its memory usage. - Using a cache for these objects provides several benefits: - - - Modifying an object updates all objects with the same id (due to copy by reference) - - Reduced memory usage - - :attr object_to_id: Dictionary that maps DatabaseObjects to their id. - :attr weakref_map: Dictionary that uses weak references to DatabaseObjects as keys and their id as values. - - :method exists: Check if a DatabaseObject already exists in the cache. - :method append: Add a DatabaseObject to the cache if it does not already exist. - :method extent: Add a list of DatabaseObjects to the cache. - :method remove: Remove a DatabaseObject from the cache by its id. - :method get: Retrieve a DatabaseObject from the cache by its id. """ - object_to_id: Dict[str, DatabaseObject] - weakref_map: Dict[weakref.ref, str] - - def __init__(self) -> None: - self.object_to_id = dict() - self.weakref_map = defaultdict() - - def exists(self, database_object: DatabaseObject) -> bool: - """ - Check if a DatabaseObject with the same id already exists in the cache. - - :param database_object: The DatabaseObject to check for. - :return: True if the DatabaseObject exists, False otherwise. - """ - if database_object.dynamic: - return True - return database_object.id in self.object_to_id - - def on_death(self, weakref_: weakref.ref) -> None: - """ - Callback function that gets triggered when the reference count of a DatabaseObject drops to 0. - This function removes the DatabaseObject from the cache. - - :param weakref_: The weak reference of the DatabaseObject that has been garbage collected. - """ - data_id = self.weakref_map.pop(weakref_) - self.object_to_id.pop(data_id) - - def get_weakref(self, database_object: DatabaseObject) -> weakref.ref: - return weakref.ref(database_object, self.on_death) - - - def append(self, database_object: DatabaseObject) -> bool: - """ - Add a DatabaseObject to the cache. - - :param database_object: The DatabaseObject to add to the cache. - :return: True if the DatabaseObject already exists in the cache, False otherwise. - """ - if self.exists(database_object): - return True - - self.weakref_map[weakref.ref(database_object, self.on_death)] = database_object.id - self.object_to_id[database_object.id] = database_object - - return False - - def extent(self, database_object_list: List[DatabaseObject]): - """ - adjacent to the extent method of list, this appends n Object - """ - for database_object in database_object_list: - self.append(database_object) - - def remove(self, _id: str): - """ - Remove a DatabaseObject from the cache. - - :param _id: The id of the DatabaseObject to remove from the cache. - """ - data = self.object_to_id.get(_id) - if data: - self.weakref_map.pop(weakref.ref(data)) - self.object_to_id.pop(_id) - - def __getitem__(self, item) -> Optional[DatabaseObject]: - """ - this returns the data obj - :param item: the id of the music object - :return: - """ - - return self.object_to_id.get(item) - - def get(self, _id: str) -> Optional[DatabaseObject]: - return self.__getitem__(_id) diff --git a/music_kraken/objects/collection.py b/music_kraken/objects/collection.py index f1a694b..02bff19 100644 --- a/music_kraken/objects/collection.py +++ b/music_kraken/objects/collection.py @@ -161,3 +161,8 @@ class Collection(Generic[T]): def __getitem__(self, item: int): return self._data[item] + + def get(self, item: int, default = None): + if item >= len(self._data): + return default + return self._data[item] diff --git a/music_kraken/objects/parents.py b/music_kraken/objects/parents.py index fd2a80c..59a3d10 100644 --- a/music_kraken/objects/parents.py +++ b/music_kraken/objects/parents.py @@ -267,9 +267,9 @@ class OuterProxy: return r - def __repr__(self): - return f"{type(self).__name__}({', '.join(key + ': ' + str(val) for key, val in self.indexing_values)})" - @property def title_string(self) -> str: return str(self.__getattribute__(self.TITEL)) + + def __repr__(self): + return f"{type(self).__name__}({self.title_string})" diff --git a/music_kraken/objects/song.py b/music_kraken/objects/song.py index 2df348e..be6d751 100644 --- a/music_kraken/objects/song.py +++ b/music_kraken/objects/song.py @@ -27,14 +27,49 @@ from ..utils.string_processing import unify from .parents import OuterProxy as Base from ..utils.config import main_settings +from ..utils.enums.colors import BColors """ All Objects dependent """ CountryTyping = type(list(pycountry.countries)[0]) -OPTION_STRING_DELIMITER = " | " +OPTION_BACKGROUND = BColors.GREY +OPTION_FOREGROUND = BColors.OKBLUE + +def get_collection_string( + collection: Collection[Base], + template: str, + ignore_titles: Set[str] = None, + background: BColors = OPTION_BACKGROUND, + foreground: BColors = OPTION_FOREGROUND +) -> str: + if collection.empty: + return "" + + foreground = foreground.value + background = background.value + + ignore_titles = ignore_titles or set() + + r = background + + element: Base + titel_list: List[str] = [element.title_string.strip() for element in collection if element.title_string not in ignore_titles] + + for i, titel in enumerate(titel_list): + delimiter = ", " + if i == len(collection) - 1: + delimiter = "" + elif i == len(collection) - 2: + delimiter = " and " + + r += foreground + titel + BColors.ENDC.value + background + delimiter + BColors.ENDC.value + + r += BColors.ENDC.value + + return template.format(r) class Song(Base): title: str @@ -152,18 +187,12 @@ class Song(Base): return main_artists return f"{main_artists} feat. {feature_artists}" - def __repr__(self) -> str: - return f"Song(\"{self.title}\")" - @property def option_string(self) -> str: - r = f"{self.__repr__()}" - if not self.album_collection.empty: - r += f" from Album({OPTION_STRING_DELIMITER.join(album.title for album in self.album_collection)})" - if not self.main_artist_collection.empty: - r += f" by Artist({OPTION_STRING_DELIMITER.join(artist.name for artist in self.main_artist_collection)})" - if not self.feature_artist_collection.empty: - r += f" feat. Artist({OPTION_STRING_DELIMITER.join(artist.name for artist in self.feature_artist_collection)})" + r = OPTION_FOREGROUND.value + self.title + BColors.ENDC.value + OPTION_BACKGROUND.value + r += get_collection_string(self.album_collection, " from {}", ignore_titles={self.title}) + r += get_collection_string(self.main_artist_collection, " by {}") + r += get_collection_string(self.feature_artist_collection, " feat. {}") return r @property @@ -302,15 +331,16 @@ class Album(Base): id3Mapping.ALBUMSORTORDER: [str(self.albumsort)] if self.albumsort is not None else [] }) - def __repr__(self): - return f"Album(\"{self.title}\")" - @property def option_string(self) -> str: - return f"{self.__repr__()} " \ - f"by Artist({OPTION_STRING_DELIMITER.join([artist.name + str(artist.id) for artist in self.artist_collection])}) " \ - f"under Label({OPTION_STRING_DELIMITER.join([label.name for label in self.label_collection])})" + r = OPTION_FOREGROUND.value + self.title + BColors.ENDC.value + OPTION_BACKGROUND.value + r += get_collection_string(self.artist_collection, " by {}") + r += get_collection_string(self.label_collection, " under {}") + if len(self.song_collection) > 0: + r += f" with {len(self.song_collection)} songs" + return r + @property def options(self) -> List[P]: options = [*self.artist_collection, self, *self.song_collection] @@ -570,8 +600,18 @@ class Artist(Base): @property def option_string(self) -> str: - return f"{self.__repr__()} " \ - f"under Label({OPTION_STRING_DELIMITER.join([label.name for label in self.label_collection])})" + r = OPTION_FOREGROUND.value + self.name + BColors.ENDC.value + OPTION_BACKGROUND.value + r += get_collection_string(self.label_collection, " under {}") + + r += OPTION_BACKGROUND.value + if len(self.main_album_collection) > 0: + r += f" with {len(self.main_album_collection)} albums" + + if len(self.feature_song_collection) > 0: + r += f" featured in {len(self.feature_song_collection)} songs" + r += BColors.ENDC.value + + return r @property def options(self) -> List[P]: @@ -689,4 +729,4 @@ class Label(Base): @property def option_string(self): - return self.__repr__() + return OPTION_FOREGROUND.value + self.name + BColors.ENDC.value diff --git a/music_kraken/pages/abstract.py b/music_kraken/pages/abstract.py index 29ba68c..468067b 100644 --- a/music_kraken/pages/abstract.py +++ b/music_kraken/pages/abstract.py @@ -256,7 +256,7 @@ class Page: stop_at_level=stop_at_level, post_process=False, type_string=type(music_object).__name__, - title_string=music_object.title_string, + entity_string=music_object.option_string, ) if new_music_object is None: @@ -278,7 +278,7 @@ class Page: enforce_type: Type[DatabaseObject] = None, post_process: bool = True, type_string: str = "", - title_string: str = "", + entity_string: str = "", ) -> Optional[DatabaseObject]: obj_type = self.get_source_type(source) @@ -306,7 +306,7 @@ class Page: return None if stop_at_level > 0: - trace(f"fetching {type_string} [{title_string}] [stop_at_level={stop_at_level}]") + trace(f"fetching {type_string} [{entity_string}] [stop_at_level={stop_at_level}]") collection: Collection for collection_str in music_object.DOWNWARDS_COLLECTION_STRING_ATTRIBUTES: @@ -364,7 +364,7 @@ class Page: skip_details: bool = False, process_metadata_anyway: bool = True ) -> DownloadResult: - trace(f"downloading {type(music_object).__name__} [{music_object.title_string}]") + trace(f"downloading {type(music_object).__name__} [{music_object.option_string}]") skip_next_details = skip_details # Skips all releases, that are defined in shared.ALBUM_TYPE_BLACKLIST, if download_all is False @@ -451,7 +451,7 @@ class Page: source = sources[0] if not found_on_disc: - r = self.download_song_to_target(source=source, target=temp_target, desc=song.title) + r = self.download_song_to_target(source=source, target=temp_target, desc=song.option_string) if not r.is_fatal_error: r.merge(self._post_process_targets(song, temp_target, diff --git a/music_kraken/pages/bandcamp.py b/music_kraken/pages/bandcamp.py index 1088be0..90064db 100644 --- a/music_kraken/pages/bandcamp.py +++ b/music_kraken/pages/bandcamp.py @@ -136,7 +136,7 @@ class Bandcamp(Page): "full_page": True, "search_filter": filter_string, "search_text": search_query, - }) + }, name=f"search_{filter_string}_{search_query}") if r is None: return results @@ -224,7 +224,7 @@ class Bandcamp(Page): def fetch_artist(self, source: Source, stop_at_level: int = 1) -> Artist: artist = Artist() - r = self.connection.get(_parse_artist_url(source.url)) + r = self.connection.get(_parse_artist_url(source.url), name=f"artist_{urlparse(source.url).scheme}_{urlparse(source.url).netloc}") if r is None: return artist @@ -271,7 +271,7 @@ class Bandcamp(Page): def fetch_album(self, source: Source, stop_at_level: int = 1) -> Album: album = Album() - r = self.connection.get(source.url) + r = self.connection.get(source.url, name=f"album_{urlparse(source.url).netloc.split('.')[0]}_{urlparse(source.url).path.replace('/', '').replace('album', '')}") if r is None: return album @@ -338,7 +338,7 @@ class Bandcamp(Page): return [] def fetch_song(self, source: Source, stop_at_level: int = 1) -> Song: - r = self.connection.get(source.url) + r = self.connection.get(source.url, name=f"song_{urlparse(source.url).netloc.split('.')[0]}_{urlparse(source.url).path.replace('/', '').replace('track', '')}") if r is None: return Song() diff --git a/music_kraken/pages/musify.py b/music_kraken/pages/musify.py index 43e644f..28ac0a9 100644 --- a/music_kraken/pages/musify.py +++ b/music_kraken/pages/musify.py @@ -26,6 +26,7 @@ from ..objects import ( ) from ..utils.config import logging_settings from ..utils import string_processing, shared +from ..utils.string_processing import clean_song_title from ..utils.support_classes.query import Query from ..utils.support_classes.download_result import DownloadResult @@ -120,6 +121,7 @@ class Musify(Page): self.connection: Connection = Connection( host="https://musify.club/", logger=self.LOGGER, + module="musify", ) self.stream_connection: Connection = Connection( @@ -355,8 +357,10 @@ class Musify(Page): if raw_id.isdigit(): _id = raw_id + + return Song( - title=song_title, + title=clean_song_title(song_title, artist_name=artist_list[0].name if len(artist_list) > 0 else None), main_artist_list=artist_list, source_list=source_list ) @@ -372,7 +376,7 @@ class Musify(Page): def general_search(self, search_query: str) -> List[DatabaseObject]: search_results = [] - r = self.connection.get(f"https://musify.club/search?searchText={search_query}") + r = self.connection.get(f"https://musify.club/search?searchText={search_query}", name="search_" + search_query) if r is None: return [] search_soup: BeautifulSoup = self.get_soup_from_response(r) @@ -390,10 +394,11 @@ class Musify(Page): return search_results def fetch_song(self, source: Source, stop_at_level: int = 1) -> Song: - # https://musify.club/track/linkin-park-numb-210765 - r = self.connection.get(source.url) + musify_url = parse_url(source.url) + + r = self.connection.get(source.url, name="track_" + musify_url.name_with_id) if r is None: - return Song + return Song() soup = self.get_soup_from_response(r) @@ -502,7 +507,7 @@ class Musify(Page): )) return Song( - title=track_name, + title=clean_song_title(track_name, artist_name=artist_list[0].name if len(artist_list) > 0 else None), source_list=source_list, lyrics_list=lyrics_list, main_artist_list=artist_list, @@ -645,7 +650,7 @@ class Musify(Page): )) return Song( - title=song_name, + title=clean_song_title(song_name, artist_name=artist_list[0].name if len(artist_list) > 0 else None), tracksort=tracksort, main_artist_list=artist_list, source_list=source_list @@ -669,7 +674,7 @@ class Musify(Page): url = parse_url(source.url) endpoint = self.HOST + "/release/" + url.name_with_id - r = self.connection.get(endpoint) + r = self.connection.get(endpoint, name=url.name_with_id) if r is None: return Album() @@ -706,7 +711,7 @@ class Musify(Page): :return: """ - r = self.connection.get(f"https://musify.club/{url.source_type.value}/{url.name_with_id}?_pjax=#bodyContent") + r = self.connection.get(f"https://musify.club/{url.source_type.value}/{url.name_with_id}?_pjax=#bodyContent", name="artist_attributes_" + url.name_with_id) if r is None: return Artist() @@ -1072,7 +1077,7 @@ class Musify(Page): "SortOrder.Property": "dateCreated", "SortOrder.IsAscending": False, "X-Requested-With": "XMLHttpRequest" - }) + }, name="discography_" + url.name_with_id) if r is None: return [] soup: BeautifulSoup = BeautifulSoup(r.content, features="html.parser") @@ -1123,4 +1128,4 @@ class Musify(Page): self.LOGGER.warning(f"The source has no audio link. Falling back to {endpoint}.") - return self.stream_connection.stream_into(endpoint, target, raw_url=True, exclude_headers=["Host"]) + return self.stream_connection.stream_into(endpoint, target, raw_url=True, exclude_headers=["Host"], name=desc) diff --git a/music_kraken/pages/youtube.py b/music_kraken/pages/youtube.py index 4ce6633..73b92ad 100644 --- a/music_kraken/pages/youtube.py +++ b/music_kraken/pages/youtube.py @@ -2,8 +2,7 @@ from typing import List, Optional, Type, Tuple from urllib.parse import urlparse, urlunparse, parse_qs from enum import Enum -import sponsorblock -from sponsorblock.errors import HTTPException, NotFoundException +import python_sponsorblock from ..objects import Source, DatabaseObject, Song, Target from .abstract import Page @@ -63,8 +62,9 @@ class YouTube(SuperYouTube): ) # the stuff with the connection is, to ensure sponsorblock uses the proxies, my programm does - _sponsorblock_connection: Connection = Connection(host="https://sponsor.ajay.app/") - self.sponsorblock_client = sponsorblock.Client(session=_sponsorblock_connection.session) + _sponsorblock_connection: Connection = Connection() + self.sponsorblock = python_sponsorblock.SponsorBlock(silent=True, session=_sponsorblock_connection.session) + super().__init__(*args, **kwargs) @@ -344,10 +344,10 @@ class YouTube(SuperYouTube): segments = [] try: - segments = self.sponsorblock_client.get_skip_segments(parsed.id) + segments = self.sponsorblock.get_segments(parsed.id) except NotFoundException: self.LOGGER.debug(f"No sponsor found for the video {parsed.id}.") except HTTPException as e: self.LOGGER.warning(f"{e}") - return [(segment.start, segment.end) for segment in segments] + return [(segment.segment[0], segment.segment[1]) for segment in segments] diff --git a/music_kraken/pages/youtube_music/_music_object_render.py b/music_kraken/pages/youtube_music/_music_object_render.py index d615ef4..f10d11a 100644 --- a/music_kraken/pages/youtube_music/_music_object_render.py +++ b/music_kraken/pages/youtube_music/_music_object_render.py @@ -2,6 +2,7 @@ from typing import List, Optional from enum import Enum from ...utils.config import youtube_settings, logging_settings +from ...utils.string_processing import clean_song_title from ...objects import Source, DatabaseObject from ..abstract import Page from ...objects import ( @@ -59,7 +60,7 @@ def parse_run_element(run_element: dict) -> Optional[DatabaseObject]: if element_type == PageType.SONG or (element_type == PageType.VIDEO and not youtube_settings["youtube_music_clean_data"]) or (element_type == PageType.OFFICIAL_MUSIC_VIDEO and not youtube_settings["youtube_music_clean_data"]): source = Source(SOURCE_PAGE, f"https://music.youtube.com/watch?v={element_id}") - return Song(title=element_text, source_list=[source]) + return Song(title=clean_song_title(element_text), source_list=[source]) if element_type == PageType.ARTIST or (element_type == PageType.CHANNEL and not youtube_settings["youtube_music_clean_data"]): source = Source(SOURCE_PAGE, f"https://music.youtube.com/channel/{element_id}") diff --git a/music_kraken/pages/youtube_music/super_youtube.py b/music_kraken/pages/youtube_music/super_youtube.py index d391370..420c46d 100644 --- a/music_kraken/pages/youtube_music/super_youtube.py +++ b/music_kraken/pages/youtube_music/super_youtube.py @@ -3,8 +3,7 @@ from urllib.parse import urlparse, urlunparse, parse_qs from enum import Enum import requests -import sponsorblock -from sponsorblock.errors import HTTPException, NotFoundException +import python_sponsorblock from ...objects import Source, DatabaseObject, Song, Target from ..abstract import Page @@ -143,9 +142,8 @@ class SuperYouTube(Page): ) # the stuff with the connection is, to ensure sponsorblock uses the proxies, my programm does - _sponsorblock_connection: Connection = Connection(host="https://sponsor.ajay.app/") - self.sponsorblock_client = sponsorblock.Client(session=_sponsorblock_connection.session) - + _sponsorblock_connection: Connection = Connection() + self.sponsorblock = python_sponsorblock.SponsorBlock(silent=True, session=_sponsorblock_connection.session) def get_source_type(self, source: Source) -> Optional[Type[DatabaseObject]]: _url_type = { @@ -213,10 +211,10 @@ class SuperYouTube(Page): segments = [] try: - segments = self.sponsorblock_client.get_skip_segments(parsed.id) + segments = self.sponsorblock.get_segments(parsed.id) except NotFoundException: self.LOGGER.debug(f"No sponsor found for the video {parsed.id}.") except HTTPException as e: self.LOGGER.warning(f"{e}") - return [(segment.start, segment.end) for segment in segments] + return [(segment.segment[0], segment.segment[1]) for segment in segments] diff --git a/music_kraken/pages/youtube_music/youtube_music.py b/music_kraken/pages/youtube_music/youtube_music.py index 5e82a22..6ecbeaf 100644 --- a/music_kraken/pages/youtube_music/youtube_music.py +++ b/music_kraken/pages/youtube_music/youtube_music.py @@ -171,7 +171,7 @@ class YoutubeMusic(SuperYouTube): def __init__(self, *args, ydl_opts: dict = None, **kwargs): self.yt_music_connection: YoutubeMusicConnection = YoutubeMusicConnection( logger=self.LOGGER, - accept_language="en-US,en;q=0.5" + accept_language="en-US,en;q=0.5", ) self.credentials: YouTubeMusicCredentials = YouTubeMusicCredentials( api_key=youtube_settings["youtube_music_api_key"], @@ -212,7 +212,7 @@ class YoutubeMusic(SuperYouTube): search for: "innertubeApiKey" """ - r = self.yt_music_connection.get("https://music.youtube.com/") + r = self.yt_music_connection.get("https://music.youtube.com/", name="youtube_music_index.html", disable_cache=True, enable_cache_readonly=True) if r is None: return @@ -232,7 +232,7 @@ class YoutubeMusic(SuperYouTube): 'set_ytc': 'true', 'set_apyt': 'true', 'set_eom': 'false' - }) + }, disable_cache=True) if r is None: return @@ -247,9 +247,9 @@ class YoutubeMusic(SuperYouTube): # save cookies in settings youtube_settings["youtube_music_consent_cookies"] = cookie_dict else: - self.yt_music_connection.save(r, "index.html") + self.yt_music_connection.save(r, "youtube_music_index.html", no_update_if_valid_exists=True) - r = self.yt_music_connection.get("https://music.youtube.com/", name="index.html") + r = self.yt_music_connection.get("https://music.youtube.com/", name="youtube_music_index.html") if r is None: return @@ -374,7 +374,8 @@ class YoutubeMusic(SuperYouTube): }, headers={ "Referer": get_youtube_url(path=f"/search", query=f"q={urlescaped_query}") - } + }, + name=f"search_{search_query}.json" ) if r is None: @@ -411,7 +412,8 @@ class YoutubeMusic(SuperYouTube): json={ "browseId": browse_id, "context": {**self.credentials.context, "adSignalsInfo": {"params": []}} - } + }, + name=f"fetch_artist_{browse_id}.json" ) if r is None: return artist @@ -454,7 +456,8 @@ class YoutubeMusic(SuperYouTube): json={ "browseId": browse_id, "context": {**self.credentials.context, "adSignalsInfo": {"params": []}} - } + }, + name=f"fetch_album_{browse_id}.json" ) if r is None: return album @@ -549,7 +552,12 @@ class YoutubeMusic(SuperYouTube): return self.download_values_by_url[source.url] if ydl_res is None: - ydl_res = self.ydl.extract_info(url=source.url, download=False) + try: + ydl_res = self.ydl.extract_info(url=source.url, download=False) + except DownloadError as e: + self.not_download[source.hash_url] = e + self.LOGGER.error(f"Couldn't fetch song from {source.url}. {e}") + return {"error": e} _best_format = _get_best_format(ydl_res.get("formats", [{}])) self.download_values_by_url[source.url] = { @@ -564,7 +572,7 @@ class YoutubeMusic(SuperYouTube): def download_song_to_target(self, source: Source, target: Target, desc: str = None) -> DownloadResult: media = self.fetch_media_url(source) - if source.hash_url not in self.not_download: + if source.hash_url not in self.not_download and "error" not in media: result = self.download_connection.stream_into( media["url"], target, @@ -577,7 +585,7 @@ class YoutubeMusic(SuperYouTube): method="GET", ) else: - result = DownloadResult(error_message=str(self.not_download[source.hash_url])) + result = DownloadResult(error_message=str(media.get("error") or self.not_download[source.hash_url])) if result.is_fatal_error: result.merge(super().download_song_to_target(source=source, target=target, desc=desc)) diff --git a/music_kraken/utils/__init__.py b/music_kraken/utils/__init__.py index 2b63305..9226441 100644 --- a/music_kraken/utils/__init__.py +++ b/music_kraken/utils/__init__.py @@ -51,14 +51,20 @@ def trace(msg: str): if not DEBUG_TRACE: return - output("trace: " + msg, BColors.OKBLUE) + output(BColors.OKBLUE.value + "trace: " + BColors.ENDC.value + msg) + +def request_trace(msg: str): + if not DEBUG_TRACE: + return + + output(BColors.OKGREEN.value + "request: " + BColors.ENDC.value + msg) def object_trace(obj): if not DEBUG_OBJECT_TRACE: return appendix = f" called by [{' | '.join(f'{s.function} {Path(s.filename).name}:{str(s.lineno)}' for s in inspect.stack()[1:5])}]" if DEBUG_OBJECT_TRACE_CALLSTACK else "" - output("object: " + str(obj) + appendix, BColors.GREY) + output("object: " + str(obj) + appendix) """ diff --git a/music_kraken/utils/string_processing.py b/music_kraken/utils/string_processing.py index 00b6024..9acd3c8 100644 --- a/music_kraken/utils/string_processing.py +++ b/music_kraken/utils/string_processing.py @@ -34,13 +34,15 @@ def unify(string: str) -> str: return string.lower() -def fit_to_file_system(string: Union[str, Path]) -> Union[str, Path]: +def fit_to_file_system(string: Union[str, Path], hidden_ok: bool = False) -> Union[str, Path]: def fit_string(string: str) -> str: + nonlocal hidden_ok + if string == "/": return "/" string = string.strip() - while string[0] == ".": + while string[0] == "." and not hidden_ok: if len(string) == 0: return string @@ -93,7 +95,7 @@ def clean_song_title(raw_song_title: str, artist_name: Optional[str] = None) -> break substring = raw_song_title[open_bracket_index + 1:close_bracket_index] - if any(disallowed_substring in substring for disallowed_substring in DISALLOWED_SUBSTRING_IN_BRACKETS): + if any(disallowed_substring in substring.lower() for disallowed_substring in DISALLOWED_SUBSTRING_IN_BRACKETS): raw_song_title = raw_song_title[:open_bracket_index] + raw_song_title[close_bracket_index + 1:] else: start = close_bracket_index + 1 @@ -177,3 +179,8 @@ def match_length(length_1: int | None, length_2: int | None) -> bool: return True return abs(length_1 - length_2) <= ALLOWED_LENGTH_DISTANCE +def shorten_display_url(url: str, max_length: int = 150, chars_at_end: int = 4, shorten_string: str = "[...]") -> str: + if len(url) <= max_length + chars_at_end + len(shorten_string): + return url + + return url[:max_length] + shorten_string + url[-chars_at_end:] diff --git a/pyproject.toml b/pyproject.toml index 16fac20..9c8232b 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -69,7 +69,7 @@ dependencies = [ "toml~=0.10.2", "typing_extensions~=4.7.1", - "sponsorblock~=0.1.3", + "python-sponsorblock~=0.0.0", "youtube_dl", ] dynamic = [ diff --git a/requirements.txt b/requirements.txt deleted file mode 100644 index 7ba18a1..0000000 --- a/requirements.txt +++ /dev/null @@ -1,25 +0,0 @@ -requests~=2.31.0 -mutagen~=1.46.0 -musicbrainzngs~=0.7.1 -jellyfish~=0.9.0 -beautifulsoup4~=4.11.1 -pycountry~=24.0.1 -python-dateutil~=2.8.2 -pandoc~=2.3 -SQLAlchemy~=2.0.7 -setuptools~=68.2.0 -tqdm~=4.65.0 -ffmpeg-python~=0.2.0 -platformdirs~=4.2.0 -transliterate~=1.10.2 -sponsorblock~=0.1.3 -regex~=2022.9.13 -pyffmpeg~=2.4.2.18 -ffmpeg-progress-yield~=0.7.8 -pathvalidate~=2.5.2 -guppy3~=3.1.3 -toml~=0.10.2 -typing_extensions~=4.7.1 -responses~=0.24.1 -youtube_dl -merge_args~=0.1.5 \ No newline at end of file