diff --git a/development/actual_donwload.py b/development/actual_donwload.py index 333da4f..81c74bd 100644 --- a/development/actual_donwload.py +++ b/development/actual_donwload.py @@ -6,9 +6,8 @@ logging.getLogger().setLevel(logging.DEBUG) if __name__ == "__main__": commands = [ - "s: #a Ghost Bath", - "0", - "d: 1", + "s: #a Ruffiction", + "d: 8", ] diff --git a/music_kraken/cli/main_downloader.py b/music_kraken/cli/main_downloader.py index a45c01e..dad0b5d 100644 --- a/music_kraken/cli/main_downloader.py +++ b/music_kraken/cli/main_downloader.py @@ -215,6 +215,9 @@ class Downloader: return True def _process_parsed(self, key_text: Dict[str, str], query: str) -> Query: + # strip all the values in key_text + key_text = {key: value.strip() for key, value in key_text.items()} + song = None if not "t" in key_text else Song(title=key_text["t"], dynamic=True) album = None if not "r" in key_text else Album(title=key_text["r"], dynamic=True) artist = None if not "a" in key_text else Artist(name=key_text["a"], dynamic=True) diff --git a/music_kraken/connection/cache.py b/music_kraken/connection/cache.py index c224375..232430f 100644 --- a/music_kraken/connection/cache.py +++ b/music_kraken/connection/cache.py @@ -7,6 +7,7 @@ from functools import lru_cache import logging from ..utils.config import main_settings +from ..utils.string_processing import fit_to_file_system @dataclass @@ -63,7 +64,7 @@ class Cache: :return: the module path """ r = Path(self._dir, module) - r.mkdir(exist_ok=True) + r.mkdir(exist_ok=True, parents=True) return r def _write_index(self, indent: int = 4): @@ -122,13 +123,13 @@ class Cache: ) self._write_attribute(cache_attribute) - cache_path = Path(module_path, name) + cache_path = fit_to_file_system(Path(module_path, name), hidden_ok=True) with cache_path.open("wb") as content_file: self.logger.debug(f"writing cache to {cache_path}") content_file.write(content) def get(self, name: str) -> Optional[bytes]: - path = Path(self._dir, self.module, name) + path = fit_to_file_system(Path(self._dir, self.module, name), hidden_ok=True) if not path.is_file(): return None @@ -148,7 +149,7 @@ class Cache: if ca.name == "": continue - file = Path(self._dir, ca.module, ca.name) + file = fit_to_file_system(Path(self._dir, ca.module, ca.name), hidden_ok=True) if not ca.is_valid: self.logger.debug(f"deleting cache {ca.id}") diff --git a/music_kraken/connection/connection.py b/music_kraken/connection/connection.py index a570fd0..75de4b5 100644 --- a/music_kraken/connection/connection.py +++ b/music_kraken/connection/connection.py @@ -15,6 +15,7 @@ from tqdm import tqdm from .cache import Cache from .rotating import RotatingProxy from ..objects import Target +from ..utils import request_trace from ..utils.config import main_settings from ..utils.support_classes.download_result import DownloadResult from ..utils.hacking import merge_args @@ -148,6 +149,8 @@ class Connection: exclude_headers: List[str] = None, **kwargs ) -> Optional[requests.Response]: + trace_string = f"{method} {url} \t{'[stream]' if kwargs.get('stream', False) else ''}" + if method is None: raise AttributeError("method is not set.") method = method.upper() @@ -179,6 +182,8 @@ class Connection: cached = self.cache.get(name) if cached is not None: + request_trace(f"{trace_string}\t[cached]") + with responses.RequestsMock() as resp: resp.add( method=method, @@ -199,6 +204,9 @@ class Connection: if header in headers: del headers[header] + if try_count <= 0: + request_trace(trace_string) + r = None connection_failed = False try: diff --git a/music_kraken/objects/cache.py b/music_kraken/objects/cache.py deleted file mode 100644 index 181a13c..0000000 --- a/music_kraken/objects/cache.py +++ /dev/null @@ -1,110 +0,0 @@ -from collections import defaultdict -from typing import Dict, List, Optional -import weakref - -from .parents import DatabaseObject - -""" -This is a cache for the objects, that et pulled out of the database. -This is necessary, to not have duplicate objects with the same id. - -Using a cache that maps the ojects to their id has multiple benefits: - - if you modify the object at any point, all objects with the same id get modified *(copy by reference)* - - less ram usage - - to further decrease ram usage I only store weak refs and not a strong reference, for the gc to still work -""" - - -class ObjectCache: - """ - ObjectCache is a cache for the objects retrieved from a database. - It maps each object to its id and uses weak references to manage its memory usage. - Using a cache for these objects provides several benefits: - - - Modifying an object updates all objects with the same id (due to copy by reference) - - Reduced memory usage - - :attr object_to_id: Dictionary that maps DatabaseObjects to their id. - :attr weakref_map: Dictionary that uses weak references to DatabaseObjects as keys and their id as values. - - :method exists: Check if a DatabaseObject already exists in the cache. - :method append: Add a DatabaseObject to the cache if it does not already exist. - :method extent: Add a list of DatabaseObjects to the cache. - :method remove: Remove a DatabaseObject from the cache by its id. - :method get: Retrieve a DatabaseObject from the cache by its id. """ - object_to_id: Dict[str, DatabaseObject] - weakref_map: Dict[weakref.ref, str] - - def __init__(self) -> None: - self.object_to_id = dict() - self.weakref_map = defaultdict() - - def exists(self, database_object: DatabaseObject) -> bool: - """ - Check if a DatabaseObject with the same id already exists in the cache. - - :param database_object: The DatabaseObject to check for. - :return: True if the DatabaseObject exists, False otherwise. - """ - if database_object.dynamic: - return True - return database_object.id in self.object_to_id - - def on_death(self, weakref_: weakref.ref) -> None: - """ - Callback function that gets triggered when the reference count of a DatabaseObject drops to 0. - This function removes the DatabaseObject from the cache. - - :param weakref_: The weak reference of the DatabaseObject that has been garbage collected. - """ - data_id = self.weakref_map.pop(weakref_) - self.object_to_id.pop(data_id) - - def get_weakref(self, database_object: DatabaseObject) -> weakref.ref: - return weakref.ref(database_object, self.on_death) - - - def append(self, database_object: DatabaseObject) -> bool: - """ - Add a DatabaseObject to the cache. - - :param database_object: The DatabaseObject to add to the cache. - :return: True if the DatabaseObject already exists in the cache, False otherwise. - """ - if self.exists(database_object): - return True - - self.weakref_map[weakref.ref(database_object, self.on_death)] = database_object.id - self.object_to_id[database_object.id] = database_object - - return False - - def extent(self, database_object_list: List[DatabaseObject]): - """ - adjacent to the extent method of list, this appends n Object - """ - for database_object in database_object_list: - self.append(database_object) - - def remove(self, _id: str): - """ - Remove a DatabaseObject from the cache. - - :param _id: The id of the DatabaseObject to remove from the cache. - """ - data = self.object_to_id.get(_id) - if data: - self.weakref_map.pop(weakref.ref(data)) - self.object_to_id.pop(_id) - - def __getitem__(self, item) -> Optional[DatabaseObject]: - """ - this returns the data obj - :param item: the id of the music object - :return: - """ - - return self.object_to_id.get(item) - - def get(self, _id: str) -> Optional[DatabaseObject]: - return self.__getitem__(_id) diff --git a/music_kraken/pages/musify.py b/music_kraken/pages/musify.py index 63796e0..53dd82b 100644 --- a/music_kraken/pages/musify.py +++ b/music_kraken/pages/musify.py @@ -121,6 +121,7 @@ class Musify(Page): self.connection: Connection = Connection( host="https://musify.club/", logger=self.LOGGER, + module="musify", ) self.stream_connection: Connection = Connection( @@ -393,10 +394,11 @@ class Musify(Page): return search_results def fetch_song(self, source: Source, stop_at_level: int = 1) -> Song: - # https://musify.club/track/linkin-park-numb-210765 - r = self.connection.get(source.url) + musify_url = parse_url(source.url) + + r = self.connection.get(source.url, name="track_" + musify_url.name_with_id) if r is None: - return Song + return Song() soup = self.get_soup_from_response(r) @@ -672,7 +674,7 @@ class Musify(Page): url = parse_url(source.url) endpoint = self.HOST + "/release/" + url.name_with_id - r = self.connection.get(endpoint) + r = self.connection.get(endpoint, name=url.name_with_id) if r is None: return Album() @@ -709,7 +711,7 @@ class Musify(Page): :return: """ - r = self.connection.get(f"https://musify.club/{url.source_type.value}/{url.name_with_id}?_pjax=#bodyContent") + r = self.connection.get(f"https://musify.club/{url.source_type.value}/{url.name_with_id}?_pjax=#bodyContent", name="artist_attributes_" + url.name_with_id) if r is None: return Artist() @@ -1075,7 +1077,7 @@ class Musify(Page): "SortOrder.Property": "dateCreated", "SortOrder.IsAscending": False, "X-Requested-With": "XMLHttpRequest" - }) + }, name="discography_" + url.name_with_id) if r is None: return [] soup: BeautifulSoup = BeautifulSoup(r.content, features="html.parser") diff --git a/music_kraken/utils/__init__.py b/music_kraken/utils/__init__.py index 2b63305..9226441 100644 --- a/music_kraken/utils/__init__.py +++ b/music_kraken/utils/__init__.py @@ -51,14 +51,20 @@ def trace(msg: str): if not DEBUG_TRACE: return - output("trace: " + msg, BColors.OKBLUE) + output(BColors.OKBLUE.value + "trace: " + BColors.ENDC.value + msg) + +def request_trace(msg: str): + if not DEBUG_TRACE: + return + + output(BColors.OKGREEN.value + "request: " + BColors.ENDC.value + msg) def object_trace(obj): if not DEBUG_OBJECT_TRACE: return appendix = f" called by [{' | '.join(f'{s.function} {Path(s.filename).name}:{str(s.lineno)}' for s in inspect.stack()[1:5])}]" if DEBUG_OBJECT_TRACE_CALLSTACK else "" - output("object: " + str(obj) + appendix, BColors.GREY) + output("object: " + str(obj) + appendix) """ diff --git a/music_kraken/utils/string_processing.py b/music_kraken/utils/string_processing.py index 0f9aab3..17c9117 100644 --- a/music_kraken/utils/string_processing.py +++ b/music_kraken/utils/string_processing.py @@ -34,13 +34,15 @@ def unify(string: str) -> str: return string.lower() -def fit_to_file_system(string: Union[str, Path]) -> Union[str, Path]: +def fit_to_file_system(string: Union[str, Path], hidden_ok: bool = False) -> Union[str, Path]: def fit_string(string: str) -> str: + nonlocal hidden_ok + if string == "/": return "/" string = string.strip() - while string[0] == ".": + while string[0] == "." and not hidden_ok: if len(string) == 0: return string