From ede8ce0e8944bd83cb5ba4538d3a46dfbe5c058d Mon Sep 17 00:00:00 2001 From: Lars Noack Date: Wed, 17 Jan 2024 12:01:50 +0100 Subject: [PATCH 1/6] feat: added cache directory --- requirements.txt | 5 +- .../pages/youtube_music/youtube_music.py | 3 + src/music_kraken/utils/cache.py | 0 .../utils/config/config_files/main_config.py | 8 ++- src/music_kraken/utils/hooks.py | 29 ---------- .../utils/path_manager/locations.py | 57 +++++++++++++++++++ 6 files changed, 69 insertions(+), 33 deletions(-) create mode 100644 src/music_kraken/utils/cache.py delete mode 100644 src/music_kraken/utils/hooks.py diff --git a/requirements.txt b/requirements.txt index 0d644fb..4462589 100644 --- a/requirements.txt +++ b/requirements.txt @@ -7,7 +7,7 @@ pycountry~=22.3.5 python-dateutil~=2.8.2 pandoc~=2.3 SQLAlchemy~=2.0.7 -setuptools~=60.2.0 +setuptools~=68.2.0 tqdm~=4.65.0 ffmpeg-python~=0.2.0 platformdirs~=3.2.0 @@ -18,3 +18,6 @@ pyffmpeg~=2.4.2.18 ffmpeg-progress-yield~=0.7.8 pathvalidate~=2.5.2 guppy3~=3.1.3 + +toml~=0.10.2 +typing_extensions~=4.7.1 \ No newline at end of file diff --git a/src/music_kraken/pages/youtube_music/youtube_music.py b/src/music_kraken/pages/youtube_music/youtube_music.py index a4c765a..249a3db 100644 --- a/src/music_kraken/pages/youtube_music/youtube_music.py +++ b/src/music_kraken/pages/youtube_music/youtube_music.py @@ -6,6 +6,9 @@ import json from dataclasses import dataclass import re +from youtube_dl.jsinterp import JSInterpreter +from youtube_dl.extractor.youtube import YoutubeIE + from ...utils.exception.config import SettingValueError from ...utils.config import main_settings, youtube_settings, logging_settings from ...utils.shared import DEBUG, DEBUG_YOUTUBE_INITIALIZING diff --git a/src/music_kraken/utils/cache.py b/src/music_kraken/utils/cache.py new file mode 100644 index 0000000..e69de29 diff --git a/src/music_kraken/utils/config/config_files/main_config.py b/src/music_kraken/utils/config/config_files/main_config.py index ba6ef91..6712a4c 100644 --- a/src/music_kraken/utils/config/config_files/main_config.py +++ b/src/music_kraken/utils/config/config_files/main_config.py @@ -12,7 +12,7 @@ from ..attributes.special_attributes import ( AudioFormatAttribute, ) -config = Config([ +config = Config(( Attribute(name="hasnt_yet_started", default_value=False, description="This will be set automatically, to look if it needs to run the scripts that run on start."), Attribute(name="bitrate", default_value=125, description="Streams the audio with given bitrate [kB/s]. Can't stream with a higher Bitrate, than the audio source provides."), AudioFormatAttribute(name="audio_format", default_value="mp3", description="""Music Kraken will stream the audio into this format. @@ -64,6 +64,8 @@ all the error messages are shown."""), PathAttribute(name="temp_directory", default_value=LOCATIONS.TEMP_DIRECTORY.resolve(), description="All temporary stuff is gonna be dumped in this directory."), PathAttribute(name="log_file", default_value=LOCATIONS.get_log_file("download_logs.log").resolve()), PathAttribute(name="ffmpeg_binary", default_value=LOCATIONS.FFMPEG_BIN.resolve(), description="Set the path to the ffmpeg binary."), + PathAttribute(name="cache_directory", default_value=LOCATIONS.CACHE_DIRECTORY.resolve(), + description="Set the path of the cache directory."), Attribute( name="not_a_genre_regex", description="These regular expressions tell music-kraken, which sub-folders of the music-directory\n" @@ -93,7 +95,7 @@ But anyways... Freedom of thought, so go ahead and change the messages."""), Attribute(name="id_bits", default_value=64, description="I really dunno why I even made this a setting.. Modifying this is a REALLY dumb idea."), Description("🏳️‍⚧️🏳️‍⚧️ Protect trans youth. 🏳️‍⚧️🏳️‍⚧️\n"), -], LOCATIONS.get_config_file("main")) +), LOCATIONS.get_config_file("main")) class SettingsStructure(TypedDict): @@ -126,4 +128,4 @@ class SettingsStructure(TypedDict): log_file: Path not_a_genre_regex: List[str] ffmpeg_binary: Path - + cache_directory: Path diff --git a/src/music_kraken/utils/hooks.py b/src/music_kraken/utils/hooks.py deleted file mode 100644 index e3cd954..0000000 --- a/src/music_kraken/utils/hooks.py +++ /dev/null @@ -1,29 +0,0 @@ -from typing import List, Iterable, Dict, TypeVar, Generic, Iterator, Any, Type -from enum import Enum -from dataclasses import dataclass -from collections import defaultdict - - -class HookEventTypes(Enum): - pass - - -@dataclass -class Event: - target: Any - - -class Hooks: - def __init__(self, target) -> None: - self.target = target - - self._callbacks: Dict[HookEventTypes, List[callable]] = defaultdict(list) - - def add_event_listener(self, event_type: HookEventTypes, callback: callable): - self._callbacks[event_type].append(callback) - - def trigger_event(self, event_type: HookEventTypes, *args, **kwargs): - event: Event = Event(target=self.target) - - for callback in self._callbacks[event_type]: - callback(event, *args, **kwargs) diff --git a/src/music_kraken/utils/path_manager/locations.py b/src/music_kraken/utils/path_manager/locations.py index 66953d1..a3917bf 100644 --- a/src/music_kraken/utils/path_manager/locations.py +++ b/src/music_kraken/utils/path_manager/locations.py @@ -1,14 +1,63 @@ +import configparser from pathlib import Path import os +from os.path import expandvars +import logging +from sys import platform import tempfile +from typing import Optional + from pyffmpeg import FFmpeg + from .music_directory import get_music_directory from .config_directory import get_config_directory class Locations: + @staticmethod + def _get_env(key: str, default: Path, default_for_windows: bool = True) -> Optional[Path]: + res = os.environ.get(key.upper()) + if res is not None: + return res + + xdg_user_dirs_file = os.environ.get("XDG_CONFIG_HOME") or Path(Path.home(), ".config", "user-dirs.dirs") + xdg_user_dirs_default_file = Path("/etc/xdg/user-dirs.defaults") + + def get_dir_from_xdg_file(xdg_file_path: os.PathLike) -> Optional[Path]: + nonlocal key + + try: + with open(xdg_file_path, 'r') as f: + data = "[XDG_USER_DIRS]\n" + f.read() + config = configparser.ConfigParser(allow_no_value=True) + config.read_string(data) + xdg_config = config['XDG_USER_DIRS'] + + return Path(expandvars(xdg_config[key.lower()].strip('"'))) + + except (FileNotFoundError, KeyError) as e: + logging.warning( + f"Missing file or No entry found for \"{key}\" in: \"{xdg_file_path}\".\n" + ) + logging.debug(str(e)) + + res = get_dir_from_xdg_file(xdg_user_dirs_file) + if res is not None: + return res + + res = get_dir_from_xdg_file(xdg_user_dirs_default_file) + if res is not None: + return res + + logging.warning(f"couldn't find a {key}, falling back to: {default}") + + if not default_for_windows and platform == "linux": + return + + return default + def __init__(self, application_name: os.PathLike = "music-kraken"): self.FILE_ENCODING: str = "utf-8" @@ -21,6 +70,14 @@ class Locations: self.CONFIG_DIRECTORY.mkdir(exist_ok=True, parents=True) self.CONFIG_FILE = Path(self.CONFIG_DIRECTORY, f"{application_name}.conf") self.LEGACY_CONFIG_FILE = Path(self.CONFIG_DIRECTORY, f"{application_name}.conf") + + self.CACHE_DIRECTORY = self._get_env("XDG_CACHE_HOME", Path(Path.home(), ".cache")) + if self.CACHE_DIRECTORY is None: + logging.warning(f"Could not find a cache dir. Falling back to the temp dir: {self.TEMP_DIRECTORY}") + self.CACHE_DIRECTORY = self.TEMP_DIRECTORY + else: + self.CACHE_DIRECTORY = Path(self.CACHE_DIRECTORY, application_name) + self.CACHE_DIRECTORY.mkdir(parents=True, exist_ok=True) self.FFMPEG_BIN = Path(FFmpeg(enable_log=False).get_ffmpeg_bin()) From b0815fdac4cc0554f90176c6350ccd73b8e8ca59 Mon Sep 17 00:00:00 2001 From: Lars Noack Date: Wed, 17 Jan 2024 12:46:55 +0100 Subject: [PATCH 2/6] feat: implemented caching --- src/music_kraken/utils/cache.py | 109 ++++++++++++++++++++++++++++++++ 1 file changed, 109 insertions(+) diff --git a/src/music_kraken/utils/cache.py b/src/music_kraken/utils/cache.py index e69de29..dbf2c26 100644 --- a/src/music_kraken/utils/cache.py +++ b/src/music_kraken/utils/cache.py @@ -0,0 +1,109 @@ +import json +from pathlib import Path +from dataclasses import dataclass +from datetime import datetime, timedelta +from typing import List, Optional + +from .config import main_settings + + +@dataclass +class CacheAttribute: + module: str + name: str + + created: datetime + expires: datetime + + @property + def id(self): + return f"{self.module}_{self.name}" + + @property + def is_valid(self): + return datetime.now() < self.expires + + def __eq__(self, other): + return self.__dict__ == other.__dict__ + + +class Cache: + def __init__(self): + self._dir = main_settings["cache_directory"] + self.index = Path(self._dir, "index.json") + + if not self.index.is_file(): + with self.index.open("w") as i: + i.write(json.dumps([])) + + self.cached_attributes: List[CacheAttribute] = [] + self._id_to_attribute = {} + + self._time_fields = {"created", "expires"} + with self.index.open("r") as i: + for c in json.loads(i.read()): + for key in self._time_fields: + c[key] = datetime.fromisoformat(c[key]) + + self.cached_attributes.append(**c) + + def _init_module(self, module: str) -> Path: + """ + :param module: + :return: the module path + """ + r = Path(self._dir, module) + r.mkdir(exist_ok=True) + return r + + def _write_attribute(self, cached_attribute: CacheAttribute, write: bool = True) -> bool: + existing_attribute: Optional[CacheAttribute] = self._id_to_attribute.get(cached_attribute.id) + if existing_attribute is not None: + # the attribute exists + if existing_attribute == cached_attribute: + return True + + if existing_attribute.is_valid: + return False + + existing_attribute.__dict__ = cached_attribute.__dict__ + cached_attribute = existing_attribute + else: + self.cached_attributes.append(cached_attribute) + self._id_to_attribute[cached_attribute.id] = cached_attribute + + if write: + _json = [] + for c in self.cached_attributes: + d = c.__dict__ + for key in self._time_fields: + d[key] = d[key].isoformat() + + _json.append(d) + + with self.index.open("w") as f: + f.write(json.dumps(_json, indent=4)) + + return True + + def set(self, content: bytes, module: str, name: str, expires_in: int = 10): + """ + :param content: + :param module: + :param name: + :param expires_in: the unit is days + :return: + """ + + module_path = self._init_module(module) + + cache_attribute = CacheAttribute( + module=module, + name=name, + created=datetime.now(), + expires=datetime.now() + timedelta(days=expires_in), + ) + self._write_attribute(cache_attribute) + + with Path(module_path, name).open("wb") as content_file: + content_file.write(content) From 66f4ad3df5a2627855bf0704704e2eb6ee64d9b6 Mon Sep 17 00:00:00 2001 From: Lars Noack Date: Wed, 17 Jan 2024 12:54:02 +0100 Subject: [PATCH 3/6] feat: implemented get function --- src/music_kraken/utils/cache.py | 21 +++++++++++++++++++-- 1 file changed, 19 insertions(+), 2 deletions(-) diff --git a/src/music_kraken/utils/cache.py b/src/music_kraken/utils/cache.py index dbf2c26..b24b68f 100644 --- a/src/music_kraken/utils/cache.py +++ b/src/music_kraken/utils/cache.py @@ -3,6 +3,7 @@ from pathlib import Path from dataclasses import dataclass from datetime import datetime, timedelta from typing import List, Optional +from functools import lru_cache from .config import main_settings @@ -45,8 +46,11 @@ class Cache: for key in self._time_fields: c[key] = datetime.fromisoformat(c[key]) - self.cached_attributes.append(**c) + ca = CacheAttribute(**c) + self.cached_attributes.append(ca) + self._id_to_attribute[ca.id] = ca + @lru_cache() def _init_module(self, module: str) -> Path: """ :param module: @@ -67,7 +71,6 @@ class Cache: return False existing_attribute.__dict__ = cached_attribute.__dict__ - cached_attribute = existing_attribute else: self.cached_attributes.append(cached_attribute) self._id_to_attribute[cached_attribute.id] = cached_attribute @@ -107,3 +110,17 @@ class Cache: with Path(module_path, name).open("wb") as content_file: content_file.write(content) + + def get(self, module: str, name: str) -> Optional[bytes]: + path = Path(self._dir, module, name) + + if not path.is_file(): + return None + + # check if it is outdated + existing_attribute: CacheAttribute = self._id_to_attribute[f"{module}_{name}"] + if not existing_attribute.is_valid: + return + + with path.open("rb") as f: + return f.read() From 031f274d6911ca73f7b1aaba44ac6e8ff0ce52d9 Mon Sep 17 00:00:00 2001 From: Lars Noack Date: Wed, 17 Jan 2024 15:10:50 +0100 Subject: [PATCH 4/6] feat: implemented caching in the request method --- .../{utils => connection}/cache.py | 26 ++++-- src/music_kraken/connection/connection.py | 83 ++++++++++++------- src/music_kraken/pages/bandcamp.py | 1 - .../pages/youtube_music/youtube_music.py | 7 +- src/music_kraken/utils/shared.py | 2 +- 5 files changed, 77 insertions(+), 42 deletions(-) rename src/music_kraken/{utils => connection}/cache.py (82%) diff --git a/src/music_kraken/utils/cache.py b/src/music_kraken/connection/cache.py similarity index 82% rename from src/music_kraken/utils/cache.py rename to src/music_kraken/connection/cache.py index b24b68f..d393be5 100644 --- a/src/music_kraken/utils/cache.py +++ b/src/music_kraken/connection/cache.py @@ -4,8 +4,9 @@ from dataclasses import dataclass from datetime import datetime, timedelta from typing import List, Optional from functools import lru_cache +import logging -from .config import main_settings +from ..utils.config import main_settings @dataclass @@ -29,7 +30,10 @@ class CacheAttribute: class Cache: - def __init__(self): + def __init__(self, module: str, logger: logging.Logger): + self.module = module + self.logger: logging.Logger = logger + self._dir = main_settings["cache_directory"] self.index = Path(self._dir, "index.json") @@ -89,7 +93,7 @@ class Cache: return True - def set(self, content: bytes, module: str, name: str, expires_in: int = 10): + def set(self, content: bytes, name: str, expires_in: float = 10): """ :param content: :param module: @@ -97,28 +101,32 @@ class Cache: :param expires_in: the unit is days :return: """ + if name == "": + return - module_path = self._init_module(module) + module_path = self._init_module(self.module) cache_attribute = CacheAttribute( - module=module, + module=self.module, name=name, created=datetime.now(), expires=datetime.now() + timedelta(days=expires_in), ) self._write_attribute(cache_attribute) - with Path(module_path, name).open("wb") as content_file: + cache_path = Path(module_path, name) + with cache_path.open("wb") as content_file: + self.logger.debug(f"writing cache to {cache_path}") content_file.write(content) - def get(self, module: str, name: str) -> Optional[bytes]: - path = Path(self._dir, module, name) + def get(self, name: str) -> Optional[bytes]: + path = Path(self._dir, self.module, name) if not path.is_file(): return None # check if it is outdated - existing_attribute: CacheAttribute = self._id_to_attribute[f"{module}_{name}"] + existing_attribute: CacheAttribute = self._id_to_attribute[f"{self.module}_{name}"] if not existing_attribute.is_valid: return diff --git a/src/music_kraken/connection/connection.py b/src/music_kraken/connection/connection.py index 3f294e9..7949545 100644 --- a/src/music_kraken/connection/connection.py +++ b/src/music_kraken/connection/connection.py @@ -5,9 +5,12 @@ import logging import threading import requests +import responses +from responses import matchers from tqdm import tqdm from .rotating import RotatingProxy +from .cache import Cache from ..utils.config import main_settings from ..utils.support_classes.download_result import DownloadResult from ..objects import Target @@ -25,13 +28,18 @@ class Connection: accepted_response_codes: Set[int] = None, semantic_not_found: bool = True, sleep_after_404: float = 0.0, - heartbeat_interval = 0, + heartbeat_interval=0, + module: str = "general", + cache_expiring_duration: float = 10 ): if proxies is None: proxies = main_settings["proxies"] if header_values is None: header_values = dict() + self.cache: Cache = Cache(module=module, logger=logger) + self.cache_expiring_duration = cache_expiring_duration + self.HEADER_VALUES = header_values self.LOGGER = logger @@ -55,23 +63,24 @@ class Connection: @property def user_agent(self) -> str: - return self.session.headers.get("user-agent", "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/112.0.0.0 Safari/537.36") - + return self.session.headers.get("user-agent", + "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/112.0.0.0 Safari/537.36") def start_heartbeat(self): if self.heartbeat_interval <= 0: self.LOGGER.warning(f"Can't start a heartbeat with {self.heartbeat_interval}s in between.") - self.heartbeat_thread = threading.Thread(target=self._heartbeat_loop, args=(self.heartbeat_interval, ), daemon=True) + self.heartbeat_thread = threading.Thread(target=self._heartbeat_loop, args=(self.heartbeat_interval,), + daemon=True) self.heartbeat_thread.start() def heartbeat_failed(self): self.LOGGER.warning(f"I just died... (The heartbeat failed)") - def heartbeat(self): # Your code to send heartbeat requests goes here - print("the hearth is beating, but it needs to be implemented ;-;\nFuck youuuu for setting heartbeat in the constructor to true, but not implementing the method Connection.hearbeat()") + print( + "the hearth is beating, but it needs to be implemented ;-;\nFuck youuuu for setting heartbeat in the constructor to true, but not implementing the method Connection.hearbeat()") def _heartbeat_loop(self, interval: float): def heartbeat_wrapper(): @@ -85,8 +94,6 @@ class Connection: heartbeat_wrapper() time.sleep(interval) - - def base_url(self, url: ParseResult = None): if url is None: url = self.HOST @@ -119,9 +126,12 @@ class Connection: return headers - def _request( + def save(self, r: requests.Response, name: str, **kwargs): + self.cache.set(r.content, name, expires_in=kwargs.get("expires_in", self.cache_expiring_duration)) + + def request( self, - request: Callable, + method: str, try_count: int, accepted_response_codes: set, url: str, @@ -131,8 +141,20 @@ class Connection: raw_url: bool = False, sleep_after_404: float = None, is_heartbeat: bool = False, + name: str = "", **kwargs ) -> Optional[requests.Response]: + if name != "": + cached = self.cache.get(name) + + with responses.RequestsMock() as resp: + resp.add( + method=method, + url=url, + body=cached, + ) + return requests.request(method=method, url=url, timeout=timeout, headers=headers, **kwargs) + if sleep_after_404 is None: sleep_after_404 = self.sleep_after_404 if try_count >= self.TRIES: @@ -158,9 +180,10 @@ class Connection: while self.session_is_occupied and not is_heartbeat: pass - r: requests.Response = request(request_url, timeout=timeout, headers=headers, **kwargs) + r: requests.Response = requests.request(method=method, url=url, timeout=timeout, headers=headers, **kwargs) if r.status_code in accepted_response_codes: + self.save(r, name, **kwargs) return r if self.SEMANTIC_NOT_FOUND and r.status_code == 404: @@ -187,15 +210,16 @@ class Connection: if self.heartbeat_interval > 0 and self.heartbeat_thread is None: self.start_heartbeat() - return self._request( - request=request, - try_count=try_count+1, + return self.request( + method=method, + try_count=try_count + 1, accepted_response_codes=accepted_response_codes, url=url, timeout=timeout, headers=headers, sleep_after_404=sleep_after_404, is_heartbeat=is_heartbeat, + name=name, **kwargs ) @@ -213,8 +237,8 @@ class Connection: if accepted_response_codes is None: accepted_response_codes = self.ACCEPTED_RESPONSE_CODES - r = self._request( - request=self.session.get, + r = self.request( + method="GET", try_count=0, accepted_response_codes=accepted_response_codes, url=url, @@ -241,8 +265,8 @@ class Connection: raw_url: bool = False, **kwargs ) -> Optional[requests.Response]: - r = self._request( - request=self.session.post, + r = self.request( + method="POST", try_count=0, accepted_response_codes=accepted_response_codes or self.ACCEPTED_RESPONSE_CODES, url=url, @@ -282,9 +306,9 @@ class Connection: if accepted_response_codes is None: accepted_response_codes = self.ACCEPTED_RESPONSE_CODES - - r = self._request( - request=self.session.get, + + r = self.request( + method="GET", try_count=0, accepted_response_codes=accepted_response_codes, url=url, @@ -310,8 +334,9 @@ class Connection: https://en.wikipedia.org/wiki/Kilobyte > The internationally recommended unit symbol for the kilobyte is kB. """ - - with tqdm(total=total_size-target.size, unit='B', unit_scale=True, unit_divisor=1024, desc=description) as t: + + with tqdm(total=total_size - target.size, unit='B', unit_scale=True, unit_divisor=1024, + desc=description) as t: try: for chunk in r.iter_content(chunk_size=chunk_size): size = f.write(chunk) @@ -321,7 +346,8 @@ class Connection: except requests.exceptions.ConnectionError: if try_count >= self.TRIES: self.LOGGER.warning(f"Stream timed out at \"{url}\": to many retries, aborting.") - return DownloadResult(error_message=f"Stream timed out from {url}, reducing the chunksize might help.") + return DownloadResult( + error_message=f"Stream timed out from {url}, reducing the chunksize might help.") self.LOGGER.warning(f"Stream timed out at \"{url}\": ({try_count}-{self.TRIES})") retry = True @@ -329,15 +355,14 @@ class Connection: if total_size > progress: retry = True - if retry: self.LOGGER.warning(f"Retrying stream...") accepted_response_codes.add(206) return self.stream_into( - url = url, - target = target, - description = description, - try_count=try_count+1, + url=url, + target=target, + description=description, + try_count=try_count + 1, progress=progress, accepted_response_codes=accepted_response_codes, timeout=timeout, diff --git a/src/music_kraken/pages/bandcamp.py b/src/music_kraken/pages/bandcamp.py index 4a0d5da..1f6f050 100644 --- a/src/music_kraken/pages/bandcamp.py +++ b/src/music_kraken/pages/bandcamp.py @@ -356,6 +356,5 @@ class Bandcamp(Page): def download_song_to_target(self, source: Source, target: Target, desc: str = None) -> DownloadResult: if source.audio_url is None: - print(source) return DownloadResult(error_message="Couldn't find download link.") return self.connection.stream_into(url=source.audio_url, target=target, description=desc) diff --git a/src/music_kraken/pages/youtube_music/youtube_music.py b/src/music_kraken/pages/youtube_music/youtube_music.py index 249a3db..01c7d09 100644 --- a/src/music_kraken/pages/youtube_music/youtube_music.py +++ b/src/music_kraken/pages/youtube_music/youtube_music.py @@ -59,7 +59,8 @@ class YoutubeMusicConnection(Connection): heartbeat_interval=113.25, header_values={ "Accept-Language": accept_language - } + }, + module="youtube_music", ) # cookie consent for youtube @@ -161,8 +162,10 @@ class YoutubeMusic(SuperYouTube): # save cookies in settings youtube_settings["youtube_music_consent_cookies"] = cookie_dict + else: + self.connection.save(r, "index.html") - r = self.connection.get("https://music.youtube.com/") + r = self.connection.get("https://music.youtube.com/", name="index.html") if r is None: return diff --git a/src/music_kraken/utils/shared.py b/src/music_kraken/utils/shared.py index d1645f8..925a3a6 100644 --- a/src/music_kraken/utils/shared.py +++ b/src/music_kraken/utils/shared.py @@ -4,7 +4,7 @@ from .config import main_settings DEBUG = True DEBUG_LOGGING = DEBUG and True -DEBUG_YOUTUBE_INITIALIZING = DEBUG and False +DEBUG_YOUTUBE_INITIALIZING = DEBUG and True DEBUG_PAGES = DEBUG and False if DEBUG: From 2d4ba50b57f25040f67a552f06b447052dee1263 Mon Sep 17 00:00:00 2001 From: Lars Noack Date: Fri, 19 Jan 2024 18:45:12 +0100 Subject: [PATCH 5/6] feat: clean and clear methods for the cache --- .idea/vcs.xml | 2 - requirements.txt | 5 +- src/music_kraken/connection/cache.py | 79 ++++++++++++++++++++--- src/music_kraken/connection/connection.py | 13 ++-- src/music_kraken/pages/bandcamp.py | 3 +- 5 files changed, 80 insertions(+), 22 deletions(-) diff --git a/.idea/vcs.xml b/.idea/vcs.xml index 0823e82..35eb1dd 100644 --- a/.idea/vcs.xml +++ b/.idea/vcs.xml @@ -2,7 +2,5 @@ - - \ No newline at end of file diff --git a/requirements.txt b/requirements.txt index 4462589..31605f0 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,4 +1,4 @@ -requests~=2.28.1 +requests~=2.31.0 mutagen~=1.46.0 musicbrainzngs~=0.7.1 jellyfish~=0.9.0 @@ -20,4 +20,5 @@ pathvalidate~=2.5.2 guppy3~=3.1.3 toml~=0.10.2 -typing_extensions~=4.7.1 \ No newline at end of file +typing_extensions~=4.7.1 +responses~=0.24.1 \ No newline at end of file diff --git a/src/music_kraken/connection/cache.py b/src/music_kraken/connection/cache.py index d393be5..1f6b780 100644 --- a/src/music_kraken/connection/cache.py +++ b/src/music_kraken/connection/cache.py @@ -64,6 +64,18 @@ class Cache: r.mkdir(exist_ok=True) return r + def _write_index(self, indent: int = 4): + _json = [] + for c in self.cached_attributes: + d = c.__dict__ + for key in self._time_fields: + d[key] = d[key].isoformat() + + _json.append(d) + + with self.index.open("w") as f: + f.write(json.dumps(_json, indent=indent)) + def _write_attribute(self, cached_attribute: CacheAttribute, write: bool = True) -> bool: existing_attribute: Optional[CacheAttribute] = self._id_to_attribute.get(cached_attribute.id) if existing_attribute is not None: @@ -80,16 +92,7 @@ class Cache: self._id_to_attribute[cached_attribute.id] = cached_attribute if write: - _json = [] - for c in self.cached_attributes: - d = c.__dict__ - for key in self._time_fields: - d[key] = d[key].isoformat() - - _json.append(d) - - with self.index.open("w") as f: - f.write(json.dumps(_json, indent=4)) + self._write_index() return True @@ -132,3 +135,59 @@ class Cache: with path.open("rb") as f: return f.read() + + def clean(self): + keep = set() + + for ca in self.cached_attributes.copy(): + file = Path(self._dir, ca.module, ca.name) + + if not ca.is_valid: + self.logger.debug(f"deleting cache {ca.id}") + file.unlink() + self.cached_attributes.remove(ca) + del self._id_to_attribute[ca.id] + + else: + keep.add(file) + + # iterate through every module (folder) + for module_path in self._dir.iterdir(): + if not module_path.is_dir(): + continue + + # delete all files not in keep + for path in module_path.iterdir(): + if path not in keep: + self.logger.info(f"Deleting cache {path}") + path.unlink() + + # delete all empty directories + for path in module_path.iterdir(): + if path.is_dir() and not list(path.iterdir()): + self.logger.debug(f"Deleting cache directory {path}") + path.rmdir() + + self._write_index() + + def clear(self): + """ + delete every file in the cache directory + :return: + """ + + for path in self._dir.iterdir(): + if path.is_dir(): + for file in path.iterdir(): + file.unlink() + path.rmdir() + else: + path.unlink() + + self.cached_attributes.clear() + self._id_to_attribute.clear() + + self._write_index() + + def __repr__(self): + return f"" diff --git a/src/music_kraken/connection/connection.py b/src/music_kraken/connection/connection.py index 7949545..e32ad8a 100644 --- a/src/music_kraken/connection/connection.py +++ b/src/music_kraken/connection/connection.py @@ -1,19 +1,18 @@ -import time -from typing import List, Dict, Callable, Optional, Set -from urllib.parse import urlparse, urlunsplit, ParseResult import logging - import threading +import time +from typing import List, Dict, Optional, Set +from urllib.parse import urlparse, urlunsplit, ParseResult + import requests import responses -from responses import matchers from tqdm import tqdm -from .rotating import RotatingProxy from .cache import Cache +from .rotating import RotatingProxy +from ..objects import Target from ..utils.config import main_settings from ..utils.support_classes.download_result import DownloadResult -from ..objects import Target class Connection: diff --git a/src/music_kraken/pages/bandcamp.py b/src/music_kraken/pages/bandcamp.py index 1f6f050..6f65d6c 100644 --- a/src/music_kraken/pages/bandcamp.py +++ b/src/music_kraken/pages/bandcamp.py @@ -53,7 +53,8 @@ class Bandcamp(Page): def __init__(self, *args, **kwargs): self.connection: Connection = Connection( host="https://bandcamp.com/", - logger=self.LOGGER + logger=self.LOGGER, + module="bandcamp", ) super().__init__(*args, **kwargs) From fba9c31c505210d22204a217382573459cb6c8b8 Mon Sep 17 00:00:00 2001 From: Lars Noack Date: Mon, 22 Jan 2024 09:36:14 +0100 Subject: [PATCH 6/6] feat: clean and clear methods for the cache --- src/music_kraken/__init__.py | 3 ++- src/music_kraken/__main__.py | 18 ++++++++++++++++++ src/music_kraken/cli/options/cache.py | 21 +++++++++++++++++++++ 3 files changed, 41 insertions(+), 1 deletion(-) create mode 100644 src/music_kraken/cli/options/cache.py diff --git a/src/music_kraken/__init__.py b/src/music_kraken/__init__.py index b3ee566..57106fc 100644 --- a/src/music_kraken/__init__.py +++ b/src/music_kraken/__init__.py @@ -4,12 +4,13 @@ import sys from .utils.shared import DEBUG, DEBUG_LOGGING from .utils.config import logging_settings, main_settings, read_config + read_config() from . import cli - if DEBUG: import sys + sys.setrecursionlimit(100) diff --git a/src/music_kraken/__main__.py b/src/music_kraken/__main__.py index 0425d5d..9da441d 100644 --- a/src/music_kraken/__main__.py +++ b/src/music_kraken/__main__.py @@ -79,6 +79,18 @@ def cli(): action="store_true" ) + parser.add_argument( + "--clear-cache", + help="Deletes the cache.", + action="store_true" + ) + + parser.add_argument( + "--clean-cache", + help="Deletes the outdated cache. (all expired cached files, and not indexed files)", + action="store_true" + ) + arguments = parser.parse_args() if arguments.verbose or arguments.test: @@ -112,6 +124,12 @@ def cli(): if arguments.frontend: cli.set_frontend(silent=False) + if arguments.clear_cache: + cli.clear_cache() + + if arguments.clean_cache: + cli.clean_cache() + # getting the genre genre: str = arguments.genre if arguments.test: diff --git a/src/music_kraken/cli/options/cache.py b/src/music_kraken/cli/options/cache.py new file mode 100644 index 0000000..103696b --- /dev/null +++ b/src/music_kraken/cli/options/cache.py @@ -0,0 +1,21 @@ +from logging import getLogger + +from ...connection.cache import Cache + + +def clear_cache(): + """ + Deletes the cache. + :return: + """ + + Cache("main", getLogger("cache")).clear() + + +def clean_cache(): + """ + Deletes the outdated cache. (all expired cached files, and not indexed files) + :return: + """ + + Cache("main", getLogger("cache")).clean()