feat: fixed cache plus consistent cache throughout musify
ci/woodpecker/push/woodpecker Pipeline was successful Details

This commit is contained in:
Hellow 2024-04-26 01:05:22 +02:00
parent a015b8918e
commit 3d94e6e2dd
8 changed files with 38 additions and 127 deletions

View File

@ -6,9 +6,8 @@ logging.getLogger().setLevel(logging.DEBUG)
if __name__ == "__main__": if __name__ == "__main__":
commands = [ commands = [
"s: #a Ghost Bath", "s: #a Ruffiction",
"0", "d: 8",
"d: 1",
] ]

View File

@ -215,6 +215,9 @@ class Downloader:
return True return True
def _process_parsed(self, key_text: Dict[str, str], query: str) -> Query: def _process_parsed(self, key_text: Dict[str, str], query: str) -> Query:
# strip all the values in key_text
key_text = {key: value.strip() for key, value in key_text.items()}
song = None if not "t" in key_text else Song(title=key_text["t"], dynamic=True) song = None if not "t" in key_text else Song(title=key_text["t"], dynamic=True)
album = None if not "r" in key_text else Album(title=key_text["r"], dynamic=True) album = None if not "r" in key_text else Album(title=key_text["r"], dynamic=True)
artist = None if not "a" in key_text else Artist(name=key_text["a"], dynamic=True) artist = None if not "a" in key_text else Artist(name=key_text["a"], dynamic=True)

View File

@ -7,6 +7,7 @@ from functools import lru_cache
import logging import logging
from ..utils.config import main_settings from ..utils.config import main_settings
from ..utils.string_processing import fit_to_file_system
@dataclass @dataclass
@ -63,7 +64,7 @@ class Cache:
:return: the module path :return: the module path
""" """
r = Path(self._dir, module) r = Path(self._dir, module)
r.mkdir(exist_ok=True) r.mkdir(exist_ok=True, parents=True)
return r return r
def _write_index(self, indent: int = 4): def _write_index(self, indent: int = 4):
@ -122,13 +123,13 @@ class Cache:
) )
self._write_attribute(cache_attribute) self._write_attribute(cache_attribute)
cache_path = Path(module_path, name) cache_path = fit_to_file_system(Path(module_path, name), hidden_ok=True)
with cache_path.open("wb") as content_file: with cache_path.open("wb") as content_file:
self.logger.debug(f"writing cache to {cache_path}") self.logger.debug(f"writing cache to {cache_path}")
content_file.write(content) content_file.write(content)
def get(self, name: str) -> Optional[bytes]: def get(self, name: str) -> Optional[bytes]:
path = Path(self._dir, self.module, name) path = fit_to_file_system(Path(self._dir, self.module, name), hidden_ok=True)
if not path.is_file(): if not path.is_file():
return None return None
@ -148,7 +149,7 @@ class Cache:
if ca.name == "": if ca.name == "":
continue continue
file = Path(self._dir, ca.module, ca.name) file = fit_to_file_system(Path(self._dir, ca.module, ca.name), hidden_ok=True)
if not ca.is_valid: if not ca.is_valid:
self.logger.debug(f"deleting cache {ca.id}") self.logger.debug(f"deleting cache {ca.id}")

View File

@ -15,6 +15,7 @@ from tqdm import tqdm
from .cache import Cache from .cache import Cache
from .rotating import RotatingProxy from .rotating import RotatingProxy
from ..objects import Target from ..objects import Target
from ..utils import request_trace
from ..utils.config import main_settings from ..utils.config import main_settings
from ..utils.support_classes.download_result import DownloadResult from ..utils.support_classes.download_result import DownloadResult
from ..utils.hacking import merge_args from ..utils.hacking import merge_args
@ -148,6 +149,8 @@ class Connection:
exclude_headers: List[str] = None, exclude_headers: List[str] = None,
**kwargs **kwargs
) -> Optional[requests.Response]: ) -> Optional[requests.Response]:
trace_string = f"{method} {url} \t{'[stream]' if kwargs.get('stream', False) else ''}"
if method is None: if method is None:
raise AttributeError("method is not set.") raise AttributeError("method is not set.")
method = method.upper() method = method.upper()
@ -179,6 +182,8 @@ class Connection:
cached = self.cache.get(name) cached = self.cache.get(name)
if cached is not None: if cached is not None:
request_trace(f"{trace_string}\t[cached]")
with responses.RequestsMock() as resp: with responses.RequestsMock() as resp:
resp.add( resp.add(
method=method, method=method,
@ -199,6 +204,9 @@ class Connection:
if header in headers: if header in headers:
del headers[header] del headers[header]
if try_count <= 0:
request_trace(trace_string)
r = None r = None
connection_failed = False connection_failed = False
try: try:

View File

@ -1,110 +0,0 @@
from collections import defaultdict
from typing import Dict, List, Optional
import weakref
from .parents import DatabaseObject
"""
This is a cache for the objects, that et pulled out of the database.
This is necessary, to not have duplicate objects with the same id.
Using a cache that maps the ojects to their id has multiple benefits:
- if you modify the object at any point, all objects with the same id get modified *(copy by reference)*
- less ram usage
- to further decrease ram usage I only store weak refs and not a strong reference, for the gc to still work
"""
class ObjectCache:
"""
ObjectCache is a cache for the objects retrieved from a database.
It maps each object to its id and uses weak references to manage its memory usage.
Using a cache for these objects provides several benefits:
- Modifying an object updates all objects with the same id (due to copy by reference)
- Reduced memory usage
:attr object_to_id: Dictionary that maps DatabaseObjects to their id.
:attr weakref_map: Dictionary that uses weak references to DatabaseObjects as keys and their id as values.
:method exists: Check if a DatabaseObject already exists in the cache.
:method append: Add a DatabaseObject to the cache if it does not already exist.
:method extent: Add a list of DatabaseObjects to the cache.
:method remove: Remove a DatabaseObject from the cache by its id.
:method get: Retrieve a DatabaseObject from the cache by its id. """
object_to_id: Dict[str, DatabaseObject]
weakref_map: Dict[weakref.ref, str]
def __init__(self) -> None:
self.object_to_id = dict()
self.weakref_map = defaultdict()
def exists(self, database_object: DatabaseObject) -> bool:
"""
Check if a DatabaseObject with the same id already exists in the cache.
:param database_object: The DatabaseObject to check for.
:return: True if the DatabaseObject exists, False otherwise.
"""
if database_object.dynamic:
return True
return database_object.id in self.object_to_id
def on_death(self, weakref_: weakref.ref) -> None:
"""
Callback function that gets triggered when the reference count of a DatabaseObject drops to 0.
This function removes the DatabaseObject from the cache.
:param weakref_: The weak reference of the DatabaseObject that has been garbage collected.
"""
data_id = self.weakref_map.pop(weakref_)
self.object_to_id.pop(data_id)
def get_weakref(self, database_object: DatabaseObject) -> weakref.ref:
return weakref.ref(database_object, self.on_death)
def append(self, database_object: DatabaseObject) -> bool:
"""
Add a DatabaseObject to the cache.
:param database_object: The DatabaseObject to add to the cache.
:return: True if the DatabaseObject already exists in the cache, False otherwise.
"""
if self.exists(database_object):
return True
self.weakref_map[weakref.ref(database_object, self.on_death)] = database_object.id
self.object_to_id[database_object.id] = database_object
return False
def extent(self, database_object_list: List[DatabaseObject]):
"""
adjacent to the extent method of list, this appends n Object
"""
for database_object in database_object_list:
self.append(database_object)
def remove(self, _id: str):
"""
Remove a DatabaseObject from the cache.
:param _id: The id of the DatabaseObject to remove from the cache.
"""
data = self.object_to_id.get(_id)
if data:
self.weakref_map.pop(weakref.ref(data))
self.object_to_id.pop(_id)
def __getitem__(self, item) -> Optional[DatabaseObject]:
"""
this returns the data obj
:param item: the id of the music object
:return:
"""
return self.object_to_id.get(item)
def get(self, _id: str) -> Optional[DatabaseObject]:
return self.__getitem__(_id)

View File

@ -121,6 +121,7 @@ class Musify(Page):
self.connection: Connection = Connection( self.connection: Connection = Connection(
host="https://musify.club/", host="https://musify.club/",
logger=self.LOGGER, logger=self.LOGGER,
module="musify",
) )
self.stream_connection: Connection = Connection( self.stream_connection: Connection = Connection(
@ -393,10 +394,11 @@ class Musify(Page):
return search_results return search_results
def fetch_song(self, source: Source, stop_at_level: int = 1) -> Song: def fetch_song(self, source: Source, stop_at_level: int = 1) -> Song:
# https://musify.club/track/linkin-park-numb-210765 musify_url = parse_url(source.url)
r = self.connection.get(source.url)
r = self.connection.get(source.url, name="track_" + musify_url.name_with_id)
if r is None: if r is None:
return Song return Song()
soup = self.get_soup_from_response(r) soup = self.get_soup_from_response(r)
@ -672,7 +674,7 @@ class Musify(Page):
url = parse_url(source.url) url = parse_url(source.url)
endpoint = self.HOST + "/release/" + url.name_with_id endpoint = self.HOST + "/release/" + url.name_with_id
r = self.connection.get(endpoint) r = self.connection.get(endpoint, name=url.name_with_id)
if r is None: if r is None:
return Album() return Album()
@ -709,7 +711,7 @@ class Musify(Page):
:return: :return:
""" """
r = self.connection.get(f"https://musify.club/{url.source_type.value}/{url.name_with_id}?_pjax=#bodyContent") r = self.connection.get(f"https://musify.club/{url.source_type.value}/{url.name_with_id}?_pjax=#bodyContent", name="artist_attributes_" + url.name_with_id)
if r is None: if r is None:
return Artist() return Artist()
@ -1075,7 +1077,7 @@ class Musify(Page):
"SortOrder.Property": "dateCreated", "SortOrder.Property": "dateCreated",
"SortOrder.IsAscending": False, "SortOrder.IsAscending": False,
"X-Requested-With": "XMLHttpRequest" "X-Requested-With": "XMLHttpRequest"
}) }, name="discography_" + url.name_with_id)
if r is None: if r is None:
return [] return []
soup: BeautifulSoup = BeautifulSoup(r.content, features="html.parser") soup: BeautifulSoup = BeautifulSoup(r.content, features="html.parser")

View File

@ -51,14 +51,20 @@ def trace(msg: str):
if not DEBUG_TRACE: if not DEBUG_TRACE:
return return
output("trace: " + msg, BColors.OKBLUE) output(BColors.OKBLUE.value + "trace: " + BColors.ENDC.value + msg)
def request_trace(msg: str):
if not DEBUG_TRACE:
return
output(BColors.OKGREEN.value + "request: " + BColors.ENDC.value + msg)
def object_trace(obj): def object_trace(obj):
if not DEBUG_OBJECT_TRACE: if not DEBUG_OBJECT_TRACE:
return return
appendix = f" called by [{' | '.join(f'{s.function} {Path(s.filename).name}:{str(s.lineno)}' for s in inspect.stack()[1:5])}]" if DEBUG_OBJECT_TRACE_CALLSTACK else "" appendix = f" called by [{' | '.join(f'{s.function} {Path(s.filename).name}:{str(s.lineno)}' for s in inspect.stack()[1:5])}]" if DEBUG_OBJECT_TRACE_CALLSTACK else ""
output("object: " + str(obj) + appendix, BColors.GREY) output("object: " + str(obj) + appendix)
""" """

View File

@ -34,13 +34,15 @@ def unify(string: str) -> str:
return string.lower() return string.lower()
def fit_to_file_system(string: Union[str, Path]) -> Union[str, Path]: def fit_to_file_system(string: Union[str, Path], hidden_ok: bool = False) -> Union[str, Path]:
def fit_string(string: str) -> str: def fit_string(string: str) -> str:
nonlocal hidden_ok
if string == "/": if string == "/":
return "/" return "/"
string = string.strip() string = string.strip()
while string[0] == ".": while string[0] == "." and not hidden_ok:
if len(string) == 0: if len(string) == 0:
return string return string