feat: fixed cache plus consistent cache throughout musify
All checks were successful
ci/woodpecker/push/woodpecker Pipeline was successful
parent a015b8918e
commit 3d94e6e2dd
@@ -6,9 +6,8 @@ logging.getLogger().setLevel(logging.DEBUG)
 
 if __name__ == "__main__":
     commands = [
-        "s: #a Ghost Bath",
-        "0",
-        "d: 1",
+        "s: #a Ruffiction",
+        "d: 8",
     ]
 
 
@@ -215,6 +215,9 @@ class Downloader:
         return True
 
     def _process_parsed(self, key_text: Dict[str, str], query: str) -> Query:
+        # strip all the values in key_text
+        key_text = {key: value.strip() for key, value in key_text.items()}
+
         song = None if not "t" in key_text else Song(title=key_text["t"], dynamic=True)
         album = None if not "r" in key_text else Album(title=key_text["r"], dynamic=True)
         artist = None if not "a" in key_text else Artist(name=key_text["a"], dynamic=True)
@@ -7,6 +7,7 @@ from functools import lru_cache
 import logging
 
 from ..utils.config import main_settings
+from ..utils.string_processing import fit_to_file_system
 
 
 @dataclass
@@ -63,7 +64,7 @@ class Cache:
         :return: the module path
         """
         r = Path(self._dir, module)
-        r.mkdir(exist_ok=True)
+        r.mkdir(exist_ok=True, parents=True)
         return r
 
     def _write_index(self, indent: int = 4):
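The mkdir change matters because Path.mkdir(exist_ok=True) alone still raises FileNotFoundError when an intermediate directory is missing, while parents=True creates the whole chain. A minimal standalone illustration (temporary directory only, not the project's cache dir):

from pathlib import Path
import tempfile

with tempfile.TemporaryDirectory() as tmp:
    nested = Path(tmp, "cache", "musify")

    try:
        nested.mkdir(exist_ok=True)            # parent "cache" does not exist yet
    except FileNotFoundError as error:
        print("without parents=True:", error)

    nested.mkdir(exist_ok=True, parents=True)  # creates "cache" and "musify" in one call
    print("created:", nested.is_dir())         # created: True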
@@ -122,13 +123,13 @@ class Cache:
         )
         self._write_attribute(cache_attribute)
 
-        cache_path = Path(module_path, name)
+        cache_path = fit_to_file_system(Path(module_path, name), hidden_ok=True)
         with cache_path.open("wb") as content_file:
             self.logger.debug(f"writing cache to {cache_path}")
             content_file.write(content)
 
     def get(self, name: str) -> Optional[bytes]:
-        path = Path(self._dir, self.module, name)
+        path = fit_to_file_system(Path(self._dir, self.module, name), hidden_ok=True)
 
         if not path.is_file():
             return None
@@ -148,7 +149,7 @@ class Cache:
             if ca.name == "":
                 continue
 
-            file = Path(self._dir, ca.module, ca.name)
+            file = fit_to_file_system(Path(self._dir, ca.module, ca.name), hidden_ok=True)
 
             if not ca.is_valid:
                 self.logger.debug(f"deleting cache {ca.id}")
@@ -15,6 +15,7 @@ from tqdm import tqdm
 from .cache import Cache
 from .rotating import RotatingProxy
 from ..objects import Target
+from ..utils import request_trace
 from ..utils.config import main_settings
 from ..utils.support_classes.download_result import DownloadResult
 from ..utils.hacking import merge_args
@@ -148,6 +149,8 @@ class Connection:
         exclude_headers: List[str] = None,
         **kwargs
     ) -> Optional[requests.Response]:
+        trace_string = f"{method} {url} \t{'[stream]' if kwargs.get('stream', False) else ''}"
+
         if method is None:
             raise AttributeError("method is not set.")
         method = method.upper()
@@ -179,6 +182,8 @@ class Connection:
         cached = self.cache.get(name)
 
         if cached is not None:
+            request_trace(f"{trace_string}\t[cached]")
+
             with responses.RequestsMock() as resp:
                 resp.add(
                     method=method,
@@ -199,6 +204,9 @@ class Connection:
             if header in headers:
                 del headers[header]
 
+        if try_count <= 0:
+            request_trace(trace_string)
+
         r = None
         connection_failed = False
         try:
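The cached branch above replays the stored bytes through the responses library so callers still receive an ordinary requests.Response. A minimal sketch of that replay pattern, with a made-up URL and payload standing in for Cache.get(name):

import requests
import responses

cached_content = b"<html>cached body</html>"    # stands in for the bytes returned by the cache
url = "https://example.com/release/some-album"  # illustrative URL only

with responses.RequestsMock() as resp:
    resp.add(
        method="GET",
        url=url,
        body=cached_content,
        status=200,
    )
    r = requests.get(url)  # answered by the mock, no network traffic

print(r.status_code, r.content == cached_content)  # 200 True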
@@ -1,110 +0,0 @@
-from collections import defaultdict
-from typing import Dict, List, Optional
-import weakref
-
-from .parents import DatabaseObject
-
-"""
-This is a cache for the objects, that et pulled out of the database.
-This is necessary, to not have duplicate objects with the same id.
-
-Using a cache that maps the ojects to their id has multiple benefits:
- - if you modify the object at any point, all objects with the same id get modified *(copy by reference)*
- - less ram usage
- - to further decrease ram usage I only store weak refs and not a strong reference, for the gc to still work
-"""
-
-
-class ObjectCache:
-    """
-    ObjectCache is a cache for the objects retrieved from a database.
-    It maps each object to its id and uses weak references to manage its memory usage.
-    Using a cache for these objects provides several benefits:
-
-    - Modifying an object updates all objects with the same id (due to copy by reference)
-    - Reduced memory usage
-
-    :attr object_to_id: Dictionary that maps DatabaseObjects to their id.
-    :attr weakref_map: Dictionary that uses weak references to DatabaseObjects as keys and their id as values.
-
-    :method exists: Check if a DatabaseObject already exists in the cache.
-    :method append: Add a DatabaseObject to the cache if it does not already exist.
-    :method extent: Add a list of DatabaseObjects to the cache.
-    :method remove: Remove a DatabaseObject from the cache by its id.
-    :method get: Retrieve a DatabaseObject from the cache by its id. """
-    object_to_id: Dict[str, DatabaseObject]
-    weakref_map: Dict[weakref.ref, str]
-
-    def __init__(self) -> None:
-        self.object_to_id = dict()
-        self.weakref_map = defaultdict()
-
-    def exists(self, database_object: DatabaseObject) -> bool:
-        """
-        Check if a DatabaseObject with the same id already exists in the cache.
-
-        :param database_object: The DatabaseObject to check for.
-        :return: True if the DatabaseObject exists, False otherwise.
-        """
-        if database_object.dynamic:
-            return True
-        return database_object.id in self.object_to_id
-
-    def on_death(self, weakref_: weakref.ref) -> None:
-        """
-        Callback function that gets triggered when the reference count of a DatabaseObject drops to 0.
-        This function removes the DatabaseObject from the cache.
-
-        :param weakref_: The weak reference of the DatabaseObject that has been garbage collected.
-        """
-        data_id = self.weakref_map.pop(weakref_)
-        self.object_to_id.pop(data_id)
-
-    def get_weakref(self, database_object: DatabaseObject) -> weakref.ref:
-        return weakref.ref(database_object, self.on_death)
-
-
-    def append(self, database_object: DatabaseObject) -> bool:
-        """
-        Add a DatabaseObject to the cache.
-
-        :param database_object: The DatabaseObject to add to the cache.
-        :return: True if the DatabaseObject already exists in the cache, False otherwise.
-        """
-        if self.exists(database_object):
-            return True
-
-        self.weakref_map[weakref.ref(database_object, self.on_death)] = database_object.id
-        self.object_to_id[database_object.id] = database_object
-
-        return False
-
-    def extent(self, database_object_list: List[DatabaseObject]):
-        """
-        adjacent to the extent method of list, this appends n Object
-        """
-        for database_object in database_object_list:
-            self.append(database_object)
-
-    def remove(self, _id: str):
-        """
-        Remove a DatabaseObject from the cache.
-
-        :param _id: The id of the DatabaseObject to remove from the cache.
-        """
-        data = self.object_to_id.get(_id)
-        if data:
-            self.weakref_map.pop(weakref.ref(data))
-            self.object_to_id.pop(_id)
-
-    def __getitem__(self, item) -> Optional[DatabaseObject]:
-        """
-        this returns the data obj
-        :param item: the id of the music object
-        :return:
-        """
-
-        return self.object_to_id.get(item)
-
-    def get(self, _id: str) -> Optional[DatabaseObject]:
-        return self.__getitem__(_id)
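The deleted ObjectCache leaned on weakref.ref(obj, callback): the callback fires once the last strong reference to an object disappears, which is how entries evicted themselves. A standalone illustration of that mechanism with a toy class (not the project's DatabaseObject):

import weakref

class Node:
    def __init__(self, id_: int):
        self.id = id_

cache = {}  # id -> weakref.ref

def on_death(ref):
    # drop the entry whose weak reference just died, mirroring ObjectCache.on_death
    dead = [key for key, value in cache.items() if value is ref]
    for key in dead:
        del cache[key]

node = Node(1)
cache[node.id] = weakref.ref(node, on_death)

print(len(cache))  # 1
del node           # last strong reference gone -> on_death fires (CPython refcounting)
print(len(cache))  # 0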
@@ -121,6 +121,7 @@ class Musify(Page):
         self.connection: Connection = Connection(
             host="https://musify.club/",
             logger=self.LOGGER,
+            module="musify",
         )
 
         self.stream_connection: Connection = Connection(
@@ -393,10 +394,11 @@ class Musify(Page):
         return search_results
 
     def fetch_song(self, source: Source, stop_at_level: int = 1) -> Song:
-        # https://musify.club/track/linkin-park-numb-210765
-        r = self.connection.get(source.url)
+        musify_url = parse_url(source.url)
+
+        r = self.connection.get(source.url, name="track_" + musify_url.name_with_id)
         if r is None:
-            return Song
+            return Song()
 
         soup = self.get_soup_from_response(r)
 
@@ -672,7 +674,7 @@ class Musify(Page):
         url = parse_url(source.url)
 
         endpoint = self.HOST + "/release/" + url.name_with_id
-        r = self.connection.get(endpoint)
+        r = self.connection.get(endpoint, name=url.name_with_id)
         if r is None:
             return Album()
 
@@ -709,7 +711,7 @@ class Musify(Page):
         :return:
         """
 
-        r = self.connection.get(f"https://musify.club/{url.source_type.value}/{url.name_with_id}?_pjax=#bodyContent")
+        r = self.connection.get(f"https://musify.club/{url.source_type.value}/{url.name_with_id}?_pjax=#bodyContent", name="artist_attributes_" + url.name_with_id)
         if r is None:
             return Artist()
 
@@ -1075,7 +1077,7 @@ class Musify(Page):
             "SortOrder.Property": "dateCreated",
             "SortOrder.IsAscending": False,
             "X-Requested-With": "XMLHttpRequest"
-        })
+        }, name="discography_" + url.name_with_id)
         if r is None:
             return []
         soup: BeautifulSoup = BeautifulSoup(r.content, features="html.parser")
@@ -51,14 +51,20 @@ def trace(msg: str):
     if not DEBUG_TRACE:
         return
 
-    output("trace: " + msg, BColors.OKBLUE)
+    output(BColors.OKBLUE.value + "trace: " + BColors.ENDC.value + msg)
 
+def request_trace(msg: str):
+    if not DEBUG_TRACE:
+        return
+
+    output(BColors.OKGREEN.value + "request: " + BColors.ENDC.value + msg)
+
 def object_trace(obj):
     if not DEBUG_OBJECT_TRACE:
         return
 
     appendix = f" called by [{' | '.join(f'{s.function} {Path(s.filename).name}:{str(s.lineno)}' for s in inspect.stack()[1:5])}]" if DEBUG_OBJECT_TRACE_CALLSTACK else ""
-    output("object: " + str(obj) + appendix, BColors.GREY)
+    output("object: " + str(obj) + appendix)
 
 
 """
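The trace helpers now colour only the prefix and reset before the message. Assuming BColors is an enum of ANSI escape sequences (the values below are assumptions; the project's own enum may differ), the pattern looks like this:

from enum import Enum

class BColors(Enum):
    OKBLUE = "\033[94m"   # assumed ANSI codes, for illustration only
    OKGREEN = "\033[92m"
    ENDC = "\033[0m"

def request_trace(msg: str) -> None:
    # colour the "request:" prefix only, leave the message uncoloured
    print(BColors.OKGREEN.value + "request: " + BColors.ENDC.value + msg)

request_trace("GET https://musify.club/release/example \t[cached]")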
@@ -34,13 +34,15 @@ def unify(string: str) -> str:
     return string.lower()
 
 
-def fit_to_file_system(string: Union[str, Path]) -> Union[str, Path]:
+def fit_to_file_system(string: Union[str, Path], hidden_ok: bool = False) -> Union[str, Path]:
     def fit_string(string: str) -> str:
+        nonlocal hidden_ok
+
         if string == "/":
             return "/"
         string = string.strip()
 
-        while string[0] == ".":
+        while string[0] == "." and not hidden_ok:
             if len(string) == 0:
                 return string
 
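The hidden_ok flag only changes the leading-dot handling inside fit_string; everything else stays as before. A reduced sketch of just that rule (my own simplification, not the full sanitiser):

def strip_leading_dots(segment: str, hidden_ok: bool = False) -> str:
    # leading dots are removed unless the caller explicitly allows hidden names
    segment = segment.strip()
    while segment and segment[0] == "." and not hidden_ok:
        segment = segment[1:]
    return segment

print(strip_leading_dots(".cached_response"))                  # cached_response
print(strip_leading_dots(".cached_response", hidden_ok=True))  # .cached_response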