feature/sponsorblock #17
@ -6,9 +6,8 @@ logging.getLogger().setLevel(logging.DEBUG)
|
||||
|
||||
if __name__ == "__main__":
|
||||
commands = [
|
||||
"s: #a Ghost Bath",
|
||||
"0",
|
||||
"d: 1",
|
||||
"s: #a Ruffiction",
|
||||
"d: 8",
|
||||
]
|
||||
|
||||
|
||||
|
@ -215,6 +215,9 @@ class Downloader:
|
||||
return True
|
||||
|
||||
def _process_parsed(self, key_text: Dict[str, str], query: str) -> Query:
|
||||
# strip all the values in key_text
|
||||
key_text = {key: value.strip() for key, value in key_text.items()}
|
||||
|
||||
song = None if not "t" in key_text else Song(title=key_text["t"], dynamic=True)
|
||||
album = None if not "r" in key_text else Album(title=key_text["r"], dynamic=True)
|
||||
artist = None if not "a" in key_text else Artist(name=key_text["a"], dynamic=True)
|
||||
|
@ -7,6 +7,7 @@ from functools import lru_cache
|
||||
import logging
|
||||
|
||||
from ..utils.config import main_settings
|
||||
from ..utils.string_processing import fit_to_file_system
|
||||
|
||||
|
||||
@dataclass
|
||||
@ -63,7 +64,7 @@ class Cache:
|
||||
:return: the module path
|
||||
"""
|
||||
r = Path(self._dir, module)
|
||||
r.mkdir(exist_ok=True)
|
||||
r.mkdir(exist_ok=True, parents=True)
|
||||
return r
|
||||
|
||||
def _write_index(self, indent: int = 4):
|
||||
@ -122,13 +123,13 @@ class Cache:
|
||||
)
|
||||
self._write_attribute(cache_attribute)
|
||||
|
||||
cache_path = Path(module_path, name)
|
||||
cache_path = fit_to_file_system(Path(module_path, name), hidden_ok=True)
|
||||
with cache_path.open("wb") as content_file:
|
||||
self.logger.debug(f"writing cache to {cache_path}")
|
||||
content_file.write(content)
|
||||
|
||||
def get(self, name: str) -> Optional[bytes]:
|
||||
path = Path(self._dir, self.module, name)
|
||||
path = fit_to_file_system(Path(self._dir, self.module, name), hidden_ok=True)
|
||||
|
||||
if not path.is_file():
|
||||
return None
|
||||
@ -148,7 +149,7 @@ class Cache:
|
||||
if ca.name == "":
|
||||
continue
|
||||
|
||||
file = Path(self._dir, ca.module, ca.name)
|
||||
file = fit_to_file_system(Path(self._dir, ca.module, ca.name), hidden_ok=True)
|
||||
|
||||
if not ca.is_valid:
|
||||
self.logger.debug(f"deleting cache {ca.id}")
|
||||
|
@ -15,6 +15,7 @@ from tqdm import tqdm
|
||||
from .cache import Cache
|
||||
from .rotating import RotatingProxy
|
||||
from ..objects import Target
|
||||
from ..utils import request_trace
|
||||
from ..utils.config import main_settings
|
||||
from ..utils.support_classes.download_result import DownloadResult
|
||||
from ..utils.hacking import merge_args
|
||||
@ -148,6 +149,8 @@ class Connection:
|
||||
exclude_headers: List[str] = None,
|
||||
**kwargs
|
||||
) -> Optional[requests.Response]:
|
||||
trace_string = f"{method} {url} \t{'[stream]' if kwargs.get('stream', False) else ''}"
|
||||
|
||||
if method is None:
|
||||
raise AttributeError("method is not set.")
|
||||
method = method.upper()
|
||||
@ -179,6 +182,8 @@ class Connection:
|
||||
cached = self.cache.get(name)
|
||||
|
||||
if cached is not None:
|
||||
request_trace(f"{trace_string}\t[cached]")
|
||||
|
||||
with responses.RequestsMock() as resp:
|
||||
resp.add(
|
||||
method=method,
|
||||
@ -199,6 +204,9 @@ class Connection:
|
||||
if header in headers:
|
||||
del headers[header]
|
||||
|
||||
if try_count <= 0:
|
||||
request_trace(trace_string)
|
||||
|
||||
r = None
|
||||
connection_failed = False
|
||||
try:
|
||||
|
@ -1,110 +0,0 @@
|
||||
from collections import defaultdict
|
||||
from typing import Dict, List, Optional
|
||||
import weakref
|
||||
|
||||
from .parents import DatabaseObject
|
||||
|
||||
"""
|
||||
This is a cache for the objects that get pulled out of the database.
|
||||
This is necessary, to not have duplicate objects with the same id.
|
||||
|
||||
Using a cache that maps the objects to their id has multiple benefits:
|
||||
- if you modify the object at any point, all objects with the same id get modified *(copy by reference)*
|
||||
- less ram usage
|
||||
- to further decrease ram usage I only store weak refs and not a strong reference, for the gc to still work
|
||||
"""
|
||||
|
||||
|
||||
class ObjectCache:
    """
    Cache that keeps at most one DatabaseObject per id.

    Two dictionaries are kept in sync:

    :attr object_to_id: maps an id to the cached DatabaseObject (despite its name,
        the id is the key and the object is the value).
    :attr weakref_map: maps a weak reference of each cached DatabaseObject to its id,
        so that ``on_death`` can find the id belonging to a dead reference.

    :method exists: Check if a DatabaseObject already exists in the cache.
    :method append: Add a DatabaseObject to the cache if it does not already exist.
    :method extent: Add a list of DatabaseObjects to the cache.
    :method remove: Remove a DatabaseObject from the cache by its id.
    :method get: Retrieve a DatabaseObject from the cache by its id.

    NOTE(review): ``object_to_id`` holds a strong reference to every cached object,
    so an object cannot be garbage collected while it is cached and ``on_death``
    will not fire for it. If weak-only storage is the intent, this needs a
    deliberate redesign (e.g. ``weakref.WeakValueDictionary``) -- confirm with callers.
    """
    object_to_id: Dict[str, DatabaseObject]
    weakref_map: Dict[weakref.ref, str]

    def __init__(self) -> None:
        # Plain dicts: the former ``defaultdict()`` had no default factory and
        # therefore behaved exactly like a regular dict.
        self.object_to_id = {}
        self.weakref_map = {}

    def exists(self, database_object: DatabaseObject) -> bool:
        """
        Check if a DatabaseObject with the same id already exists in the cache.

        Objects whose ``dynamic`` attribute is truthy are always reported as
        existing, which means ``append`` never stores them.

        :param database_object: The DatabaseObject to check for.
        :return: True if the object is dynamic or its id is cached, False otherwise.
        """
        if database_object.dynamic:
            return True
        return database_object.id in self.object_to_id

    def on_death(self, weakref_: weakref.ref) -> None:
        """
        Weak reference callback: drop the entry that belongs to a dead reference.

        Unknown references are ignored instead of raising ``KeyError``, so a stale
        callback can never fail after the entry was already removed.

        :param weakref_: The weak reference whose referent has been garbage collected.
        """
        data_id = self.weakref_map.pop(weakref_, None)
        if data_id is not None:
            self.object_to_id.pop(data_id, None)

    def get_weakref(self, database_object: DatabaseObject) -> weakref.ref:
        """
        Build a weak reference to ``database_object`` that calls ``on_death`` when it dies.

        :param database_object: The DatabaseObject to reference weakly.
        :return: The weak reference with ``on_death`` registered as callback.
        """
        return weakref.ref(database_object, self.on_death)

    def append(self, database_object: DatabaseObject) -> bool:
        """
        Add a DatabaseObject to the cache.

        :param database_object: The DatabaseObject to add to the cache.
        :return: True if the DatabaseObject already exists in the cache
            (nothing is stored then), False if it was newly added.
        """
        if self.exists(database_object):
            return True

        object_id = database_object.id
        # The weak reference is registered first: hashing it hashes the object, so an
        # unhashable object fails here before anything has been stored.
        self.weakref_map[self.get_weakref(database_object)] = object_id
        self.object_to_id[object_id] = database_object

        return False

    def extent(self, database_object_list: List[DatabaseObject]):
        """
        Analogous to ``list.extend``: append every object of the given list.

        :param database_object_list: The DatabaseObjects to add to the cache.
        """
        for database_object in database_object_list:
            self.append(database_object)

    def remove(self, _id: str):
        """
        Remove a DatabaseObject from the cache. Unknown ids are ignored.

        :param _id: The id of the DatabaseObject to remove from the cache.
        """
        # Compare against None instead of relying on truthiness, otherwise a cached
        # object that evaluates to False (``__bool__`` / ``__len__``) is never removed.
        data = self.object_to_id.pop(_id, None)
        if data is None:
            return

        # A weak reference to a live object hashes and compares like the object itself,
        # so a fresh reference finds the stored key. Use a default so that maps which
        # are out of sync do not raise.
        self.weakref_map.pop(weakref.ref(data), None)

    def __getitem__(self, item) -> Optional[DatabaseObject]:
        """
        Return the cached object for an id.

        :param item: the id of the music object
        :return: the cached DatabaseObject, or None if the id is not cached
            (no ``KeyError`` is raised).
        """
        return self.object_to_id.get(item)

    def get(self, _id: str) -> Optional[DatabaseObject]:
        """
        Return the cached object for an id, or None if the id is not cached.

        :param _id: The id of the DatabaseObject to look up.
        :return: the cached DatabaseObject or None.
        """
        return self.__getitem__(_id)
|
@ -121,6 +121,7 @@ class Musify(Page):
|
||||
self.connection: Connection = Connection(
|
||||
host="https://musify.club/",
|
||||
logger=self.LOGGER,
|
||||
module="musify",
|
||||
)
|
||||
|
||||
self.stream_connection: Connection = Connection(
|
||||
@ -393,10 +394,11 @@ class Musify(Page):
|
||||
return search_results
|
||||
|
||||
def fetch_song(self, source: Source, stop_at_level: int = 1) -> Song:
|
||||
# https://musify.club/track/linkin-park-numb-210765
|
||||
r = self.connection.get(source.url)
|
||||
musify_url = parse_url(source.url)
|
||||
|
||||
r = self.connection.get(source.url, name="track_" + musify_url.name_with_id)
|
||||
if r is None:
|
||||
return Song
|
||||
return Song()
|
||||
|
||||
soup = self.get_soup_from_response(r)
|
||||
|
||||
@ -672,7 +674,7 @@ class Musify(Page):
|
||||
url = parse_url(source.url)
|
||||
|
||||
endpoint = self.HOST + "/release/" + url.name_with_id
|
||||
r = self.connection.get(endpoint)
|
||||
r = self.connection.get(endpoint, name=url.name_with_id)
|
||||
if r is None:
|
||||
return Album()
|
||||
|
||||
@ -709,7 +711,7 @@ class Musify(Page):
|
||||
:return:
|
||||
"""
|
||||
|
||||
r = self.connection.get(f"https://musify.club/{url.source_type.value}/{url.name_with_id}?_pjax=#bodyContent")
|
||||
r = self.connection.get(f"https://musify.club/{url.source_type.value}/{url.name_with_id}?_pjax=#bodyContent", name="artist_attributes_" + url.name_with_id)
|
||||
if r is None:
|
||||
return Artist()
|
||||
|
||||
@ -1075,7 +1077,7 @@ class Musify(Page):
|
||||
"SortOrder.Property": "dateCreated",
|
||||
"SortOrder.IsAscending": False,
|
||||
"X-Requested-With": "XMLHttpRequest"
|
||||
})
|
||||
}, name="discography_" + url.name_with_id)
|
||||
if r is None:
|
||||
return []
|
||||
soup: BeautifulSoup = BeautifulSoup(r.content, features="html.parser")
|
||||
|
@ -51,14 +51,20 @@ def trace(msg: str):
|
||||
if not DEBUG_TRACE:
|
||||
return
|
||||
|
||||
output("trace: " + msg, BColors.OKBLUE)
|
||||
output(BColors.OKBLUE.value + "trace: " + BColors.ENDC.value + msg)
|
||||
|
||||
def request_trace(msg: str):
    """
    Print a trace line for a request.

    The line is ``"request: "`` wrapped in the ``BColors.OKGREEN`` / ``BColors.ENDC``
    values, followed by ``msg``. Nothing is printed unless ``DEBUG_TRACE`` is truthy.

    :param msg: the text to append after the ``request: `` prefix.
    """
    if DEBUG_TRACE:
        prefix = BColors.OKGREEN.value + "request: " + BColors.ENDC.value
        output(prefix + msg)
|
||||
|
||||
def object_trace(obj):
|
||||
if not DEBUG_OBJECT_TRACE:
|
||||
return
|
||||
|
||||
appendix = f" called by [{' | '.join(f'{s.function} {Path(s.filename).name}:{str(s.lineno)}' for s in inspect.stack()[1:5])}]" if DEBUG_OBJECT_TRACE_CALLSTACK else ""
|
||||
output("object: " + str(obj) + appendix, BColors.GREY)
|
||||
output("object: " + str(obj) + appendix)
|
||||
|
||||
|
||||
"""
|
||||
|
@ -34,13 +34,15 @@ def unify(string: str) -> str:
|
||||
return string.lower()
|
||||
|
||||
|
||||
def fit_to_file_system(string: Union[str, Path]) -> Union[str, Path]:
|
||||
def fit_to_file_system(string: Union[str, Path], hidden_ok: bool = False) -> Union[str, Path]:
|
||||
def fit_string(string: str) -> str:
|
||||
nonlocal hidden_ok
|
||||
|
||||
if string == "/":
|
||||
return "/"
|
||||
string = string.strip()
|
||||
|
||||
while string[0] == ".":
|
||||
while string[0] == "." and not hidden_ok:
|
||||
if len(string) == 0:
|
||||
return string
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user