feature/sponsorblock #17

Merged
Hazel merged 20 commits from feature/sponsorblock into experimental 2024-04-27 09:41:36 +00:00
8 changed files with 38 additions and 127 deletions
Showing only changes of commit 3d94e6e2dd - Show all commits

View File

@ -6,9 +6,8 @@ logging.getLogger().setLevel(logging.DEBUG)
if __name__ == "__main__":
commands = [
"s: #a Ghost Bath",
"0",
"d: 1",
"s: #a Ruffiction",
"d: 8",
]

View File

@ -215,6 +215,9 @@ class Downloader:
return True
def _process_parsed(self, key_text: Dict[str, str], query: str) -> Query:
# strip all the values in key_text
key_text = {key: value.strip() for key, value in key_text.items()}
song = None if not "t" in key_text else Song(title=key_text["t"], dynamic=True)
album = None if not "r" in key_text else Album(title=key_text["r"], dynamic=True)
artist = None if not "a" in key_text else Artist(name=key_text["a"], dynamic=True)

View File

@ -7,6 +7,7 @@ from functools import lru_cache
import logging
from ..utils.config import main_settings
from ..utils.string_processing import fit_to_file_system
@dataclass
@ -63,7 +64,7 @@ class Cache:
:return: the module path
"""
r = Path(self._dir, module)
r.mkdir(exist_ok=True)
r.mkdir(exist_ok=True, parents=True)
return r
def _write_index(self, indent: int = 4):
@ -122,13 +123,13 @@ class Cache:
)
self._write_attribute(cache_attribute)
cache_path = Path(module_path, name)
cache_path = fit_to_file_system(Path(module_path, name), hidden_ok=True)
with cache_path.open("wb") as content_file:
self.logger.debug(f"writing cache to {cache_path}")
content_file.write(content)
def get(self, name: str) -> Optional[bytes]:
path = Path(self._dir, self.module, name)
path = fit_to_file_system(Path(self._dir, self.module, name), hidden_ok=True)
if not path.is_file():
return None
@ -148,7 +149,7 @@ class Cache:
if ca.name == "":
continue
file = Path(self._dir, ca.module, ca.name)
file = fit_to_file_system(Path(self._dir, ca.module, ca.name), hidden_ok=True)
if not ca.is_valid:
self.logger.debug(f"deleting cache {ca.id}")

View File

@ -15,6 +15,7 @@ from tqdm import tqdm
from .cache import Cache
from .rotating import RotatingProxy
from ..objects import Target
from ..utils import request_trace
from ..utils.config import main_settings
from ..utils.support_classes.download_result import DownloadResult
from ..utils.hacking import merge_args
@ -148,6 +149,8 @@ class Connection:
exclude_headers: List[str] = None,
**kwargs
) -> Optional[requests.Response]:
trace_string = f"{method} {url} \t{'[stream]' if kwargs.get('stream', False) else ''}"
if method is None:
raise AttributeError("method is not set.")
method = method.upper()
@ -179,6 +182,8 @@ class Connection:
cached = self.cache.get(name)
if cached is not None:
request_trace(f"{trace_string}\t[cached]")
with responses.RequestsMock() as resp:
resp.add(
method=method,
@ -199,6 +204,9 @@ class Connection:
if header in headers:
del headers[header]
if try_count <= 0:
request_trace(trace_string)
r = None
connection_failed = False
try:

View File

@ -1,110 +0,0 @@
from collections import defaultdict
from typing import Dict, List, Optional
import weakref
from .parents import DatabaseObject
"""
This is a cache for the objects that get pulled out of the database.
This is necessary to avoid having duplicate objects with the same id.
Using a cache that maps the objects to their id has multiple benefits:
- if you modify the object at any point, all objects with the same id get modified *(copy by reference)*
- less ram usage
- to further decrease RAM usage, only weak references (not strong ones) are stored, so the garbage collector can still reclaim the objects
"""
class ObjectCache:
"""
ObjectCache is a cache for the objects retrieved from a database.
It maps each object to its id and uses weak references to manage its memory usage.
Using a cache for these objects provides several benefits:
- Modifying an object updates all objects with the same id (due to copy by reference)
- Reduced memory usage
:attr object_to_id: Dictionary that maps DatabaseObjects to their id.
:attr weakref_map: Dictionary that uses weak references to DatabaseObjects as keys and their id as values.
:method exists: Check if a DatabaseObject already exists in the cache.
:method append: Add a DatabaseObject to the cache if it does not already exist.
:method extent: Add a list of DatabaseObjects to the cache.
:method remove: Remove a DatabaseObject from the cache by its id.
:method get: Retrieve a DatabaseObject from the cache by its id. """
object_to_id: Dict[str, DatabaseObject]
weakref_map: Dict[weakref.ref, str]
def __init__(self) -> None:
self.object_to_id = dict()
self.weakref_map = defaultdict()
def exists(self, database_object: DatabaseObject) -> bool:
"""
Check if a DatabaseObject with the same id already exists in the cache.
:param database_object: The DatabaseObject to check for.
:return: True if the DatabaseObject exists, False otherwise.
"""
if database_object.dynamic:
return True
return database_object.id in self.object_to_id
def on_death(self, weakref_: weakref.ref) -> None:
"""
Callback function that gets triggered when the reference count of a DatabaseObject drops to 0.
This function removes the DatabaseObject from the cache.
:param weakref_: The weak reference of the DatabaseObject that has been garbage collected.
"""
data_id = self.weakref_map.pop(weakref_)
self.object_to_id.pop(data_id)
def get_weakref(self, database_object: DatabaseObject) -> weakref.ref:
return weakref.ref(database_object, self.on_death)
def append(self, database_object: DatabaseObject) -> bool:
"""
Add a DatabaseObject to the cache.
:param database_object: The DatabaseObject to add to the cache.
:return: True if the DatabaseObject already exists in the cache, False otherwise.
"""
if self.exists(database_object):
return True
self.weakref_map[weakref.ref(database_object, self.on_death)] = database_object.id
self.object_to_id[database_object.id] = database_object
return False
def extent(self, database_object_list: List[DatabaseObject]):
"""
Analogous to the extend method of list: appends every object in the given list to the cache.
"""
for database_object in database_object_list:
self.append(database_object)
def remove(self, _id: str):
"""
Remove a DatabaseObject from the cache.
:param _id: The id of the DatabaseObject to remove from the cache.
"""
data = self.object_to_id.get(_id)
if data:
self.weakref_map.pop(weakref.ref(data))
self.object_to_id.pop(_id)
def __getitem__(self, item) -> Optional[DatabaseObject]:
"""
this returns the data obj
:param item: the id of the music object
:return:
"""
return self.object_to_id.get(item)
def get(self, _id: str) -> Optional[DatabaseObject]:
return self.__getitem__(_id)

View File

@ -121,6 +121,7 @@ class Musify(Page):
self.connection: Connection = Connection(
host="https://musify.club/",
logger=self.LOGGER,
module="musify",
)
self.stream_connection: Connection = Connection(
@ -393,10 +394,11 @@ class Musify(Page):
return search_results
def fetch_song(self, source: Source, stop_at_level: int = 1) -> Song:
# https://musify.club/track/linkin-park-numb-210765
r = self.connection.get(source.url)
musify_url = parse_url(source.url)
r = self.connection.get(source.url, name="track_" + musify_url.name_with_id)
if r is None:
return Song
return Song()
soup = self.get_soup_from_response(r)
@ -672,7 +674,7 @@ class Musify(Page):
url = parse_url(source.url)
endpoint = self.HOST + "/release/" + url.name_with_id
r = self.connection.get(endpoint)
r = self.connection.get(endpoint, name=url.name_with_id)
if r is None:
return Album()
@ -709,7 +711,7 @@ class Musify(Page):
:return:
"""
r = self.connection.get(f"https://musify.club/{url.source_type.value}/{url.name_with_id}?_pjax=#bodyContent")
r = self.connection.get(f"https://musify.club/{url.source_type.value}/{url.name_with_id}?_pjax=#bodyContent", name="artist_attributes_" + url.name_with_id)
if r is None:
return Artist()
@ -1075,7 +1077,7 @@ class Musify(Page):
"SortOrder.Property": "dateCreated",
"SortOrder.IsAscending": False,
"X-Requested-With": "XMLHttpRequest"
})
}, name="discography_" + url.name_with_id)
if r is None:
return []
soup: BeautifulSoup = BeautifulSoup(r.content, features="html.parser")

View File

@ -51,14 +51,20 @@ def trace(msg: str):
if not DEBUG_TRACE:
return
output("trace: " + msg, BColors.OKBLUE)
output(BColors.OKBLUE.value + "trace: " + BColors.ENDC.value + msg)
def request_trace(msg: str):
if not DEBUG_TRACE:
return
output(BColors.OKGREEN.value + "request: " + BColors.ENDC.value + msg)
def object_trace(obj):
if not DEBUG_OBJECT_TRACE:
return
appendix = f" called by [{' | '.join(f'{s.function} {Path(s.filename).name}:{str(s.lineno)}' for s in inspect.stack()[1:5])}]" if DEBUG_OBJECT_TRACE_CALLSTACK else ""
output("object: " + str(obj) + appendix, BColors.GREY)
output("object: " + str(obj) + appendix)
"""

View File

@ -34,13 +34,15 @@ def unify(string: str) -> str:
return string.lower()
def fit_to_file_system(string: Union[str, Path]) -> Union[str, Path]:
def fit_to_file_system(string: Union[str, Path], hidden_ok: bool = False) -> Union[str, Path]:
def fit_string(string: str) -> str:
nonlocal hidden_ok
if string == "/":
return "/"
string = string.strip()
while string[0] == ".":
while string[0] == "." and not hidden_ok:
if len(string) == 0:
return string