feat: clean and clear methods for the cache

This commit is contained in:
Hazel 2024-01-19 18:45:12 +01:00
parent 031f274d69
commit 2d4ba50b57
5 changed files with 80 additions and 22 deletions

View File

@ -2,7 +2,5 @@
<project version="4">
<component name="VcsDirectoryMappings">
<mapping directory="" vcs="Git" />
<mapping directory="$PROJECT_DIR$/../forks/sponsorblock.py" vcs="Git" />
<mapping directory="$PROJECT_DIR$/../rythmbox-id3-lyrics-support" vcs="Git" />
</component>
</project>

View File

@ -1,4 +1,4 @@
requests~=2.28.1
requests~=2.31.0
mutagen~=1.46.0
musicbrainzngs~=0.7.1
jellyfish~=0.9.0
@ -20,4 +20,5 @@ pathvalidate~=2.5.2
guppy3~=3.1.3
toml~=0.10.2
typing_extensions~=4.7.1
typing_extensions~=4.7.1
responses~=0.24.1

View File

@ -64,6 +64,18 @@ class Cache:
r.mkdir(exist_ok=True)
return r
def _write_index(self, indent: int = 4):
    """
    Write every cached attribute to the JSON index file.

    The fields named in ``self._time_fields`` are stored as ISO 8601 strings;
    all other fields are written as they are.

    :param indent: indentation width passed on to ``json.dumps``
    :return:
    """
    _json = []
    for c in self.cached_attributes:
        # Serialize from a shallow copy. Writing the ISO strings into
        # c.__dict__ itself would replace the attribute's datetime values
        # with str, and the next index write would fail on str.isoformat().
        d = dict(c.__dict__)
        for key in self._time_fields:
            value = d[key]
            # a value that is already a string is written unchanged
            d[key] = value if isinstance(value, str) else value.isoformat()

        _json.append(d)

    with self.index.open("w") as f:
        f.write(json.dumps(_json, indent=indent))
def _write_attribute(self, cached_attribute: CacheAttribute, write: bool = True) -> bool:
existing_attribute: Optional[CacheAttribute] = self._id_to_attribute.get(cached_attribute.id)
if existing_attribute is not None:
@ -80,16 +92,7 @@ class Cache:
self._id_to_attribute[cached_attribute.id] = cached_attribute
if write:
_json = []
for c in self.cached_attributes:
d = c.__dict__
for key in self._time_fields:
d[key] = d[key].isoformat()
_json.append(d)
with self.index.open("w") as f:
f.write(json.dumps(_json, indent=4))
self._write_index()
return True
@ -132,3 +135,59 @@ class Cache:
with path.open("rb") as f:
return f.read()
def clean(self):
    """
    Delete expired cache entries and files the index does not know about.

    Attributes that are no longer valid are removed from the in-memory index
    together with their file. Afterwards every module folder is swept: files
    that do not belong to a valid attribute are deleted and empty
    sub-directories are removed. Finally the index file is rewritten.

    :return:
    """
    keep = set()

    for ca in self.cached_attributes.copy():
        file = Path(self._dir, ca.module, ca.name)

        if ca.is_valid:
            keep.add(file)
            continue

        self.logger.debug(f"deleting cache {ca.id}")
        try:
            file.unlink()
        except FileNotFoundError:
            # the index entry can outlive its file; a missing file must not
            # abort the rest of the cleanup
            pass
        self.cached_attributes.remove(ca)
        self._id_to_attribute.pop(ca.id, None)

    # iterate through every module (folder)
    for module_path in self._dir.iterdir():
        if not module_path.is_dir():
            continue

        # snapshot the listing, because entries are removed while walking it
        for path in list(module_path.iterdir()):
            if path.is_dir():
                # directories cannot be unlinked; only empty ones are removed
                if not any(path.iterdir()):
                    self.logger.debug(f"Deleting cache directory {path}")
                    path.rmdir()
                continue

            # delete all files not in keep
            if path not in keep:
                self.logger.info(f"Deleting cache {path}")
                path.unlink()

    self._write_index()
def clear(self):
    """
    delete everything in the cache directory, then write an empty index

    Directories are removed recursively, so nested folders inside a module
    folder are handled as well. Symbolic links are unlinked instead of being
    followed, which leaves their targets untouched.

    :return:
    """
    # local import: the file's import block is not part of this change
    import shutil

    for path in self._dir.iterdir():
        if path.is_dir() and not path.is_symlink():
            shutil.rmtree(path)
        else:
            path.unlink()

    self.cached_attributes.clear()
    self._id_to_attribute.clear()

    self._write_index()
def __repr__(self):
    """Return a short debug representation that names the cache's module."""
    return "<Cache {}>".format(self.module)

View File

@ -1,19 +1,18 @@
import time
from typing import List, Dict, Callable, Optional, Set
from urllib.parse import urlparse, urlunsplit, ParseResult
import logging
import threading
import time
from typing import List, Dict, Optional, Set
from urllib.parse import urlparse, urlunsplit, ParseResult
import requests
import responses
from responses import matchers
from tqdm import tqdm
from .rotating import RotatingProxy
from .cache import Cache
from .rotating import RotatingProxy
from ..objects import Target
from ..utils.config import main_settings
from ..utils.support_classes.download_result import DownloadResult
from ..objects import Target
class Connection:

View File

@ -53,7 +53,8 @@ class Bandcamp(Page):
def __init__(self, *args, **kwargs):
# Create the Connection for bandcamp.com first, then run the parent
# initializer with all constructor arguments forwarded unchanged.
self.connection: Connection = Connection(
host="https://bandcamp.com/",
# NOTE(review): the next two lines look like the old and the new form of the
# same argument as rendered by the diff view; presumably only the
# comma-terminated one belongs in the file - confirm against the repository.
logger=self.LOGGER
logger=self.LOGGER,
# NOTE(review): "module" presumably names the cache namespace used by
# Connection - confirm against Connection.__init__.
module="bandcamp",
)
super().__init__(*args, **kwargs)