added cache expiration

This commit is contained in:
Hazel Noack 2025-06-11 12:05:44 +02:00
parent 4c3527e702
commit 511401cd51

View File

@ -1,10 +1,11 @@
from typing import Optional
from codecs import encode from codecs import encode
from hashlib import sha1 from hashlib import sha1
from pathlib import Path from pathlib import Path
import requests import requests
import pickle import pickle
import sqlite3 import sqlite3
from datetime import datetime from datetime import datetime, timedelta
from . import CACHE_DIRECTORY from . import CACHE_DIRECTORY
@ -18,7 +19,7 @@ def _init_db():
conn.execute(""" conn.execute("""
CREATE TABLE IF NOT EXISTS url_cache ( CREATE TABLE IF NOT EXISTS url_cache (
url_hash TEXT PRIMARY KEY, url_hash TEXT PRIMARY KEY,
last_updated TIMESTAMP expires_at TIMESTAMP
) )
""") """)
conn.commit() conn.commit()
@ -36,16 +37,37 @@ def get_url_file(url: str) -> Path:
def has_cache(url: str) -> bool: def has_cache(url: str) -> bool:
cache_exists = get_url_file(url).exists() url_hash = get_url_hash(url)
if not cache_exists: cache_file = get_url_file(url)
if not cache_file.exists():
return False return False
# Check if the URL hash exists in the database # Check if the cache has expired
url_hash = get_url_hash(url)
with sqlite3.connect(DB_FILE) as conn: with sqlite3.connect(DB_FILE) as conn:
cursor = conn.cursor() cursor = conn.cursor()
cursor.execute("SELECT 1 FROM url_cache WHERE url_hash = ?", (url_hash,)) cursor.execute(
return cursor.fetchone() is not None "SELECT expires_at FROM url_cache WHERE url_hash = ?",
(url_hash,)
)
result = cursor.fetchone()
if result is None:
return False # No expiration record exists
expires_at = datetime.fromisoformat(result[0])
if datetime.now() > expires_at:
# Cache expired, clean it up
cache_file.unlink(missing_ok=True)
cursor.execute(
"DELETE FROM url_cache WHERE url_hash = ?",
(url_hash,)
)
conn.commit()
return False
return True
def get_cache(url: str) -> requests.Response: def get_cache(url: str) -> requests.Response:
@ -53,9 +75,18 @@ def get_cache(url: str) -> requests.Response:
return pickle.load(cache_file) return pickle.load(cache_file)
def write_cache(url: str, resp: requests.Response): def write_cache(
url: str,
resp: requests.Response,
expires_after: Optional[timedelta] = None
):
url_hash = get_url_hash(url) url_hash = get_url_hash(url)
current_time = datetime.now().isoformat()
# Default expiration: 24 hours from now
if expires_after is None:
expires_after = timedelta(hours=1)
expires_at = datetime.now() + expires_after
# Write the cache file # Write the cache file
with get_url_file(url).open("wb") as url_file: with get_url_file(url).open("wb") as url_file:
@ -64,7 +95,7 @@ def write_cache(url: str, resp: requests.Response):
# Update the database # Update the database
with sqlite3.connect(DB_FILE) as conn: with sqlite3.connect(DB_FILE) as conn:
conn.execute( conn.execute(
"INSERT OR REPLACE INTO url_cache (url_hash, last_updated) VALUES (?, ?)", "INSERT OR REPLACE INTO url_cache (url_hash, expires_at) VALUES (?, ?)",
(url_hash, current_time) (url_hash, expires_at.isoformat())
) )
conn.commit() conn.commit()