added rate limiting

Hazel Noack 2025-06-10 15:55:31 +02:00
parent bef2c214bc
commit 62a6bc387f


@@ -16,6 +16,7 @@ from hashlib import sha1
 from pathlib import Path
 import requests
 import uuid
+import time

 from . import __name__
@@ -52,6 +53,29 @@ temp_path.mkdir(exist_ok=True)
 __assets__ = str(Path(Path(__file__).parent, "assets"))

+REQUEST_DELAY = 3  # in seconds
+ADDITIONAL_DELAY_PER_TRY = 1
+last_request = 0
+
+
+def get_request(session: requests.Session, url: str, attempt: int = 0) -> requests.Response:
+    global last_request, REQUEST_DELAY, ADDITIONAL_DELAY_PER_TRY
+
+    current_delay = REQUEST_DELAY + (ADDITIONAL_DELAY_PER_TRY * attempt)
+    elapsed_time = time.time() - last_request
+
+    to_wait = current_delay - elapsed_time
+    if to_wait > 0:
+        print(f"waiting {to_wait} at attempt {attempt}: {url}")
+        time.sleep(to_wait)
+
+    last_request = time.time()
+    resp = session.get(url, headers=headers)
+
+    if resp.status_code == 429:
+        return get_request(session, url, attempt=attempt + 1)
+
+    return resp

 class Asset:
     """
     - `content`: the `bytes` content of the image
@@ -104,7 +128,7 @@ class Asset:
             return

         try:
-            r = self.session.get(self.url, headers=headers)
+            r = get_request(self.session, self.url)
             self.content = r.content
             temp.write_bytes(r.content)
             self.success = True
@@ -141,7 +165,7 @@ class ScribbleChapter:
         )

     def load(self):
-        resp = self.session.get(self.source_url, headers=headers)
+        resp = get_request(self.session, self.source_url)
         soup = BeautifulSoup(resp.text, "lxml")

         if self.parent.disable_author_quotes:
@@ -153,7 +177,8 @@ class ScribbleChapter:
             log.debug(f'Found language {tag["lang"]}')
             self.parent.languages.append(tag["lang"])
-        self.title = soup.find(class_="chapter-title").text
+        t = soup.find(class_="chapter-title")
+        self.title = t.text

         log.info(f"{self.parent.title} Chapter {self.index}: {self.title}")

         if not mimetypes.inited:
@@ -279,14 +304,14 @@ class ScribbleBook:
     def load(self, limit_chapters: Optional[int] = None):
         self.load_metadata()
-        print(str(self))
+        print(f"{self.title} by {self.author}:")

         self.fetch_chapters(limit=limit_chapters)

         if limit_chapters is not None:
             self.chapters = self.chapters[:limit_chapters]

         for chapter in self.chapters:
-            print(str(chapter))
+            print(f"- {chapter.title}")
             chapter.load()

     def load_metadata(self) -> None:
@@ -300,10 +325,8 @@ class ScribbleBook:
         self.slug = _parts[-1]
         self.identifier = _parts[-2]

-        html = self.session.get(self.source_url, headers=headers)
-
-        html = self.session.get(self.source_url)
-        soup = BeautifulSoup(html.text, "lxml")
+        resp = get_request(self.session, self.source_url)
+        soup = BeautifulSoup(resp.text, "lxml")

         for tag in soup.find_all(lambda x: x.has_attr("lang")):
             log.debug(f'Found language {tag["lang"]}')
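Not part of the commit, just an illustration: a minimal, self-contained sketch of how the new throttled helper is meant to be called. The module-level `headers` dict and its User-Agent value are stand-ins for objects assumed to exist in the real module.

    import time
    import requests

    # Stand-in for the module-level headers dict used by get_request (value is made up).
    headers = {"User-Agent": "scribble-to-epub"}

    REQUEST_DELAY = 3             # minimum seconds between any two requests
    ADDITIONAL_DELAY_PER_TRY = 1  # extra seconds added for each retry attempt
    last_request = 0.0


    def get_request(session: requests.Session, url: str, attempt: int = 0) -> requests.Response:
        """GET with a global rate limit; retries with a longer delay on HTTP 429."""
        global last_request

        current_delay = REQUEST_DELAY + ADDITIONAL_DELAY_PER_TRY * attempt
        to_wait = current_delay - (time.time() - last_request)
        if to_wait > 0:
            time.sleep(to_wait)

        last_request = time.time()
        resp = session.get(url, headers=headers)
        if resp.status_code == 429:
            # Server says we are going too fast: wait longer and try again.
            return get_request(session, url, attempt=attempt + 1)
        return resp


    if __name__ == "__main__":
        session = requests.Session()
        # Consecutive calls are spaced at least REQUEST_DELAY seconds apart.
        for url in ("https://example.com/", "https://example.com/"):
            print(get_request(session, url).status_code)

Each retry after a 429 adds ADDITIONAL_DELAY_PER_TRY seconds on top of the base REQUEST_DELAY, so repeated rate-limit responses back off linearly rather than hammering the server.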