generated from Hazel/python-project
added rate limiting
This commit is contained in:
parent
bef2c214bc
commit
62a6bc387f
@ -16,6 +16,7 @@ from hashlib import sha1
|
|||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
import requests
|
import requests
|
||||||
import uuid
|
import uuid
|
||||||
|
import time
|
||||||
|
|
||||||
from . import __name__
|
from . import __name__
|
||||||
|
|
||||||
@ -52,6 +53,29 @@ temp_path.mkdir(exist_ok=True)
|
|||||||
__assets__ = str(Path(Path(__file__).parent, "assets"))
|
__assets__ = str(Path(Path(__file__).parent, "assets"))
|
||||||
|
|
||||||
|
|
||||||
|
# Minimum spacing, in seconds, enforced between two consecutive requests
# issued through get_request().
REQUEST_DELAY = 3  # in seconds
# Extra seconds added to that spacing for every retry after an HTTP 429 reply.
ADDITIONAL_DELAY_PER_TRY = 1
# time.time() timestamp taken just before the most recent request was sent.
# The initial 0 lies far in the past, so the first request never waits.
last_request = 0


def get_request(
    session: requests.Session,
    url: str,
    attempt: int = 0,
    *,
    max_attempts: Optional[int] = None,
) -> requests.Response:
    """
    GET `url` through `session`, throttled by a module-wide rate limit.

    Before each request the call sleeps until at least
    `REQUEST_DELAY + ADDITIONAL_DELAY_PER_TRY * attempt` seconds have passed
    since the previous request made through this function. A response with
    status 429 is retried with `attempt` increased by one, which lengthens
    the enforced delay by `ADDITIONAL_DELAY_PER_TRY` each time.

    Retries run in a loop rather than by self-recursion, so a server that
    keeps answering 429 can no longer exhaust the interpreter's recursion
    limit.

    - `session`: the `requests.Session` used to send the request
    - `url`: the address to fetch
    - `attempt`: retry counter to start from; callers normally leave it at 0
    - `max_attempts`: optional cap; once `attempt` reaches this value the last
      429 response is returned instead of retrying again. `None` (default)
      retries without limit, as before.

    Returns the first response whose status is not 429, or the last 429
    response when `max_attempts` is exhausted.
    """
    # Only last_request is rebound here; the two delay settings are just read.
    global last_request

    while True:
        current_delay = REQUEST_DELAY + (ADDITIONAL_DELAY_PER_TRY * attempt)
        elapsed_time = time.time() - last_request
        to_wait = current_delay - elapsed_time

        if to_wait > 0:
            print(f"waiting {to_wait} at attempt {attempt}: {url}")
            time.sleep(to_wait)

        # Stamp the time before sending so the spacing is measured from
        # request start to request start.
        last_request = time.time()
        # NOTE(review): `headers` is not defined in this block; presumably the
        # module-level default header dict used by the other requests - confirm.
        resp = session.get(url, headers=headers)

        if resp.status_code != 429:
            return resp

        attempt += 1
        if max_attempts is not None and attempt >= max_attempts:
            # Give up and hand the rate-limited response back to the caller.
            return resp
|
||||||
|
|
||||||
|
|
||||||
class Asset:
|
class Asset:
|
||||||
"""
|
"""
|
||||||
- `content`: the `bytes` content of the image
|
- `content`: the `bytes` content of the image
|
||||||
@ -104,7 +128,7 @@ class Asset:
|
|||||||
return
|
return
|
||||||
|
|
||||||
try:
|
try:
|
||||||
r = self.session.get(self.url, headers=headers)
|
r = get_request(self.session, self.url)
|
||||||
self.content = r.content
|
self.content = r.content
|
||||||
temp.write_bytes(r.content)
|
temp.write_bytes(r.content)
|
||||||
self.success = True
|
self.success = True
|
||||||
@ -141,7 +165,7 @@ class ScribbleChapter:
|
|||||||
)
|
)
|
||||||
|
|
||||||
def load(self):
|
def load(self):
|
||||||
resp = self.session.get(self.source_url, headers=headers)
|
resp = get_request(self.session, self.source_url)
|
||||||
soup = BeautifulSoup(resp.text, "lxml")
|
soup = BeautifulSoup(resp.text, "lxml")
|
||||||
|
|
||||||
if self.parent.disable_author_quotes:
|
if self.parent.disable_author_quotes:
|
||||||
@ -153,7 +177,8 @@ class ScribbleChapter:
|
|||||||
log.debug(f'Found language {tag["lang"]}')
|
log.debug(f'Found language {tag["lang"]}')
|
||||||
self.parent.languages.append(tag["lang"])
|
self.parent.languages.append(tag["lang"])
|
||||||
|
|
||||||
self.title = soup.find(class_="chapter-title").text
|
t = soup.find(class_="chapter-title")
|
||||||
|
self.title = t.text
|
||||||
log.info(f"{self.parent.title} Chapter {self.index}: {self.title}")
|
log.info(f"{self.parent.title} Chapter {self.index}: {self.title}")
|
||||||
|
|
||||||
if not mimetypes.inited:
|
if not mimetypes.inited:
|
||||||
@ -279,14 +304,14 @@ class ScribbleBook:
|
|||||||
|
|
||||||
def load(self, limit_chapters: Optional[int] = None):
|
def load(self, limit_chapters: Optional[int] = None):
|
||||||
self.load_metadata()
|
self.load_metadata()
|
||||||
print(str(self))
|
print(f"{self.title} by {self.author}:")
|
||||||
|
|
||||||
self.fetch_chapters(limit=limit_chapters)
|
self.fetch_chapters(limit=limit_chapters)
|
||||||
if limit_chapters is not None:
|
if limit_chapters is not None:
|
||||||
self.chapters = self.chapters[:limit_chapters]
|
self.chapters = self.chapters[:limit_chapters]
|
||||||
|
|
||||||
for chapter in self.chapters:
|
for chapter in self.chapters:
|
||||||
print(str(chapter))
|
print(f"- {chapter.title}")
|
||||||
chapter.load()
|
chapter.load()
|
||||||
|
|
||||||
def load_metadata(self) -> None:
|
def load_metadata(self) -> None:
|
||||||
@ -300,10 +325,8 @@ class ScribbleBook:
|
|||||||
self.slug = _parts[-1]
|
self.slug = _parts[-1]
|
||||||
self.identifier = _parts[-2]
|
self.identifier = _parts[-2]
|
||||||
|
|
||||||
html = self.session.get(self.source_url, headers=headers)
|
resp = get_request(self.session, self.source_url)
|
||||||
|
soup = BeautifulSoup(resp.text, "lxml")
|
||||||
html = self.session.get(self.source_url)
|
|
||||||
soup = BeautifulSoup(html.text, "lxml")
|
|
||||||
|
|
||||||
for tag in soup.find_all(lambda x: x.has_attr("lang")):
|
for tag in soup.find_all(lambda x: x.has_attr("lang")):
|
||||||
log.debug(f'Found language {tag["lang"]}')
|
log.debug(f'Found language {tag["lang"]}')
|
||||||
|
Loading…
x
Reference in New Issue
Block a user