Compare commits


No commits in common. "6ee528d4950ceff9581cb6e7f5716e59782d1f84" and "7b190552645a527916aae6af625a339bc0f015c1" have entirely different histories.

2 changed files with 60 additions and 23 deletions
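Summary of the change: the easy-requests dependency (the source of python_requests.Connection) is dropped in favour of plain requests on top of a cloudscraper session, rate limiting and 429 retries move into a module-level get_request() helper, and downloaded assets are now cached under /tmp.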

pyproject.toml

@@ -2,7 +2,7 @@
 name = "scribble_to_epub"
 dependencies = [
     'beautifulsoup4~=4.13.4',
-    'easy-requests~=0.0.0',
+    'requests',
     'EbookLib~=0.19',
     'arrow~=1.3.0',
     'ftfy~=6.3.1',

scribble_to_epub (main module)

@@ -14,8 +14,9 @@ import math
 from codecs import encode
 from hashlib import sha1
 from pathlib import Path
+import requests
 import uuid
-from python_requests import Connection, set_cache_directory
+import time
 from . import __name__
@@ -35,9 +36,10 @@ requests_log.setLevel(logging.DEBUG)
 requests_log.propagate = True
 """
-set_cache_directory(Path("/tmp", __name__))
 log = logging.getLogger(__name__)
+headers = {"User-Agent": "node"}

 CHAPTER_MATCH = re.compile(
     r"(?P<url_root>.*)/read/(?P<story_id>\d*)-(?P<slug>.*?)/chapter/(?P<chapter_id>\d*)"
@@ -45,9 +47,33 @@ CHAPTER_MATCH = re.compile(
 STORY_MATCH = re.compile(r"(?P<url_root>.*)/series/(?P<story_id>\d*)/(?P<slug>[a-z-]*)")
 DATE_MATCH = re.compile("Last updated: .*")

+temp_path = Path("/tmp", __name__)
+temp_path.mkdir(exist_ok=True)
+
 __assets__ = str(Path(Path(__file__).parent, "assets"))

+REQUEST_DELAY = 3  # in seconds
+ADDITIONAL_DELAY_PER_TRY = 1
+MAX_RETRIES = 10  # cap retries the way the removed Connection(max_retries=10) did
+last_request = 0
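+
+# Module-level throttle shared by all requests: wait at least REQUEST_DELAY
+# seconds between calls, plus ADDITIONAL_DELAY_PER_TRY extra seconds per retry.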
+def get_request(session: requests.Session, url: str, attempt: int = 0) -> requests.Response:
+    global last_request
+
+    current_delay = REQUEST_DELAY + (ADDITIONAL_DELAY_PER_TRY * attempt)
+    elapsed_time = time.time() - last_request
+    to_wait = current_delay - elapsed_time
+    if to_wait > 0:
+        log.info(f"waiting {to_wait} at attempt {attempt}: {url}")
+        time.sleep(to_wait)
+
+    last_request = time.time()
+    resp = session.get(url, headers=headers)
+    if resp.status_code == 429 and attempt < MAX_RETRIES:
+        # 429 Too Many Requests: back off and try again with a longer delay
+        return get_request(session, url, attempt=attempt + 1)
+    return resp

 class Asset:

@@ -87,13 +113,30 @@ class Asset:
     def relpath(self) -> str:
         return f"static/{self.filename}"
-    def __init__(self, url: str, connection: Optional[Connection] = None):
+    def __init__(self, url: str, session: Optional[requests.Session] = None):
         self.url = url
-        self.connection = connection or Connection()
+        self.session = session or requests.Session()
         self.fetch()
     def fetch(self):
+        self.success = False  # stays False if the download fails
+        temp = Path(temp_path, self.filename)
+        if temp.exists():
+            # reuse a copy already downloaded to /tmp during an earlier run
+            self.content = temp.read_bytes()
+            self.success = True
+            return
+
+        try:
+            r = get_request(self.session, self.url)
+            r.raise_for_status()  # raise HTTPError on 4xx/5xx so the handler below fires
+            self.content = r.content
+            temp.write_bytes(r.content)
+            self.success = True
+        except requests.HTTPError as e:
+            log.warning(
+                f'Issue fetching asset {self.url} because "{e.response.status_code}: {e.response.reason}"'
+            )
-        resp = self.connection.get(self.url)
-        self.content = resp.content
-        self.success = True

 class ScribbleChapter:

@@ -104,11 +147,11 @@ class ScribbleChapter:
     text: str  # HTML content of chapter
     date: arrow.Arrow

-    def __init__(self, parent: ScribbleBook, url: str, connection: Connection):
+    def __init__(self, parent: ScribbleBook, url: str, session: cloudscraper.CloudScraper):
         self.parent = parent
         self.source_url = url
-        self.connection = connection
+        self.session = session
         self.add_asset = self.parent.add_asset

     def __str__(self):
@@ -122,7 +165,7 @@ class ScribbleChapter:
         )

     def load(self):
-        resp = self.connection.get(self.source_url)
+        resp = get_request(self.session, self.source_url)
         soup = BeautifulSoup(resp.text, "lxml")

         if self.parent.disable_author_quotes:
@@ -241,13 +284,7 @@ class ScribbleBook:
         self.tags = []
         self.chapters: List[ScribbleChapter] = []

-        self.connection = Connection(
-            session=cloudscraper.create_scraper(),
-            request_delay=3,
-            additional_delay_per_try=1,
-            max_retries=10,
-        )
+        self.session = cloudscraper.create_scraper()

         if file_name is not None:
             self.file_name = file_name
@@ -258,7 +295,7 @@
         if url.strip() == "":
             return

-        a = Asset(url, self.connection)
+        a = Asset(url, self.session)
         if a.success:
             self.assets[a.url] = a
             return a
@@ -288,7 +325,7 @@
         self.slug = _parts[-1]
         self.identifier = _parts[-2]

-        resp = self.connection.get(self.source_url)
+        resp = get_request(self.session, self.source_url)
         soup = BeautifulSoup(resp.text, "lxml")

         for tag in soup.find_all(lambda x: x.has_attr("lang")):
@@ -338,19 +375,19 @@
         page_count = min(page_count, limit)

         for page in range(1, page_count + 1):
-            chapter_resp = self.connection.post(
+            chapter_resp = self.session.post(
                 "https://www.scribblehub.com/wp-admin/admin-ajax.php",
                 {
                     "action": "wi_getreleases_pagination",
                     "pagenum": page,
                     "mypostid": self.identifier,
                 },
-                cache_identifier=f"pagenum{page}mypostid{self.identifier}",
+                headers=headers,
             )
             chapter_soup = BeautifulSoup(chapter_resp.text, "lxml")

             for chapter_tag in chapter_soup.find_all(class_="toc_w"):
-                chapter = ScribbleChapter(self, chapter_tag.a["href"], self.connection)
+                chapter = ScribbleChapter(self, chapter_tag.a["href"], self.session)
                 chapter.index = int(chapter_tag["order"])
                 chapter.title = chapter_tag.a.text
                 chapter.date = arrow.get(
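
For context, a minimal sketch of how the new pieces fit together, assuming get_request, headers and the constants above are importable from this module (the story URL is a hypothetical placeholder):

import cloudscraper

# One Cloudflare-aware session is shared by the book, its chapters and assets.
session = cloudscraper.create_scraper()

# get_request() keeps at least REQUEST_DELAY seconds between calls and backs
# off by ADDITIONAL_DELAY_PER_TRY extra seconds per retry when the server
# answers 429, up to MAX_RETRIES attempts.
resp = get_request(session, "https://www.scribblehub.com/series/123456/some-story/")
print(resp.status_code, len(resp.text))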