bypass cloudflare

Hazel Noack 2025-06-10 12:13:02 +02:00
parent c9efcce693
commit 307f86aeee
4 changed files with 74 additions and 2 deletions

pyproject.toml

@@ -1,6 +1,10 @@
 [project]
 name = "scribble_to_epub"
-dependencies = []
+dependencies = [
+    'beautifulsoup4',
+    'requests',
+    'EbookLib',
+]
 authors = []
 description = "This scrapes books from https://www.scribblehub.com/ and creates epub from them"
 readme = "README.md"
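The three dependencies added here map onto the pipeline named in the project description: requests fetches pages, beautifulsoup4 parses the HTML, and EbookLib assembles the epub. A minimal sketch of how they combine (the URL and chapter content are illustrative, not taken from this commit):

```python
import requests
from bs4 import BeautifulSoup
from ebooklib import epub

# Fetch and parse a page (hypothetical URL; a plain requests call like this
# is what the cloudscraper session below replaces).
resp = requests.get("https://www.scribblehub.com/series/123456/some-story/")
soup = BeautifulSoup(resp.text, "html.parser")

# Assemble a one-chapter epub from the parsed content.
book = epub.EpubBook()
book.set_title(soup.title.string if soup.title else "Untitled")
chapter = epub.EpubHtml(title="Chapter 1", file_name="chap_1.xhtml", lang="en")
chapter.content = f"<h1>Chapter 1</h1><p>{soup.get_text()[:200]}</p>"
book.add_item(chapter)
book.toc = (chapter,)
book.add_item(epub.EpubNcx())
book.add_item(epub.EpubNav())
book.spine = ["nav", chapter]
epub.write_epub("example.epub", book)
```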

CLI module

@@ -1,5 +1,7 @@
 import argparse

+from .scribblehub import ScribbleBook
+

 def cli():
     parser = argparse.ArgumentParser(
@@ -15,7 +17,8 @@ def cli():
     args = parser.parse_args()

     print(f"Running scribble_to_epub for URL: {args.url}")
-    # You would call your main scraping and EPUB creation logic here
+
+    ScribbleBook(args.url)

 if __name__ == "__main__":
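For context, the CLI module after this change plausibly reads as follows. The argparse setup between the two hunks is not shown in the diff, so the positional `url` argument defined here is an assumption inferred from the use of `args.url`:

```python
import argparse

from .scribblehub import ScribbleBook


def cli():
    parser = argparse.ArgumentParser(
        description="Scrape a book from scribblehub.com and build an epub"
    )
    # Assumed: the diff does not show the argument definitions; a positional
    # `url` argument is inferred from `args.url` below.
    parser.add_argument("url", help="URL of the ScribbleHub series to scrape")
    args = parser.parse_args()

    print(f"Running scribble_to_epub for URL: {args.url}")

    ScribbleBook(args.url)


if __name__ == "__main__":
    cli()
```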

scribble_to_epub/connection.py

@@ -0,0 +1,36 @@
+import cloudscraper
+import logging
+
+try:
+    import http.client as http_client
+except ImportError:
+    # Python 2
+    import httplib as http_client
+http_client.HTTPConnection.debuglevel = 1
+
+# You must initialize logging, otherwise you'll not see debug output.
+logging.basicConfig()
+logging.getLogger().setLevel(logging.DEBUG)
+requests_log = logging.getLogger("requests.packages.urllib3")
+requests_log.setLevel(logging.DEBUG)
+requests_log.propagate = True
+
+
+def get_session() -> cloudscraper.CloudScraper:
+    """
+    session = requests.Session()
+    session.headers = {
+        "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
+        "Accept-Encoding": "gzip, deflate, br, zstd",
+        "Accept-Language": "en-US,en;q=0.5",
+        "Cache-Control": "no-cache",
+        "Connection": "keep-alive",
+        "User-Agent": "Mozilla/5.0 (X11; Linux x86_64; rv:139.0) Gecko/20100101 Firefox/139.0",
+        "Referer": "https://www.scribblehub.com/"
+    }
+    return session
+    """
+    return cloudscraper.create_scraper()
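cloudscraper's CloudScraper subclasses requests.Session and solves Cloudflare's JavaScript challenge before returning a response, which is why the old hand-rolled requests.Session setup could be retired into the docstring. Note that cloudscraper itself does not appear in the dependency list added to pyproject.toml above, so it presumably needs to be installed separately. A minimal usage sketch (the import path is assumed from the package layout):

```python
from scribble_to_epub.connection import get_session

session = get_session()
# Behaves like a requests.Session: same get/post API, and the Cloudflare
# clearance cookie persists across requests made on this session.
resp = session.get("https://www.scribblehub.com/")
print(resp.status_code)
```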

scribble_to_epub/scribblehub.py

@@ -0,0 +1,29 @@
+from functools import cached_property
+
+from .connection import get_session
+
+
+class ScribbleBook:
+    def __init__(self, url: str):
+        self.session = get_session()
+        self.source_url = url
+
+        _parts = [p for p in self.source_url.split("/") if len(p.strip())]
+        self.slug = _parts[-1]
+        self.identifier = _parts[-2]
+
+        print(f"scraping {self.slug} ({self.identifier})")
+
+        self.chapters = []
+        self.languages = []
+        self.genres = []
+        self.tags = []
+
+        self.load()
+
+    def load(self) -> None:
+        """
+        Load the metadata for this object
+        """
+        html = self.session.get(self.source_url)
+        print(html)
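ScribbleBook splits the series URL into its non-empty path segments to recover the numeric identifier and the slug. Note that `session.get()` returns a requests Response object, so `print(html)` prints something like `<Response [200]>` rather than markup; the page body lives in `html.text`. A usage sketch with an illustrative URL:

```python
from scribble_to_epub.scribblehub import ScribbleBook

# Illustrative ScribbleHub URL shape: .../series/<identifier>/<slug>/
book = ScribbleBook("https://www.scribblehub.com/series/123456/some-story/")
# -> prints "scraping some-story (123456)", then the fetch result,
#    e.g. "<Response [200]>"
```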