feat: fetching assets for chapter

This commit is contained in:
Hazel Noack 2025-06-10 14:18:42 +02:00
parent 465afa46a2
commit fdd9f3c17f
2 changed files with 16 additions and 14 deletions

View File

@ -18,7 +18,8 @@ def cli():
print(f"Running scribble_to_epub for URL: {args.url}") print(f"Running scribble_to_epub for URL: {args.url}")
ScribbleBook(args.url) scribble_book = ScribbleBook(args.url)
scribble_book.load(limit_chapters=1)
if __name__ == "__main__": if __name__ == "__main__":

View File

@ -241,16 +241,7 @@ class ScribbleBook:
self.tags = [] self.tags = []
self.chapters: List[ScribbleChapter] = [] self.chapters: List[ScribbleChapter] = []
# fetching metadata
self.session = cloudscraper.create_scraper() self.session = cloudscraper.create_scraper()
self.load_metadata()
print(str(self))
self.get_chapters()
c = self.chapters[0]
c.load()
print(c.text)
def add_asset(self, url: str): def add_asset(self, url: str):
if url is None: if url is None:
@ -264,6 +255,16 @@ class ScribbleBook:
else: else:
log.warning(f"couldn't fetch asset {url}") log.warning(f"couldn't fetch asset {url}")
def load(self, limit_chapters: Optional[int] = None):
    """
    Load the whole book: metadata first, then the chapter list, then
    the content of every chapter.

    :param limit_chapters: maximum number of chapters to keep and load.
        ``None`` keeps and loads every chapter that was fetched.
    """
    self.load_metadata()
    print(str(self))

    self.fetch_chapters(limit=limit_chapters)
    if limit_chapters is not None:
        # fetch_chapters() applies its limit to the number of TOC pages it
        # requests; a page presumably lists more than one chapter, so the
        # list is trimmed here to actually honour the chapter limit.
        # max(..., 0) keeps a negative limit from slicing from the end.
        del self.chapters[max(limit_chapters, 0):]

    for chapter in self.chapters:
        print(str(chapter))
        chapter.load()
def load_metadata(self) -> None: def load_metadata(self) -> None:
""" """
Load the metadata for this object Load the metadata for this object
@ -315,7 +316,7 @@ class ScribbleBook:
continue continue
self.rights = ftfy.fix_text(img.next.string) self.rights = ftfy.fix_text(img.next.string)
def get_chapters(self) -> None: def fetch_chapters(self, limit: Optional[int] = None) -> None:
""" """
Fetch the chapters for the work, based on the TOC API Fetch the chapters for the work, based on the TOC API
""" """
@ -324,6 +325,9 @@ class ScribbleBook:
f"Expecting {self.chapter_count} chapters, page_count={page_count}" f"Expecting {self.chapter_count} chapters, page_count={page_count}"
) )
if limit is not None:
page_count = min(page_count, limit)
for page in range(1, page_count + 1): for page in range(1, page_count + 1):
chapter_resp = self.session.post( chapter_resp = self.session.post(
"https://www.scribblehub.com/wp-admin/admin-ajax.php", "https://www.scribblehub.com/wp-admin/admin-ajax.php",
@ -346,6 +350,3 @@ class ScribbleBook:
self.chapters.append(chapter) self.chapters.append(chapter)
self.chapters.sort(key=lambda x: x.index) self.chapters.sort(key=lambda x: x.index)
for c in self.chapters:
print(str(c))