From 146c3fc9143c6dfb8fc17b40705bd75014bc7a7b Mon Sep 17 00:00:00 2001 From: Hazel Noack Date: Tue, 10 Jun 2025 14:27:24 +0200 Subject: [PATCH] feat: started building book metadata --- scribble_to_epub/__main__.py | 1 + scribble_to_epub/scribblehub.py | 31 ++++++++++++++++++++++++++----- 2 files changed, 27 insertions(+), 5 deletions(-) diff --git a/scribble_to_epub/__main__.py b/scribble_to_epub/__main__.py index f6b8330..1188786 100644 --- a/scribble_to_epub/__main__.py +++ b/scribble_to_epub/__main__.py @@ -20,6 +20,7 @@ def cli(): scribble_book = ScribbleBook(args.url) scribble_book.load(limit_chapters=1) + scribble_book.build() if __name__ == "__main__": diff --git a/scribble_to_epub/scribblehub.py b/scribble_to_epub/scribblehub.py index 076ad56..dcc3305 100644 --- a/scribble_to_epub/scribblehub.py +++ b/scribble_to_epub/scribblehub.py @@ -15,6 +15,7 @@ from codecs import encode from hashlib import sha1 from pathlib import Path import requests +import uuid from . import __name__ @@ -209,13 +210,10 @@ class ScribbleBook: identifier: str # unique identifier (e.g. UUID, hosting site book ID, ISBN, etc.) genres: List[str] tags: List[str] + rights: str chapter_count: int - @cached_property - def rights(self) -> str: - return f"© {self.date.year} {self.author}" - def __str__(self): return ( f"BookMetadata(\n" @@ -313,6 +311,7 @@ class ScribbleBook: imgs = soup.find(class_="sb_content copyright").find_all("img") + self.rights = "" for img in imgs: if "copy" not in img["class"]: continue @@ -354,4 +353,26 @@ class ScribbleBook: self.chapters.sort(key=lambda x: x.index) def build(self): - pass + book = epub.EpubBook() + + # set up metadata + book.add_metadata("DC", "identifier", f"uuid:{uuid.uuid4()}", {"id": "BookId"}) + book.add_metadata( + "DC", "identifier", f"url:{self.source_url}", {"id": "Source"} + ) + book.add_metadata("DC", "subject", ",".join(self.tags), {"id": "tags"}) + book.add_metadata( + "DC", "subject", ",".join(self.genres), {"id": "genre"} + ) + book.set_title(self.title) + + book.add_metadata("DC", "date", self.date.isoformat()) + book.add_author(self.author) + book.add_metadata("DC", "publisher", self.publisher) + book.add_metadata( + "DC", + "rights", + f"Copyright © {self.date.year} {self.author} {self.rights}", + ) + book.add_metadata("DC", "description", self.description) +