From 292d71edc52009fff973784134799d05d7a7141e Mon Sep 17 00:00:00 2001
From: Lars Noack
Date: Wed, 16 Apr 2025 15:26:43 +0200
Subject: [PATCH] feat: added proper content

---
 .gitignore    |  3 ++-
 stsg/build.py | 54 ++++++++++++++++++++++++++++++++++++++++++++++++---
 2 files changed, 53 insertions(+), 4 deletions(-)

diff --git a/.gitignore b/.gitignore
index 961b463..ede09e9 100644
--- a/.gitignore
+++ b/.gitignore
@@ -173,4 +173,5 @@ cython_debug/
 
 # PyPI configuration file
 .pypirc
-dist
\ No newline at end of file
+dist
+context.json
\ No newline at end of file
diff --git a/stsg/build.py b/stsg/build.py
index fe9f615..feac937 100644
--- a/stsg/build.py
+++ b/stsg/build.py
@@ -30,6 +30,44 @@ def get_first_header_content(content, fallback: str = ""):
     return fallback
 
 
+def shorten_text_and_clean(html_string, max_length=config.formatting.article_preview_length):
+    soup = BeautifulSoup(html_string, 'html.parser')
+
+    # Keep track of total characters added
+    total_chars = 0
+    finished = False
+
+    # Function to recursively trim and clean text
+    def process_element(element):
+        nonlocal total_chars, finished
+
+        for child in list(element.children):
+            if finished:
+                child.extract()
+                continue
+
+            if isinstance(child, str):
+                remaining = max_length - total_chars
+                if remaining <= 0:
+                    child.extract()
+                    finished = True
+                elif len(child) > remaining:
+                    child.replace_with(child[:remaining] + '...')
+                    total_chars = max_length
+                    finished = True
+                else:
+                    total_chars += len(child)
+            elif hasattr(child, 'children'):
+                process_element(child)
+                # Remove empty tags
+                if not child.text.strip():
+                    child.decompose()
+
+    process_element(soup)
+
+    return str(soup)
+
+
 def stem_to_language_code(stem: str) -> str:
     language_code = stem.lower().replace("-", "_")
 
@@ -98,8 +136,16 @@ class ArticleTranslation:
 
     def __init_context__(self):
         self.context["meta"] = self.article.context_meta
+        self.context["url"] = self.url
         self.context["language"] = LANGUAGES[self.language_code]
 
+        html_content = self.file.read_text()
+        if self.file.suffix == ".md":
+            html_content = markdown.markdown(html_content)
+
+        self.context["content"] = html_content
+        self.context["preview"] = shorten_text_and_clean(html_string=html_content)
+
     def build(self):
         self.dist_path.mkdir(parents=True, exist_ok=True)
 
@@ -110,8 +156,6 @@ class ArticleTranslation:
         r = {
             "article_content": self.article_content,
             "article_preview": self.article_preview,
-            "article_url": self.url,
-            "article_slug": self.article.slug,
             "article_title": self.title,
         }
 
@@ -269,11 +313,15 @@ def build():
 
     logger.info("building page tree...")
    tree = Article(directory=Path(config.setup.source_directory, "articles"), is_root=True)
+
+    print(ARTICLE_LAKE.keys())
+
     logger.info("compiling tree context...")
     tree.__init_context__()
 
     import json
-    print(json.dumps(tree.context, indent=4))
+    with Path("context.json").open("w") as f:
+        json.dump(tree.context, f, indent=4)
 
     # build article reverence values
     for article_overview in ARTICLE_LAKE.values():
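
Note (not part of the patch): a minimal usage sketch of the new preview helper introduced above. It assumes the stsg package and its config are importable from the project root, and it passes max_length explicitly instead of relying on the config.formatting.article_preview_length default so the output is predictable.

# Usage sketch only -- assumes `stsg` is importable and its config loads.
from stsg.build import shorten_text_and_clean

sample = "<h1>Hello</h1><p>" + "word " * 100 + "</p><p>never reached</p>"

# Tags survive the cut, the long text node is truncated with an ellipsis once
# the character budget is spent, and elements emptied by the trim (the second
# <p>) are decomposed entirely.
preview = shorten_text_and_clean(html_string=sample, max_length=60)
print(preview)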