From 6994662bb4eb0949eac1eed29d941dcb7dca8b2a Mon Sep 17 00:00:00 2001 From: Lars Noack Date: Thu, 17 Apr 2025 14:05:59 +0200 Subject: [PATCH] feat: shift the headers in the preview --- stsg.toml | 3 ++- stsg/__init__.py | 3 ++- stsg/build.py | 32 ++++++++++++++++++++++---------- 3 files changed, 26 insertions(+), 12 deletions(-) diff --git a/stsg.toml b/stsg.toml index 6ca37b5..6664dea 100644 --- a/stsg.toml +++ b/stsg.toml @@ -3,7 +3,8 @@ source_directory = "src" dist_directory = "dist" [formatting] -article_preview_length = 400 +preview_length = 400 +preview_header_shift = 2 datetime_format = "%d. %B %Y" default_language = "de" diff --git a/stsg/__init__.py b/stsg/__init__.py index 30db4d5..c6164a1 100644 --- a/stsg/__init__.py +++ b/stsg/__init__.py @@ -4,9 +4,10 @@ class config: dist_directory = "dist" class formatting: - article_preview_length = 200 datetime_format = "%d. %B %Y" fallback_language = "en" + preview_length = 400 + preview_header_shift = 2 languages = { "af": { diff --git a/stsg/build.py b/stsg/build.py index 939a48f..bf4caeb 100644 --- a/stsg/build.py +++ b/stsg/build.py @@ -14,13 +14,6 @@ import jinja2 from . import config -def replace_values(template: str, values: Dict[str, str]) -> str: - for key, value in values.items(): - template = template.replace("{" + key + "}", value) - - return template - - def get_first_header_content(content, fallback: str = ""): soup = BeautifulSoup(content, 'html.parser') for level in range(1, 7): @@ -31,7 +24,7 @@ def get_first_header_content(content, fallback: str = ""): return fallback -def shorten_text_and_clean(html_string, max_length=config.formatting.article_preview_length): +def shorten_text_and_clean(html_string, max_length=config.formatting.preview_length): soup = BeautifulSoup(html_string, 'html.parser') # Keep track of total characters added @@ -69,6 +62,23 @@ def shorten_text_and_clean(html_string, max_length=config.formatting.article_pre return str(soup) +def shift_headings(html_string, header_shift=config.formatting.preview_header_shift): + soup = BeautifulSoup(html_string, 'html.parser') + + for level in range(6, 0, -1): # Start from h6 to h1 to avoid overwriting + old_tag = f'h{level}' + for tag in soup.find_all(old_tag): + new_level = min(level + header_shift, 6) # Cap at h6 + new_tag = f'h{new_level}' + tag.name = new_tag + + return str(soup) + + +def get_preview_text(html_string: str): + return shift_headings(shorten_text_and_clean(html_string)) + + def stem_to_language_code(stem: str) -> str: language_code = stem.lower().replace("-", "_") @@ -83,7 +93,6 @@ def stem_to_language_code(stem: str) -> str: exit(1) - class TemplateDict(dict): def __init__(self, folder: Path): self.folder = folder @@ -99,8 +108,10 @@ class TemplateDict(dict): self[name] = t return t + TEMPLATE: Dict[str, jinja2.Template] = TemplateDict(Path(config.setup.source_directory, "templates")) + class LanguageDict(dict): def __missing__(self, key: str): if key not in config.languages: @@ -116,6 +127,7 @@ class LanguageDict(dict): return lang_dict + LANGUAGES = LanguageDict() @@ -147,7 +159,7 @@ class ArticleTranslation: self.context["title"] = get_first_header_content(html_content, fallback=LANGUAGES[self.language_code]["native_name"]) self.context["content"] = html_content - self.context["preview"] = shorten_text_and_clean(html_string=html_content) + self.context["preview"] = get_preview_text(html_string=html_content) def build(self): self.dist_path.mkdir(parents=True, exist_ok=True)