diff --git a/src/templates/article_card.html b/src/templates/article_card.html new file mode 100644 index 0000000..e8dd226 --- /dev/null +++ b/src/templates/article_card.html @@ -0,0 +1,12 @@ +
+ +
+

{article_title}

+

+ {article_preview} +
+ +

+
+
+
\ No newline at end of file diff --git a/src/templates/translation_card.html b/src/templates/translation_card.html new file mode 100644 index 0000000..3d896a9 --- /dev/null +++ b/src/templates/translation_card.html @@ -0,0 +1,12 @@ +
+ +
+

{article_language_flag} {article_title}

+

+ {article_preview} +
+ +

+
+
+
\ No newline at end of file diff --git a/stsg/build.py b/stsg/build.py new file mode 100644 index 0000000..cebd1f1 --- /dev/null +++ b/stsg/build.py @@ -0,0 +1,275 @@ +from __future__ import annotations +import logging +import shutil +from pathlib import Path +import os +import markdown +from typing import Optional, Union, Dict, Generator, List, DefaultDict, Any +from bs4 import BeautifulSoup +from collections import defaultdict +import toml +from datetime import datetime +import jinja2 + +from . import config + + +def replace_values(template: str, values: Dict[str, str]) -> str: + for key, value in values.items(): + template = template.replace("{" + key + "}", value) + + return template + + +def get_first_header_content(content, fallback: str = ""): + soup = BeautifulSoup(content, 'html.parser') + for level in range(1, 7): + header = soup.find(f'h{level}') + if header: + return header.get_text(strip=True) + + return fallback + + +def shorten_text_and_clean(html_string, max_length=config.formatting.article_preview_length): + soup = BeautifulSoup(html_string, 'html.parser') + + # Keep track of total characters added + total_chars = 0 + finished = False + + # Function to recursively trim and clean text + def process_element(element): + nonlocal total_chars, finished + + for child in list(element.children): + if finished: + child.extract() + continue + + if isinstance(child, str): + remaining = max_length - total_chars + if remaining <= 0: + child.extract() + finished = True + elif len(child) > remaining: + child.replace_with(child[:remaining] + '...') + total_chars = max_length + finished = True + else: + total_chars += len(child) + elif hasattr(child, 'children'): + process_element(child) + # Remove empty tags + if not child.text.strip(): + child.decompose() + + process_element(soup) + + return str(soup) + + +def stem_to_language_code(stem: str) -> str: + language_code = stem.lower().replace("-", "_") + + if language_code in config.languages: + return language_code + + language_code = language_code.split("_")[0] + if language_code in config.languages: + return language_code + + logger.error("Didn't recognize %s as a valid language code, add it to the config, or fix your structure.", stem) + exit(1) + + + +class TemplateDict(dict): + def __init__(self, folder: Path): + self.folder = folder + super().__init__() + + def __missing__(self, name: str) -> jinja2.Template: + f = self.folder / (name + ".html") + if not f.exists(): + logger.error("no template with the name %s exists", name) + exit(1) + + t = jinja2.Template(f.read_text()) + self[name] = t + return t + +TEMPLATE: Dict[str, jinja2.Template] = TemplateDict(Path(config.setup.source_directory, "templates")) + +class LanguageDict(dict): + def __missing__(self, key: str): + if key not in config.languages: + raise KeyError(key) + + lang_dict = config.languages[key] + lang_dict["priority"] = lang_dict.get("priority", 0) + + elements = key.split("_") + if len(elements) > 1: + elements[-1] = elements[-1].upper() + lang_dict["code"] = "-".join(elements) + + return lang_dict + +LANGUAGES = LanguageDict() + + +class ArticleTranslation: + def __init__(self, file: Path, article: Article): + self.file = file + self.article = article + + self.context: Dict[str, Any] = {} + + # initializing the location of the article translation + self.language_code = stem_to_language_code(self.file.stem) + self.location_in_tree = [self.language_code, *self.article.location_in_tree] + self.url = "/" + "/".join(self.location_in_tree) + self.dist_path = Path(config.setup.dist_directory, *self.location_in_tree) + + self.priority = LANGUAGES[self.language_code]["priority"] + self.real_language_code = LANGUAGES[self.language_code]["code"] + + # TODO remove + self.article_content = self.file.read_text() + self.article_preview = self.article_content[:config.formatting.article_preview_length] + "..." + if self.file.suffix == ".md": + self.article_content = markdown.markdown(self.article_content) + self.article_preview = markdown.markdown(self.article_preview) + + self.title = get_first_header_content(self.article_content, fallback="") + + def __init_context__(self): + self.context["meta"] = self.article.context_meta + self.context["url"] = self.url + self.context["language"] = LANGUAGES[self.language_code] + self.context["article_url"] = self.article.url + + html_content = self.file.read_text() + if self.file.suffix == ".md": + html_content = markdown.markdown(html_content) + + self.context["title"] = get_first_header_content(html_content, fallback=LANGUAGES[self.language_code]["native_name"]) + self.context["content"] = html_content + self.context["preview"] = shorten_text_and_clean(html_string=html_content) + + def build(self): + self.dist_path.mkdir(parents=True, exist_ok=True) + + with Path(self.dist_path, "index.html").open("w") as f: + f.write(TEMPLATE["article_translation"].render(self.context)) + + +class Article: + def __init__(self, directory: Path, location_in_tree: Optional[List[str]] = None, is_root: bool = False): + self.directory = directory + + self.context: Dict[str, Any] = {} + self.context_meta = self.context["meta"] = {} + + # initializing the config values of the article + config_file = self.directory / "index.toml" + self.config = toml.load(config_file) if config_file.exists() else {} + + # initializing the location and slug of the article + self.slug = self.config.get("name", self.directory.name) + if self.slug in ARTICLE_LAKE: + logger.error("two articles have the same name at %s and %r", ARTICLE_LAKE[self.slug].directory, self.directory) + exit(1) + ARTICLE_LAKE[self.slug] = self + + self.location_in_tree: List[str] = location_in_tree or [] + if not is_root: + self.location_in_tree.append(self.slug) + self.url = "/" + "/".join(self.location_in_tree) + self.dist_path = Path(config.setup.dist_directory, *self.location_in_tree) + + # build the tree + self.child_articles: List[Article] = [] + self.article_translations_list: List[ArticleTranslation] = [] + self.article_translations_map: Dict[str, ArticleTranslation] = {} + + for c in self.directory.iterdir(): + if c.name == "index.toml": + continue + + if c.is_file(): + at = ArticleTranslation(c, self) + self.article_translations_list.append(at) + self.article_translations_map[at.language_code] = at + elif c.is_dir(): + self.child_articles.append(Article( + directory=c, + location_in_tree=self.location_in_tree.copy(), + )) + + self.article_translations_list.sort(key=lambda a: a.priority, reverse=True) + + logger.info("found %s at %s with the translations %s", self.slug, ".".join(list(self.location_in_tree)), ",".join(self.article_translations_map.keys())) + + def __init_context__(self): + self.context["url"] = self.url + self.context_meta["slug"] = self.slug + + modified_at = datetime.fromisoformat(self.config["datetime"]) if "datetime" in self.config else datetime.fromtimestamp(self.directory.stat().st_mtime) + self.context_meta["date"] = modified_at.strftime(config.formatting.datetime_format) + self.context_meta["iso_date"] = modified_at.isoformat() + + # recursive context structures + translation_list = self.context["translations"] = [] + child_article_list = self.context["children"] = [] + + for article_translation in self.article_translations_list: + self.context[article_translation.real_language_code] = article_translation.context + translation_list.append(article_translation.context) + + for child_article in self.child_articles: + child_article_list.append(child_article.context) + + # recursively build context + for at in self.article_translations_list: + at.__init_context__() + for a in self.child_articles: + a.__init_context__() + + def build(self): + self.dist_path.mkdir(parents=True, exist_ok=True) + + with Path(self.dist_path, "index.html").open("w") as f: + f.write(TEMPLATE["article"].render(self.context)) + + for at in self.article_translations_list: + at.build() + + for ac in self.child_articles: + ac.build() + + +# GLOBALS +logger = logging.getLogger("stsg.build") +ARTICLE_LAKE: Dict[str, Article] = {} +ARTICLE_REFERENCE_VALUES: DefaultDict[str, Dict[str, str]] = defaultdict(dict) + +def build(): + logger.info("starting build process...") + + logger.info("copying static folder...") + shutil.copytree(Path(config.setup.source_directory, "static"), Path(config.setup.dist_directory, "static"), dirs_exist_ok=True) + + logger.info("building page tree...") + tree = Article(directory=Path(config.setup.source_directory, "articles"), is_root=True) + + logger.info("compiling tree context...") + tree.__init_context__() + + import json + with Path("context.json").open("w") as f: + json.dump(tree.context, f, indent=4) + + logger.info("dumping page tree...") + tree.build() \ No newline at end of file