354 lines
13 KiB
Python
354 lines
13 KiB
Python
from __future__ import annotations
|
|
import logging
|
|
import shutil
|
|
from pathlib import Path
|
|
import os
|
|
from markdown2 import markdown
|
|
from typing import Optional, Union, Dict, Generator, List, DefaultDict, Any, TypedDict, Set
|
|
from bs4 import BeautifulSoup
|
|
from collections import defaultdict
|
|
import toml
|
|
from datetime import datetime
|
|
import jinja2
|
|
|
|
from . import config
|
|
|
|
def get_first_header_content(content, fallback: str = ""):
    """Return the text of the highest-ranking heading found in *content*.

    Heading levels are probed in order ``h1`` .. ``h6``; the first level that
    has a match wins, and the first tag of that level supplies the text (with
    surrounding whitespace stripped). *fallback* is returned when the markup
    contains no heading at all.
    """
    parsed = BeautifulSoup(content, 'html.parser')

    # Lazy generator: lower-ranked levels are only searched when the
    # higher-ranked ones produced nothing.
    candidates = (parsed.find(f'h{depth}') for depth in range(1, 7))
    for heading in candidates:
        if heading:
            return heading.get_text(strip=True)

    return fallback
|
|
|
|
|
|
def shorten_text_and_clean(html_string, max_length=config.formatting.preview_length):
    """Truncate the text of an HTML fragment to *max_length* characters.

    The markup is walked depth-first. Text nodes are kept until the character
    budget is used up; the node that crosses the limit is cut and gets a
    trailing ``...``; everything after it is removed. Any tag whose text is
    empty (or whitespace only) after processing is dropped from the tree.
    Returns the resulting markup as a string.
    """
    document = BeautifulSoup(html_string, 'html.parser')

    consumed = 0          # number of text characters kept so far
    budget_spent = False  # flips to True once the limit has been reached

    def prune(node):
        nonlocal consumed, budget_spent

        # Walk a snapshot: the loop body removes and replaces children.
        for item in tuple(node.children):
            if budget_spent:
                item.extract()
                continue

            if isinstance(item, str):
                room = max_length - consumed
                if room <= 0:
                    item.extract()
                    budget_spent = True
                elif len(item) <= room:
                    consumed += len(item)
                else:
                    item.replace_with(item[:room] + '...')
                    consumed = max_length
                    budget_spent = True
                continue

            if not hasattr(item, 'children'):
                continue

            prune(item)
            # A tag left without any visible text is removed entirely.
            if not item.text.strip():
                item.decompose()

    prune(document)

    return str(document)
|
|
|
|
|
|
def shift_headings(html_string, header_shift=config.formatting.preview_header_shift):
    """Demote (or promote) every heading in *html_string* by *header_shift* levels.

    Each ``h1`` .. ``h6`` tag is renamed to ``h{level + header_shift}``, with
    the result clamped to the valid range ``h1`` .. ``h6``. Returns the
    modified markup as a string.
    """
    soup = BeautifulSoup(html_string, 'html.parser')

    # Collect every heading up front and rename from that snapshot. Renaming
    # while searching level by level lets an already-shifted tag be matched
    # again on a later pass, which happens for negative shifts.
    heading_names = [f'h{level}' for level in range(1, 7)]
    for tag in soup.find_all(heading_names):
        level = int(tag.name[1])
        new_level = max(1, min(level + header_shift, 6))  # keep within h1..h6
        tag.name = f'h{new_level}'

    return str(soup)
|
|
|
|
|
|
def get_preview_text(html_string: str):
    """Build the preview markup: shorten the HTML first, then shift its headings."""
    trimmed = shorten_text_and_clean(html_string)
    return shift_headings(trimmed)
|
|
|
|
|
|
def stem_to_language_code(stem: str) -> str:
    """Map a file stem to a language code known to ``config.languages``.

    The stem is lower-cased and ``-`` is replaced with ``_``. The full code is
    tried first (e.g. ``en_us``), then only its first ``_``-separated part
    (e.g. ``en``).

    Raises:
        SystemExit: with status 1, after logging an error, when neither form
            is configured.
    """
    language_code = stem.lower().replace("-", "_")
    base_code = language_code.split("_")[0]

    for candidate in (language_code, base_code):
        if candidate in config.languages:
            return candidate

    logger.error("Didn't recognize %s as a valid language code, add it to the config, or fix your structure.", stem)
    # Raise SystemExit directly: the ``exit`` helper is injected by the
    # ``site`` module for interactive use and is not guaranteed to exist.
    raise SystemExit(1)
|
|
|
|
|
|
class TemplateDict(dict):
    """Dictionary that loads ``<folder>/<name>.html`` as a Jinja template on first access.

    Loaded templates are stored in the dict, so each file is read and
    compiled at most once.
    """

    def __init__(self, folder: Path):
        self.folder = folder
        super().__init__()

    def __missing__(self, name: str) -> jinja2.Template:
        """Load, cache and return the template called *name*.

        Raises:
            SystemExit: with status 1, after logging an error, when the
                template file does not exist.
        """
        template_file = self.folder / (name + ".html")
        if not template_file.exists():
            logger.error("no template with the name %s exists", name)
            # ``exit`` is an interactive-shell helper from ``site``; raise directly.
            raise SystemExit(1)

        # Decode explicitly as UTF-8 so the result does not depend on the
        # platform's default locale encoding.
        template = jinja2.Template(template_file.read_text(encoding="utf-8"))
        self[name] = template
        return template
|
|
|
|
|
|
# Lazily populated template cache, backed by the "templates" folder inside the configured source directory.
TEMPLATE: Dict[str, jinja2.Template] = TemplateDict(Path(config.setup.source_directory, "templates"))
|
|
|
|
|
|
class LanguageDict(dict):
    """Dictionary that derives per-language settings from ``config.languages`` on first access."""

    def __missing__(self, key: str):
        """Return the settings for language *key*, completing and caching them.

        The entry from ``config.languages`` is updated in place: ``priority``
        defaults to ``0`` and ``code`` is set to the key with ``_`` replaced
        by ``-`` and, for multi-part keys, the last part upper-cased
        (``en_us`` -> ``en-US``).

        Raises:
            KeyError: if *key* is not present in ``config.languages``.
        """
        if key not in config.languages:
            raise KeyError(key)

        lang_dict = config.languages[key]
        lang_dict["priority"] = lang_dict.get("priority", 0)

        elements = key.split("_")
        if len(elements) > 1:
            elements[-1] = elements[-1].upper()
        lang_dict["code"] = "-".join(elements)

        # Store the result so later lookups do not repeat the work above.
        self[key] = lang_dict
        return lang_dict
|
|
|
|
|
|
# Language settings, looked up by the normalised language code (see LanguageDict.__missing__).
LANGUAGES = LanguageDict()
|
|
|
|
|
|
def compile_cross_article_context(cross_article_context):
    """Add a ready-made HTML anchor under the ``"link"`` key.

    Reads ``"title"`` and ``"url"`` from *cross_article_context* and stores
    ``<a href="URL">TITLE</a>`` back into the same dict. Both values are
    HTML-escaped, so characters such as ``&``, ``<`` or ``"`` cannot break
    the generated markup. The dict is modified in place; nothing is returned.

    Raises:
        KeyError: if ``"title"`` or ``"url"`` is missing.
    """
    import html

    title = html.escape(str(cross_article_context["title"]))
    url = html.escape(str(cross_article_context["url"]), quote=True)

    cross_article_context["link"] = f'<a href="{url}">{title}</a>'
|
|
|
|
|
|
class ArticleTranslation:
    """One language version of an article, backed by a single source file.

    The language is derived from the file stem. Markdown files (``.md``) are
    converted to HTML on load; every other file is used as-is.
    """

    def __init__(self, file: Path, article: Article):
        self.file = file
        self.article = article

        self.context: Dict[str, Any] = {}

        # initializing the location of the article translation
        self.language_code = stem_to_language_code(self.file.stem)
        self.location_in_tree = [self.language_code, *self.article.location_in_tree]
        self.url = "/" + "/".join(self.location_in_tree)
        self.dist_path = Path(config.setup.dist_directory, *self.location_in_tree)
        # Registers an (initially empty) entry for this language/slug pair;
        # it is filled in by __init_context__.
        self.cross_article_context = TRANSLATED_CROSS_ARTICLE_CONTEXT[self.language_code][self.article.slug] = {}

        language = LANGUAGES[self.language_code]
        self.priority = language["priority"]
        self.real_language_code = language["code"]

        # Decode explicitly as UTF-8 so translated content does not depend on
        # the platform's default locale encoding.
        self.html_content = self.file.read_text(encoding="utf-8")
        if self.file.suffix == ".md":
            self.html_content = markdown(self.html_content, extras=["fenced-code-blocks"])

    def __init_context__(self):
        """Fill the template context and the cross-article entry of this translation."""
        language = LANGUAGES[self.language_code]

        self.context["meta"] = self.article.context_shared
        self.context["url"] = self.url
        self.context["language"] = language
        self.context["article_url"] = self.article.url
        self.context["title"] = get_first_header_content(self.html_content, fallback=language["native_name"])

        self.cross_article_context.update(self.article.context_shared)
        self.cross_article_context["title"] = self.context["title"]
        self.cross_article_context["article_url"] = self.article.url
        self.cross_article_context["url"] = self.url
        compile_cross_article_context(self.cross_article_context)

        # get children
        self.context["children"] = [
            c.article_translations_map[self.language_code].context for c in self.article.child_articles
            if self.language_code in c.article_translations_map
        ]

        self.linked_context = self.context["linked"] = []
        self.related_context = self.context["related"] = []

    def __init_content_context__(self):
        """Render the content as a Jinja template and record which articles it references.

        The content is rendered with the cross-article contexts as variables.
        Every name the template resolves is recorded by ``ContextDict``; names
        that match an article slug with a translation in this language are
        added to the ``linked`` context. ``related`` is ``linked`` followed by
        the children. Finally ``content`` and ``preview`` are stored.
        """
        template = jinja2.Template(self.html_content)
        environment = template.environment
        environment.accessed_keys = []
        environment.context_class = ContextDict

        try:
            self.html_content = template.render({
                **CROSS_ARTICLE_CONTEXT,
                **TRANSLATED_CROSS_ARTICLE_CONTEXT[self.language_code],
            })
        finally:
            # Always restore the default context class, even when rendering
            # fails, so the environment is not left with the recording class.
            environment.context_class = jinja2.runtime.Context

        for key in environment.accessed_keys:
            # Every resolved name is recorded, not only article slugs, so a
            # name without a matching article must not abort the build.
            linked_article = ARTICLE_LAKE.get(key)
            if linked_article is None:
                logger.warning("%s references %r, which is not a known article", self.file, key)
                continue

            if self.language_code in linked_article.article_translations_map:
                self.linked_context.append(linked_article.article_translations_map[self.language_code].context)

        self.related_context.extend(self.linked_context)
        self.related_context.extend(self.context["children"])

        self.context["content"] = self.html_content
        self.context["preview"] = get_preview_text(html_string=self.html_content)

    def build(self):
        """Render this translation to ``<dist_path>/index.html``."""
        self.dist_path.mkdir(parents=True, exist_ok=True)

        # Write UTF-8 explicitly; the locale default may be unable to encode
        # the translated text.
        with Path(self.dist_path, "index.html").open("w", encoding="utf-8") as f:
            f.write(TEMPLATE["article_translation"].render(self.context))
|
|
|
|
|
|
class Article:
    """A node of the article tree, backed by one directory.

    Files in the directory (other than ``index.toml``) become translations;
    sub-directories become child articles. ``index.toml``, when present,
    supplies optional settings such as ``name`` and ``datetime``.
    """

    def __init__(self, directory: Path, location_in_tree: Optional[List[str]] = None, is_root: bool = False, parent: Optional[Article] = None):
        """Scan *directory* and build this article together with its whole subtree.

        Raises:
            SystemExit: with status 1, after logging an error, when another
                article already uses the same slug.
        """
        self.directory = directory

        self.context: Dict[str, Any] = {}
        self.context_shared: Dict[str, Any] = {}
        if parent is not None:
            self.context["parent"] = parent.context_shared

        # initializing the config values of the article
        config_file = self.directory / "index.toml"
        self.config = toml.load(config_file) if config_file.exists() else {}

        # initializing the location and slug of the article
        self.slug = self.config.get("name", self.directory.name)
        if self.slug in ARTICLE_LAKE:
            logger.error("two articles have the same name at %s and %r", ARTICLE_LAKE[self.slug].directory, self.directory)
            # ``exit`` is an interactive-shell helper from ``site``; raise directly.
            raise SystemExit(1)
        self.cross_article_context = CROSS_ARTICLE_CONTEXT[self.slug] = {}
        ARTICLE_LAKE[self.slug] = self

        # Work on a private copy so the caller's list is never modified.
        self.location_in_tree: List[str] = list(location_in_tree or [])
        if not is_root:
            self.location_in_tree.append(self.slug)
        self.url = "/" + "/".join(self.location_in_tree)
        self.dist_path = Path(config.setup.dist_directory, *self.location_in_tree)

        # build the tree
        self.child_articles: List[Article] = []
        self.article_translations_list: List[ArticleTranslation] = []
        self.article_translations_map: Dict[str, ArticleTranslation] = {}

        # iterdir() yields entries in arbitrary order; sorting keeps the
        # order of children and translations stable between builds.
        for entry in sorted(self.directory.iterdir()):
            if entry.name == "index.toml":
                continue

            if entry.is_file():
                translation = ArticleTranslation(entry, self)
                self.article_translations_list.append(translation)
                self.article_translations_map[translation.language_code] = translation
            elif entry.is_dir():
                self.child_articles.append(Article(
                    directory=entry,
                    location_in_tree=self.location_in_tree.copy(),
                    parent=self,
                ))

        self.article_translations_list.sort(key=lambda a: a.priority, reverse=True)

        logger.info("found %s at %s with the translations %s", self.slug, ".".join(self.location_in_tree), ",".join(self.article_translations_map))

    def __init_context__(self):
        """Fill the contexts of this article, then of its translations and children."""
        self.context_shared["url"] = self.url
        self.context_shared["slug"] = self.slug

        if "datetime" in self.config:
            configured = self.config["datetime"]
            # A quoted value arrives as a string; an unquoted TOML date/time
            # may already have been parsed into a date/datetime object.
            modified_at = datetime.fromisoformat(configured) if isinstance(configured, str) else configured
        else:
            # No configured date: use the directory's modification time.
            modified_at = datetime.fromtimestamp(self.directory.stat().st_mtime)
        self.context_shared["date"] = modified_at.strftime(config.formatting.datetime_format)
        self.context_shared["iso_date"] = modified_at.isoformat()

        self.context.update(self.context_shared)

        self.cross_article_context.update(self.context_shared)
        self.cross_article_context["title"] = self.context_shared["slug"]
        self.cross_article_context["article_url"] = self.context_shared["url"]
        compile_cross_article_context(self.cross_article_context)

        # recursive context structures
        translation_list = self.context["translations"] = []
        child_article_list = self.context["children"] = []

        for article_translation in self.article_translations_list:
            self.context[article_translation.real_language_code] = article_translation.context
            translation_list.append(article_translation.context)

        for child_article in self.child_articles:
            child_article_list.append(child_article.context)

        # recursively build context
        for at in self.article_translations_list:
            at.__init_context__()
        for a in self.child_articles:
            a.__init_context__()

    def __init_content_context__(self):
        """Run the content pass on every translation, then recurse into the children."""
        for at in self.article_translations_list:
            at.__init_content_context__()
        for a in self.child_articles:
            a.__init_content_context__()

    def build(self):
        """Write ``<dist_path>/index.html`` for this article, its translations and its children."""
        self.dist_path.mkdir(parents=True, exist_ok=True)

        # Write UTF-8 explicitly so output does not depend on the locale.
        with Path(self.dist_path, "index.html").open("w", encoding="utf-8") as f:
            f.write(TEMPLATE["article"].render(self.context))

        for at in self.article_translations_list:
            at.build()

        for ac in self.child_articles:
            ac.build()
|
|
|
|
|
|
class ContextDict(jinja2.runtime.Context):
    """Render context that records every name looked up through it.

    Each key is appended to ``environment.accessed_keys`` before the lookup
    is delegated to the regular Jinja context; that list must exist on the
    environment before rendering starts.
    """

    def resolve_or_missing(self, key: str) -> Any:
        recorded = self.environment.accessed_keys
        recorded.append(key)
        return super().resolve_or_missing(key)
|
|
|
|
|
|
|
|
# GLOBALS
# Logger shared by everything in this module.
logger = logging.getLogger("stsg.build")
# article slug -> context of that article (entry created in Article.__init__).
CROSS_ARTICLE_CONTEXT: Dict[str, Dict[str, Any]] = {}
# language code -> article slug -> context of that translation (entry created in ArticleTranslation.__init__).
TRANSLATED_CROSS_ARTICLE_CONTEXT: Dict[str, Dict[str, Dict[str, Any]]] = defaultdict(dict)
# article slug -> Article; also used to detect duplicate slugs.
ARTICLE_LAKE: Dict[str, Article] = {}
# NOTE(review): not referenced anywhere in the visible code - confirm whether it is still needed.
ARTICLE_REFERENCE_VALUES: DefaultDict[str, Dict[str, str]] = defaultdict(dict)
|
|
|
|
|
|
def build():
    """Run the full site build.

    Steps, in order: copy the ``static`` folder into the dist directory, scan
    the ``articles`` folder into an ``Article`` tree, compute all contexts,
    write three JSON snapshots of those contexts into the current working
    directory, and finally render every page.

    NOTE(review): ``json.dump`` rejects circular structures; articles that
    link to each other may produce one through their ``linked`` lists -
    confirm.
    """
    logger.info("starting build process...")

    logger.info("copying static folder...")
    static_source = Path(config.setup.source_directory, "static")
    static_target = Path(config.setup.dist_directory, "static")
    shutil.copytree(static_source, static_target, dirs_exist_ok=True)

    logger.info("building page tree...")
    tree = Article(directory=Path(config.setup.source_directory, "articles"), is_root=True)

    logger.info("compiling tree context...")
    tree.__init_context__()
    tree.__init_content_context__()

    import json

    # Snapshots of the computed contexts, written relative to the working directory.
    snapshots = (
        ("context.json", tree.context),
        ("cross_article_context.json", CROSS_ARTICLE_CONTEXT),
        ("t_cross_article_context.json", TRANSLATED_CROSS_ARTICLE_CONTEXT),
    )
    for file_name, payload in snapshots:
        with Path(file_name).open("w") as handle:
            json.dump(payload, handle, indent=4)

    logger.info("dumping page tree...")
    tree.build()
|