470 lines
15 KiB
Python
470 lines
15 KiB
Python
from __future__ import annotations
|
|
import logging
|
|
import shutil
|
|
from pathlib import Path
|
|
import os
|
|
from markdown2 import markdown
|
|
from typing import Optional, Union, Dict, Generator, List, DefaultDict, Any, TypedDict, Set
|
|
from bs4 import BeautifulSoup
|
|
from collections import defaultdict, UserList
|
|
import toml
|
|
from datetime import datetime
|
|
import jinja2
|
|
from functools import cached_property
|
|
|
|
|
|
from .definitions import *
|
|
from . import config
|
|
|
|
|
|
def shorten_text_and_clean(html_string, max_length=config.formatting.preview_length):
    """Truncate the text of an HTML fragment while keeping its markup.

    The fragment is parsed with BeautifulSoup's ``html.parser`` and walked
    depth-first. The lengths of all string nodes are summed until
    ``max_length`` is reached; the string node that crosses the limit is cut
    and gets ``'...'`` appended, and every node after it is removed. Tags
    that end up without any non-whitespace text are removed as well.

    The default for ``max_length`` is read from the config once, when the
    function is defined.

    :param html_string: HTML markup to shorten.
    :param max_length: maximum number of text characters to keep.
    :return: the shortened markup as a string.
    """
    document = BeautifulSoup(html_string, 'html.parser')

    used = 0       # text characters kept so far
    done = False   # flips to True once the budget is exhausted

    def prune(node):
        nonlocal used, done

        # Walk a snapshot: children are removed from the tree while iterating.
        for item in list(node.children):
            if done:
                item.extract()
                continue

            if isinstance(item, str):
                budget = max_length - used
                if budget <= 0:
                    item.extract()
                    done = True
                elif len(item) > budget:
                    item.replace_with(item[:budget] + '...')
                    used = max_length
                    done = True
                else:
                    used += len(item)
                continue

            if hasattr(item, 'children'):
                prune(item)
                # Drop tags that carry no visible text (any more).
                if not item.text.strip():
                    item.decompose()

    prune(document)

    return str(document)
|
|
|
|
|
|
def shift_headings(html_string, header_shift=config.formatting.preview_header_shift):
    """Demote every heading in an HTML fragment by ``header_shift`` levels.

    ``h1`` .. ``h6`` tags are renamed to ``h{level + header_shift}``, capped
    at ``h6``. The default shift is read from the config once, when the
    function is defined.

    :param html_string: HTML markup to process.
    :param header_shift: number of levels each heading is moved down.
    :return: the markup with renamed heading tags.
    """
    document = BeautifulSoup(html_string, 'html.parser')

    # Deepest level first, so a heading that was already renamed is not
    # picked up again by a later pass.
    for depth in reversed(range(1, 7)):
        target_name = f'h{min(depth + header_shift, 6)}'
        for heading in document.find_all(f'h{depth}'):
            heading.name = target_name

    return str(document)
|
|
|
|
|
|
def get_preview_text(html_string: str):
    """Build the preview markup for an article.

    The content is first shortened with ``shorten_text_and_clean`` and the
    headings of the shortened markup are then shifted with
    ``shift_headings``; both use their configured defaults.
    """
    shortened = shorten_text_and_clean(html_string)
    return shift_headings(shortened)
|
|
|
|
|
|
|
|
class TemplateDict(dict):
    """Dictionary that loads and caches ``jinja2`` templates on first access.

    Looking up ``name`` reads ``<folder>/<name>.html``, compiles it into a
    ``jinja2.Template`` and stores it, so each template file is read once.
    """

    def __init__(self, folder: Path):
        """
        :param folder: directory that contains the ``*.html`` template files.
        """
        self.folder = folder
        super().__init__()

    def __missing__(self, name: str) -> jinja2.Template:
        """Load, cache and return the template called ``name``.

        :raises SystemExit: with status 1 (after logging an error) when the
            template file does not exist.
        """
        template_file = self.folder / (name + ".html")
        if not template_file.exists():
            logger.error("no template with the name %s exists", name)
            # The ``exit`` builtin is injected by ``site`` for interactive
            # use and is not guaranteed to exist; raise SystemExit directly.
            raise SystemExit(1)

        # Decode explicitly instead of relying on the platform's locale.
        template = jinja2.Template(template_file.read_text(encoding="utf-8"))
        self[name] = template
        return template
|
|
|
|
|
|
# Shared template cache: templates are read from "<source_directory>/templates"
# the first time their name is looked up.
TEMPLATE: Dict[str, jinja2.Template] = TemplateDict(Path(config.setup.source_directory, "templates"))
|
|
|
|
|
|
class LanguageDict(dict):
    """Dictionary of language metadata, filled lazily from ``config.languages``.

    On first access of a language key the configured entry is completed with
    a ``"priority"`` (default ``0``) and a ``"code"`` and then cached.
    """

    def __missing__(self, key: str):
        """Complete, cache and return the config entry for ``key``.

        The ``"code"`` is ``key`` with ``_`` replaced by ``-``; when the key
        has more than one ``_``-separated part, the last part is upper-cased
        (``"en_us"`` becomes ``"en-US"``).

        :raises KeyError: when ``key`` is not present in ``config.languages``.
        """
        if key not in config.languages:
            raise KeyError(key)

        # The config entry itself is updated in place, as before.
        lang_dict = config.languages[key]
        lang_dict.setdefault("priority", 0)

        elements = key.split("_")
        if len(elements) > 1:
            elements[-1] = elements[-1].upper()
        lang_dict["code"] = "-".join(elements)

        # Store the entry so later lookups (and ``in`` / iteration) see it
        # instead of going through __missing__ every time.
        self[key] = lang_dict
        return lang_dict
|
|
|
|
|
|
# Language metadata keyed by language code; entries are resolved from
# config.languages when they are first looked up.
LANGUAGES = LanguageDict()
|
|
|
|
|
|
def add_html_link(c):
    """Add a ready-made HTML anchor to a context dictionary.

    Reads ``c["name"]`` and ``c["url"]`` and stores
    ``<a href="{url}">{name}</a>`` under ``c["link"]``. Both values are
    HTML-escaped so that characters such as ``&``, ``<`` or ``"`` in a title
    or URL cannot break the generated markup.

    :param c: context dictionary; modified in place.
    :raises KeyError: when ``"name"`` or ``"url"`` is missing.
    """
    from html import escape

    name = escape(str(c["name"]), quote=False)
    url = escape(str(c["url"]), quote=True)

    c["link"] = f'<a href="{url}">{name}</a>'
|
|
|
|
|
|
def get_translated_articles(articles: List[Article], language_code: Optional[str] = None) -> List[Union[ArticleTranslation, Article]]:
    """Return the given articles, de-duplicated by slug, in one language.

    For every slug only the first article is considered. Without a
    ``language_code`` the articles themselves are returned. With a
    ``language_code`` each article is replaced by its translation in that
    language; an article without such a translation is kept as-is when
    ``config.fall_back_to_overview_in_translation`` is set and dropped
    otherwise.

    :param articles: articles to process, in the order they should appear.
    :param language_code: language to translate into, or ``None``.
    :return: list of articles / translations in input order.
    """
    result = {}

    for a in articles:
        if a.slug in result:
            continue

        if language_code is None:
            result[a.slug] = a
            continue

        if not config.fall_back_to_overview_in_translation and language_code not in a.article_translations_map:
            continue

        result[a.slug] = a.article_translations_map.get(language_code, a)

    # The collected values were previously never returned.
    return list(result.values())
|
|
|
|
|
|
class ArticleList(UserList):
    """List of articles (or article translations) that is unique by slug.

    An item whose ``slug`` is already present is silently ignored, both when
    the list is created and when items are added later.
    """

    def __init__(self, iterable=()):
        """
        :param iterable: initial items; strings are resolved like in
            :meth:`append`.
        """
        super().__init__()
        self.used_slugs = set()

        # Route the initial items through append() so their slugs are
        # registered; otherwise a later extend() could add them a second time.
        self.extend(iterable)

    def append(self, a: Union[Article, str]):
        """Add ``a`` unless an item with the same slug is already present.

        :param a: the item to add, or a slug that is looked up in
            ``ARTICLE_LAKE``.
        :raises KeyError: when a slug is given that ``ARTICLE_LAKE`` does not
            contain.
        """
        if isinstance(a, str):
            a = ARTICLE_LAKE[a]

        if a.slug in self.used_slugs:
            return

        self.used_slugs.add(a.slug)
        self.data.append(a)

    def extend(self, other):
        """Append every item of ``other``, skipping slugs already present."""
        for a in other:
            self.append(a)

    def get_translated(self, language_code: str) -> ArticleList[Union[ArticleTranslation, Article]]:
        """Return a new list with every item replaced by its translation.

        Items without a translation for ``language_code`` are kept unchanged
        when ``config.fall_back_to_overview_in_translation`` is set and are
        left out otherwise.
        """
        res = ArticleList([])

        for a in self:
            if not config.fall_back_to_overview_in_translation and language_code not in a.article_translations_map:
                continue

            res.append(a.article_translations_map.get(language_code, a))

        return res

    @property
    def context(self) -> List[Union[ArticleContext, ArticleTranslationContext]]:
        """The ``context`` of every item, in list order."""
        return [a.context for a in self]
|
|
|
|
|
|
class ArticleTranslation:
    """One language version of an article, backed by a single content file.

    The language is derived from the file name, the content is the file's
    text (converted from markdown for ``.md`` files). On creation an empty
    context dictionary is registered in ``TRANSLATED_CROSS_ARTICLE_CONTEXT``
    under the language code and the article's slug; it is filled by
    :meth:`__init_context__` and :meth:`__init_content_context__`.
    """

    article: Article
    slug: str = property(fget=lambda self: self.article.slug)
    file: Path

    context: ArticleTranslationContext
    cross_article_context: Dict[str, Any]

    def __init__(self, file: Path, article: Article):
        """
        :param file: content file of this translation.
        :param article: the article this file is a translation of.
        """
        self.article = article
        self.file = file

        self.context = TRANSLATED_CROSS_ARTICLE_CONTEXT[self.language_code][self.article.slug] = {}

    @cached_property
    def html_content(self) -> str:
        """The file's content as HTML; ``.md`` files are run through markdown2."""
        # Decode explicitly instead of relying on the platform's locale.
        raw_content = self.file.read_text(encoding="utf-8")
        if self.file.suffix == ".md":
            return markdown(raw_content, extras=config.formatting.markdown_extras)
        return raw_content

    @cached_property
    def language_code(self) -> str:
        """Language key from ``config.languages`` that matches the file name.

        The lower-cased file stem with ``-`` replaced by ``_`` is tried
        first, then only its part before the first ``_``.

        :raises SystemExit: with status 1 (after logging an error) when
            neither form is a configured language.
        """
        stem = self.file.stem
        language_code = stem.lower().replace("-", "_")

        if language_code in config.languages:
            return language_code

        language_code = language_code.split("_")[0]
        if language_code in config.languages:
            return language_code

        # ``stem`` used to be an undefined name here, which turned this error
        # report into a NameError.
        logger.error("Didn't recognize %s as a valid language code, add it to the config, or fix your structure.", stem)
        raise SystemExit(1)

    @cached_property
    def priority(self) -> int:
        """The ``"priority"`` of this translation's language."""
        return LANGUAGES[self.language_code]["priority"]

    @cached_property
    def slug_path(self) -> List[str]:
        """The article's slug path, prefixed with the language code."""
        return [self.language_code, *self.article.slug_path]

    @cached_property
    def url(self) -> str:
        """Absolute URL path built from :attr:`slug_path`."""
        return "/" + "/".join(self.slug_path)

    @cached_property
    def dist_path(self) -> Path:
        """Output directory of this translation inside the dist directory."""
        return Path(config.setup.dist_directory, *self.slug_path)

    @cached_property
    def name(self) -> str:
        """Text of the highest-level heading in the content.

        ``h1`` is looked for first, then ``h2`` and so on; the article's
        name is used when the content has no heading.
        """
        soup = BeautifulSoup(self.html_content, 'html.parser')
        for level in range(1, 7):
            header = soup.find(f'h{level}')
            if header:
                return header.get_text(strip=True)

        return self.article.name

    def __init_context__(self):
        """Fill the context with everything that does not need rendered content."""
        self.context["slug"] = self.article.slug
        self.context["name"] = self.name
        self.context["url"] = self.url
        add_html_link(self.context)
        self.context["date"] = self.article.modified_at.strftime(config.formatting.datetime_format)
        self.context["year"] = str(self.article.modified_at.year)
        self.context["iso_date"] = self.article.modified_at.isoformat()
        self.context["author"] = self.article.author

        self.context["language"] = LANGUAGES[self.language_code]
        self.context["article_url"] = self.article.url

        # get children
        self.context["children"] = self.article.child_articles.get_translated(self.language_code).context
        self.context["breadcrumbs"] = ArticleList(self.article.article_path).get_translated(self.language_code).context

    def __init_content_context__(self):
        """Render the content as a template and fill the content-related context.

        The content is rendered with the cross-article contexts as variables.
        Every article slug the template accessed is added to the article's
        ``linked_articles``.
        """
        template = jinja2.Template(self.html_content)
        environment = template.environment
        environment.accessed_keys = []
        # ContextDict records every name the template resolves.
        environment.context_class = ContextDict

        try:
            self.html_content = template.render({
                **CROSS_ARTICLE_CONTEXT,
                **TRANSLATED_CROSS_ARTICLE_CONTEXT[self.language_code],
            })
        finally:
            # Restore the default even if rendering fails.
            environment.context_class = jinja2.runtime.Context

        for key in environment.accessed_keys:
            # Templates also resolve names that are not articles (for example
            # Jinja globals); only known slugs can be linked.
            if key in ARTICLE_LAKE:
                self.article.linked_articles.append(key)

        self.context["content"] = self.html_content
        self.context["preview"] = get_preview_text(html_string=self.html_content)

        self.context["linked"] = self.article.linked_articles.get_translated(self.language_code).context
        self.context["related"] = self.article.related_articles.get_translated(self.language_code).context

    def build(self):
        """Render the ``article_translation`` template to ``<dist_path>/index.html``."""
        self.dist_path.mkdir(parents=True, exist_ok=True)

        with Path(self.dist_path, "index.html").open("w", encoding="utf-8") as f:
            f.write(TEMPLATE["article_translation"].render(self.context))

    def build_wiki(self, language_code: str):
        """Placeholder; currently does nothing."""
        pass
|
|
|
|
|
|
class Article:
    """A node of the article tree, backed by one directory.

    Creating an article scans its directory: ``index.toml`` is the optional
    article config, every other file becomes an :class:`ArticleTranslation`
    and every sub-directory becomes a child :class:`Article`. The article
    registers itself in ``ARTICLE_LAKE`` and registers its (initially empty)
    context in ``CROSS_ARTICLE_CONTEXT`` under its slug.
    """

    directory: Path
    article_path: List[Article]

    context: ArticleContext

    child_articles: ArticleList[Article]
    article_translations_list: List[ArticleTranslation]
    article_translations_map: Dict[str, ArticleTranslation]

    linked_articles: ArticleList[Article]

    def __init__(self, directory: Path, article_path: Optional[List[Article]] = None, is_root: bool = False, parent: Optional[Article] = None):
        """
        :param directory: directory that holds this article.
        :param article_path: the ancestors of this article, root first.
        :param is_root: accepted but currently unused.
        :param parent: accepted but currently unused.
        """
        self.directory = directory

        # Build a new list instead of appending to the caller's one.
        self.article_path = [*(article_path or []), self]

        self.context = CROSS_ARTICLE_CONTEXT[self.slug] = {}

        ARTICLE_LAKE[self.slug] = self

        self.linked_articles = ArticleList([])

        # build the tree
        self.child_articles = ArticleList([])
        self.article_translations_list = []
        self.article_translations_map = {}

        for c in self.directory.iterdir():
            if c.name == "index.toml":
                continue

            if c.is_file():
                at = ArticleTranslation(c, self)
                self.article_translations_list.append(at)
                self.article_translations_map[at.language_code] = at
            elif c.is_dir():
                self.child_articles.append(Article(
                    directory=c,
                    article_path=self.article_path.copy(),
                    parent=self,
                ))

        # Highest language priority first.
        self.article_translations_list.sort(key=lambda a: a.priority, reverse=True)

        logger.info("found %s at %s with the translations %s", self.slug, ".".join(self.slug_path), ",".join(self.article_translations_map))

    @cached_property
    def config(self) -> ArticleConfig:
        """Parsed ``index.toml`` of the directory, or ``{}`` when there is none."""
        config_file = self.directory / "index.toml"
        return toml.load(config_file) if config_file.exists() else {}

    @cached_property
    def slug(self) -> str:
        """Identifier of the article; must be unique across all articles.

        :raises SystemExit: with status 1 (after logging an error) when
            another article already uses the slug.
        """
        # NOTE(review): this reads the same "name" config key as `name` -
        # confirm that is intended.
        slug = self.config.get("name", self.directory.name)
        if slug in ARTICLE_LAKE:
            logger.error("two articles have the same name at %s and %r", ARTICLE_LAKE[slug].directory, self.directory)
            # The ``exit`` builtin is injected by ``site`` for interactive
            # use and is not guaranteed to exist; raise SystemExit directly.
            raise SystemExit(1)
        return slug

    @cached_property
    def name(self) -> str:
        """The configured ``"name"``, falling back to the slug."""
        return self.config.get("name", self.slug)

    @cached_property
    def slug_path(self) -> List[str]:
        """Slugs of all articles on the path to this one, without the first."""
        return [a.slug for a in self.article_path[1:]]

    @cached_property
    def url(self) -> str:
        """Absolute URL path built from :attr:`slug_path`."""
        return "/" + "/".join(self.slug_path)

    @cached_property
    def dist_path(self) -> Path:
        """Output directory of this article inside the dist directory."""
        return Path(config.setup.dist_directory, *self.slug_path)

    @cached_property
    def related_articles(self) -> ArticleList[Article]:
        """Child articles followed by linked articles."""
        res = ArticleList(self.child_articles)
        res.extend(self.linked_articles)
        return res

    @cached_property
    def modified_at(self) -> datetime:
        """The configured ``"iso_date"``, else the directory's modification time."""
        if "iso_date" in self.config:
            return datetime.fromisoformat(self.config["iso_date"])

        # TODO: scan every article file and use the youngest article file
        return datetime.fromtimestamp(self.directory.stat().st_mtime)

    @cached_property
    def author(self) -> str:
        """The configured ``"author"``, falling back to ``config.default_author``."""
        return self.config.get("author", config.default_author)

    def __init_context__(self):
        """Fill the contexts of this article, its translations and its children.

        Only values that do not need rendered content are set here.
        """
        self.context["slug"] = self.slug
        self.context["name"] = self.name
        self.context["url"] = self.url
        add_html_link(self.context)
        self.context["date"] = self.modified_at.strftime(config.formatting.datetime_format)
        self.context["year"] = str(self.modified_at.year)
        self.context["iso_date"] = self.modified_at.isoformat()
        self.context["author"] = self.author

        # recursive context structures
        self.context["translations"] = [c.context for c in self.article_translations_list]
        self.context["children"] = self.child_articles.context
        self.context["breadcrumbs"] = [b.context for b in self.article_path]
        for lang, article in self.article_translations_map.items():
            self.context[lang] = article.context

        for at in self.article_translations_list:
            at.__init_context__()

        for a in self.child_articles:
            a.__init_context__()

    def __init_content_context__(self):
        """Render all translations, then fill the linked/related context.

        The translations are processed first because rendering them is what
        fills ``linked_articles``. Children are processed afterwards.
        """
        for at in self.article_translations_list:
            at.__init_content_context__()

        self.context["linked"] = self.linked_articles.context
        self.context["related"] = self.related_articles.context

        for a in self.child_articles:
            a.__init_content_context__()

    def build(self):
        """Write this article, its translations and its children to disk.

        The ``article`` template is rendered to ``<dist_path>/index.html``.
        """
        self.dist_path.mkdir(parents=True, exist_ok=True)

        # Encode explicitly instead of relying on the platform's locale.
        with Path(self.dist_path, "index.html").open("w", encoding="utf-8") as f:
            f.write(TEMPLATE["article"].render(self.context))

        for at in self.article_translations_list:
            at.build()

        for ac in self.child_articles:
            ac.build()
|
|
|
|
|
|
class ContextDict(jinja2.runtime.Context):
    """Template context that records every key that gets resolved.

    Each key is appended to ``environment.accessed_keys`` before the lookup
    is handed to the regular jinja2 context; that list must be set on the
    environment before rendering.
    """

    def resolve_or_missing(self, key: str) -> Any:
        """Record ``key`` and return what the base context resolves it to."""
        recorded = self.environment.accessed_keys
        recorded.append(key)
        return super().resolve_or_missing(key)
|
|
|
|
|
|
|
|
# GLOBALS
# Logger used by everything in this module.
logger = logging.getLogger("stsg.build")
# Every article by slug; Article.__init__ registers itself here.
ARTICLE_LAKE: Dict[str, Article] = {}
# Context of every article by slug; passed to content templates as variables.
CROSS_ARTICLE_CONTEXT: Dict[str, Dict[str, Any]] = {}
# Context of every article translation: language code -> slug -> context.
TRANSLATED_CROSS_ARTICLE_CONTEXT: Dict[str, Dict[str, Dict[str, Any]]] = defaultdict(dict)
# NOTE(review): not referenced anywhere in the visible code - confirm it is still needed.
ARTICLE_REFERENCE_VALUES: DefaultDict[str, Dict[str, str]] = defaultdict(dict)
|
|
|
|
|
|
def build():
    """Build the whole site into the dist directory.

    Copies the static folder, creates the article tree from
    ``<source_directory>/articles``, fills all contexts and finally writes
    every page.
    """
    logger.info("starting build process...")

    logger.info("copying static folder...")
    static_source = Path(config.setup.source_directory, "static")
    if static_source.is_dir():
        shutil.copytree(static_source, Path(config.setup.dist_directory, "static"), dirs_exist_ok=True)
    else:
        # A site without static assets should still build.
        logger.warning("no static folder found at %s, skipping", static_source)

    logger.info("building page tree...")
    tree = Article(directory=Path(config.setup.source_directory, "articles"), is_root=True)

    logger.info("compiling tree context...")
    tree.__init_context__()
    tree.__init_content_context__()

    # Debug helper, enable to dump the computed contexts:
    #
    # import json
    # with Path("context.json").open("w") as f:
    #     json.dump(tree.context, f, indent=4)
    # with Path("cross_article_context.json").open("w") as f:
    #     json.dump(CROSS_ARTICLE_CONTEXT, f, indent=4)
    # with Path("t_cross_article_context.json").open("w") as f:
    #     json.dump(TRANSLATED_CROSS_ARTICLE_CONTEXT, f, indent=4)

    logger.info("dumping page tree...")
    tree.build()
|