feat: added proper content

This commit is contained in:
Hazel 2025-04-16 15:26:43 +02:00
parent c9fb8fda93
commit 292d71edc5
2 changed files with 53 additions and 4 deletions

3
.gitignore vendored
View File

@ -173,4 +173,5 @@ cython_debug/
# PyPI configuration file # PyPI configuration file
.pypirc .pypirc
dist dist
context.json

View File

@ -30,6 +30,44 @@ def get_first_header_content(content, fallback: str = ""):
return fallback return fallback
def shorten_text_and_clean(html_string, max_length=config.formatting.article_preview_length):
    """Return ``html_string`` cut down to roughly ``max_length`` text characters.

    The markup is parsed with ``html.parser`` and walked depth-first in
    document order. The length of every text node is added to a running
    total. The text node that would push the total past ``max_length`` is cut
    at the limit and gets ``'...'`` appended; every node that follows it is
    removed from the tree. Tags whose text is empty or whitespace-only once
    their children have been processed are dropped as well, including
    elements that never contained any text.

    Args:
        html_string: HTML markup to shorten.
        max_length: Budget of text characters to keep (markup itself is not
            counted). Defaults to ``config.formatting.article_preview_length``
            as evaluated when the function is defined.

    Returns:
        The shortened document serialised back to a string.
    """
    document = BeautifulSoup(html_string, 'html.parser')

    used = 0        # text characters kept so far
    done = False    # flips to True once the budget has been exhausted

    def _trim(parent):
        """Trim ``parent``'s children in place, updating the shared counters."""
        nonlocal used, done

        # Iterate over a snapshot: nodes are detached from the tree while looping.
        for node in list(parent.children):
            if done:
                # Everything after the cut-off point is discarded.
                node.extract()
                continue

            if not isinstance(node, str):
                if hasattr(node, 'children'):
                    _trim(node)
                    # A tag left without any visible text is not worth keeping.
                    if not node.text.strip():
                        node.decompose()
                continue

            budget_left = max_length - used
            if budget_left <= 0:
                node.extract()
                done = True
                continue

            if len(node) <= budget_left:
                used += len(node)
                continue

            # This text node crosses the limit: cut it and mark the truncation.
            node.replace_with(node[:budget_left] + '...')
            used = max_length
            done = True

    _trim(document)
    return str(document)
def stem_to_language_code(stem: str) -> str: def stem_to_language_code(stem: str) -> str:
language_code = stem.lower().replace("-", "_") language_code = stem.lower().replace("-", "_")
@ -98,8 +136,16 @@ class ArticleTranslation:
def __init_context__(self):
    """Fill ``self.context`` with the template values for this translation.

    Sets the keys ``meta``, ``url``, ``language``, ``content`` and
    ``preview``. The content is read from ``self.file``; files with the
    ``.md`` suffix are converted with ``markdown.markdown`` first, any other
    file is used as-is. The preview is the same content passed through
    ``shorten_text_and_clean``.
    """
    self.context["meta"] = self.article.context_meta
    self.context["url"] = self.url
    self.context["language"] = LANGUAGES[self.language_code]

    # Decode explicitly as UTF-8: without an encoding argument read_text()
    # falls back to the platform default, which can mis-decode article
    # sources on systems whose locale is not UTF-8.
    html_content = self.file.read_text(encoding="utf-8")
    if self.file.suffix == ".md":
        html_content = markdown.markdown(html_content)

    self.context["content"] = html_content
    self.context["preview"] = shorten_text_and_clean(html_string=html_content)
def build(self): def build(self):
self.dist_path.mkdir(parents=True, exist_ok=True) self.dist_path.mkdir(parents=True, exist_ok=True)
@ -110,8 +156,6 @@ class ArticleTranslation:
r = { r = {
"article_content": self.article_content, "article_content": self.article_content,
"article_preview": self.article_preview, "article_preview": self.article_preview,
"article_url": self.url,
"article_slug": self.article.slug,
"article_title": self.title, "article_title": self.title,
} }
@ -269,11 +313,15 @@ def build():
logger.info("building page tree...") logger.info("building page tree...")
tree = Article(directory=Path(config.setup.source_directory, "articles"), is_root=True) tree = Article(directory=Path(config.setup.source_directory, "articles"), is_root=True)
print(ARTICLE_LAKE.keys())
logger.info("compiling tree context...") logger.info("compiling tree context...")
tree.__init_context__() tree.__init_context__()
import json import json
print(json.dumps(tree.context, indent=4)) with Path("context.json").open("w") as f:
json.dump(tree.context, f, indent=4)
# build article reference values # build article reference values
for article_overview in ARTICLE_LAKE.values(): for article_overview in ARTICLE_LAKE.values():