from __future__ import annotations
import logging
import shutil
from pathlib import Path
import os
from markdown2 import markdown
from typing import Optional, Union, Dict, Generator, List, DefaultDict, Any, TypedDict, Set
from bs4 import BeautifulSoup
from collections import defaultdict
import toml
from datetime import datetime
import jinja2
from functools import cached_property


from . import config


def get_first_header_content(content, fallback: str = ""):
    """Return the text of the highest-ranked heading found in *content*.

    Heading levels are probed in order ``h1`` .. ``h6``; for the first level
    that occurs at all, the first such tag's stripped text is returned.
    *fallback* is returned when the markup contains no heading.
    """
    parsed = BeautifulSoup(content, 'html.parser')

    # One lookup per heading level, evaluated lazily from h1 down to h6.
    candidates = (parsed.find(f'h{level}') for level in range(1, 7))
    heading = next((tag for tag in candidates if tag), None)

    if heading is None:
        return fallback
    return heading.get_text(strip=True)


def shorten_text_and_clean(html_string, max_length=config.formatting.preview_length):
    """Truncate the text of *html_string* to *max_length* characters.

    The markup is walked in document order. Text nodes are kept until the
    character budget is used up; the node that crosses the limit is cut and
    gets a ``...`` suffix, and everything after it is removed. Tags whose
    text is empty (or whitespace only) after processing are dropped.

    Returns the resulting markup as a string.
    """
    document = BeautifulSoup(html_string, 'html.parser')

    # Shared between all recursion levels: characters kept so far, and
    # whether the budget has been exhausted.
    state = {"used": 0, "done": False}

    def walk(node):
        # Iterate over a snapshot because nodes are removed while walking.
        for item in list(node.children):
            if state["done"]:
                item.extract()
            elif isinstance(item, str):
                budget = max_length - state["used"]
                if budget <= 0:
                    item.extract()
                    state["done"] = True
                elif len(item) > budget:
                    item.replace_with(item[:budget] + '...')
                    state["used"] = max_length
                    state["done"] = True
                else:
                    state["used"] += len(item)
            elif hasattr(item, 'children'):
                walk(item)
                # Drop tags that ended up without any visible text.
                if not item.text.strip():
                    item.decompose()

    walk(document)

    return str(document)


def shift_headings(html_string, header_shift=config.formatting.preview_header_shift):
    """Renumber every heading in *html_string* by *header_shift* levels.

    A positive shift demotes headings (``h1`` -> ``h2`` for a shift of 1), a
    negative shift promotes them. The resulting level is clamped to the valid
    range ``h1`` .. ``h6``.

    Returns the rewritten markup as a string.
    """
    soup = BeautifulSoup(html_string, 'html.parser')

    heading_names = [f'h{level}' for level in range(1, 7)]

    # All headings are collected before any of them is renamed, so each tag is
    # shifted exactly once regardless of the sign of the shift.
    for tag in soup.find_all(heading_names):
        level = int(tag.name[1:])
        new_level = max(1, min(level + header_shift, 6))
        tag.name = f'h{new_level}'

    return str(soup)


def get_preview_text(html_string: str):
    """Build the preview markup: shorten the content first, then shift its headings."""
    shortened = shorten_text_and_clean(html_string)
    return shift_headings(shortened)
    

class TemplateDict(dict):
    """Dictionary that loads Jinja templates from a folder on first access.

    Looking up ``name`` reads ``<folder>/<name>.html``, compiles it and stores
    the compiled template so later lookups are plain dictionary hits.
    """

    def __init__(self, folder: Path):
        self.folder = folder
        super().__init__()

    def __missing__(self, name: str) -> jinja2.Template:
        """Load, cache and return the template called *name*.

        Raises:
            SystemExit: with exit code 1 when no matching template file
                exists; the problem is logged first.
        """
        template_file = self.folder / (name + ".html")
        if not template_file.exists():
            logger.error("no template with the name %s exists", name)
            # Raised directly: the ``exit`` helper is injected by the ``site``
            # module and is not guaranteed to exist in every interpreter setup.
            raise SystemExit(1)

        # Read with an explicit encoding so the result does not depend on the
        # locale of the machine running the build.
        template = jinja2.Template(template_file.read_text(encoding="utf-8"))
        self[name] = template
        return template


# Lazily filled template cache; templates are read from "<source_directory>/templates".
TEMPLATE: Dict[str, jinja2.Template] = TemplateDict(Path(config.setup.source_directory) / "templates")


class LanguageDict(dict):
    """Dictionary of language entries, filled on demand from ``config.languages``.

    On first access of a language key the entry from ``config.languages`` is
    normalized and then stored, so later lookups are plain dictionary hits.
    """

    def __missing__(self, key: str):
        """Normalize, cache and return the entry for the language *key*.

        The entry taken from ``config.languages`` is updated in place:
        ``priority`` defaults to 0 and ``code`` is derived from the key by
        joining its ``_``-separated parts with ``-`` and upper-casing the last
        part when there is more than one (``en_us`` -> ``en-US``).

        Raises:
            KeyError: if *key* is not present in ``config.languages``.
        """
        if key not in config.languages:
            raise KeyError(key)

        lang_dict = config.languages[key]
        lang_dict.setdefault("priority", 0)

        elements = key.split("_")
        if len(elements) > 1:
            elements[-1] = elements[-1].upper()
        lang_dict["code"] = "-".join(elements)

        # Store the result so the normalization runs once per language.
        self[key] = lang_dict
        return lang_dict


# Module-wide language table: language key -> entry taken from config.languages,
# resolved on first access by LanguageDict.__missing__.
LANGUAGES = LanguageDict()


def compile_cross_article_context(cross_article_context):
    """Add a ready-to-use HTML anchor under the ``"link"`` key.

    Reads ``"title"`` and ``"url"`` from *cross_article_context* and stores
    ``<a href="URL">TITLE</a>`` back into the same dictionary. Both values are
    HTML-escaped, so characters such as ``&``, ``<`` or ``"`` cannot break the
    generated markup.

    Raises:
        KeyError: if ``"title"`` or ``"url"`` is missing.
    """
    from html import escape

    title = cross_article_context["title"]
    url = cross_article_context["url"]

    # The URL sits inside a double-quoted attribute, so quotes are escaped
    # too; the title is element content, where quotes are harmless.
    safe_url = escape(str(url), quote=True)
    safe_title = escape(str(title), quote=False)

    cross_article_context["link"] = f'<a href="{safe_url}">{safe_title}</a>'


class ArticleTranslationContext(TypedDict):
    """Declared shape of the ``context`` attribute of ``ArticleTranslation``.

    NOTE(review): the keys that ``ArticleTranslation`` actually writes into
    its context are ``meta``, ``url``, ``language``, ``article_url``,
    ``title``, ``children``, ``linked``, ``related``, ``content`` and
    ``preview``. Only ``url`` is declared here, so this declaration looks out
    of date; confirm before relying on it for type checking.
    """
    slug: str
    name: str
    datetime: str
    author: str
    url: str


class ArticleTranslation:
    """One language version of an article, backed by a single source file.

    The owning ``Article`` drives the lifecycle: construction, then
    ``__init_context__``, then ``__init_content_context__`` and finally
    ``build``.
    """

    article: Article
    file: Path

    @cached_property
    def html_content(self) -> str:
        """Return the file content as HTML.

        Files with the suffix ``.md`` are converted with ``markdown``; every
        other file is returned unchanged. ``__init_content_context__`` later
        replaces this value with the rendered result.
        """
        # Explicit encoding so translated text is read identically on every
        # platform, independent of the locale.
        raw_content = self.file.read_text(encoding="utf-8")
        if self.file.suffix == ".md":
            return markdown(raw_content, extras=config.formatting.markdown_extras)
        return raw_content

    @cached_property
    def language_code(self) -> str:
        """Return the key in ``config.languages`` that matches the file name.

        The file stem is lower-cased and ``-`` is replaced by ``_``. If that
        exact key is not configured, the part before the first ``_`` is tried.

        Raises:
            SystemExit: with exit code 1 when neither form is configured; the
                problem is logged first.
        """
        stem = self.file.stem
        language_code = stem.lower().replace("-", "_")
        if language_code in config.languages:
            return language_code

        # Fall back from a regional variant to its base language.
        base_code = language_code.split("_")[0]
        if base_code in config.languages:
            return base_code

        logger.error("Didn't recognize %s as a valid language code, add it to the config, or fix your structure.", stem)
        raise SystemExit(1)

    @cached_property
    def priority(self) -> int:
        """Return the ``priority`` value of this translation's language entry."""
        return LANGUAGES[self.language_code]["priority"]

    @cached_property
    def slug_path(self) -> List[str]:
        """Return the language code followed by the article's slug path."""
        return [self.language_code, *self.article.slug_path]

    @cached_property
    def url(self) -> str:
        """Return the site-absolute URL built from ``slug_path``."""
        return "/" + "/".join(self.slug_path)

    @cached_property
    def dist_path(self) -> Path:
        """Return the output directory of this translation inside the dist directory."""
        return Path(config.setup.dist_directory, *self.slug_path)

    context: ArticleTranslationContext
    cross_article_context: Dict[str, Any]

    def __init__(self, file: Path, article: Article):
        self.article = article
        self.file = file

        self.context = {}
        # The same dictionary object is registered globally under
        # [language code][article slug] and kept on the instance.
        self.cross_article_context = TRANSLATED_CROSS_ARTICLE_CONTEXT[self.language_code][self.article.slug] = {}

    def __init_context__(self):
        """Fill the content-independent parts of the contexts.

        Called by ``Article.__init_context__`` after the whole article tree
        has been constructed.
        """
        language = LANGUAGES[self.language_code]

        self.context["meta"] = self.article.context_shared
        self.context["url"] = self.url
        self.context["language"] = language
        self.context["article_url"] = self.article.url
        # Without any heading the language's native name is used as the title.
        self.context["title"] = get_first_header_content(self.html_content, fallback=language["native_name"])

        self.cross_article_context.update(self.article.context_shared)
        self.cross_article_context["title"] = self.context["title"]
        self.cross_article_context["article_url"] = self.article.url
        self.cross_article_context["url"] = self.url
        compile_cross_article_context(self.cross_article_context)

        # Contexts of the child articles that exist in this language.
        self.context["children"] = [
            child.article_translations_map[self.language_code].context
            for child in self.article.child_articles
            if self.language_code in child.article_translations_map
        ]

        self.linked_context = self.context["linked"] = []
        self.related_context = self.context["related"] = []

    def __init_content_context__(self):
        """Render the content as a Jinja template and record linked articles.

        The content is rendered with the cross-article contexts as variables
        (entries of this translation's language override the
        language-independent ones). Every variable name resolved during the
        render is recorded; each recorded name that is an article slug with a
        translation in this language contributes that translation's context
        to ``linked``. ``related`` is ``linked`` followed by ``children``.
        """
        template = jinja2.Template(self.html_content)
        environment = template.environment
        environment.accessed_keys = []
        environment.context_class = ContextDict

        try:
            self.html_content = template.render({
                **CROSS_ARTICLE_CONTEXT,
                **TRANSLATED_CROSS_ARTICLE_CONTEXT[self.language_code],
            })
        finally:
            # Always restore the default: an environment created by the
            # Template constructor may be shared with other templates.
            environment.context_class = jinja2.runtime.Context

        # dict.fromkeys removes repeated names while keeping first-use order,
        # so an article referenced several times is listed only once.
        for key in dict.fromkeys(environment.accessed_keys):
            linked_article = ARTICLE_LAKE.get(key)
            if linked_article is None:
                # The name is not an article slug (for example a Jinja global).
                continue
            translation = linked_article.article_translations_map.get(self.language_code)
            if translation is not None:
                self.linked_context.append(translation.context)

        self.related_context.extend(self.linked_context)
        self.related_context.extend(self.context["children"])

        self.context["content"] = self.html_content
        self.context["preview"] = get_preview_text(html_string=self.html_content)

    def build(self):
        """Render the ``article_translation`` template into ``<dist_path>/index.html``."""
        self.dist_path.mkdir(parents=True, exist_ok=True)

        # Render before opening the file so a template error does not leave
        # an empty index.html behind.
        rendered = TEMPLATE["article_translation"].render(self.context)
        with Path(self.dist_path, "index.html").open("w", encoding="utf-8") as f:
            f.write(rendered)


class ArticleConfig(TypedDict):
    """Declared shape of an article's ``index.toml`` as returned by ``Article.config``.

    ``Article.config`` yields an empty dict when the file does not exist, so
    every key is effectively optional. Of the keys declared here, the
    ``Article`` class reads ``name`` and ``datetime``.
    """
    slug: str
    name: str
    datetime: str
    author: str

class ArticleContext(TypedDict):
    """Declared shape of the ``context`` attribute of ``Article``.

    NOTE(review): ``Article.__init_context__`` writes the keys ``url``,
    ``slug``, ``date``, ``iso_date``, ``translations`` and ``children``, plus
    one key per translation language code. ``name``, ``datetime`` and
    ``author`` are never written there, so this declaration looks out of
    date; confirm before relying on it for type checking.
    """
    slug: str
    name: str
    datetime: str
    author: str
    url: str

class Article:
    """A node of the article tree, backed by one directory.

    Inside the directory ``index.toml`` is the optional configuration, every
    other file is a translation and every sub-directory is a child article.
    Constructing an article scans its directory recursively and registers the
    article in ``ARTICLE_LAKE`` and ``CROSS_ARTICLE_CONTEXT``.
    """

    directory: Path

    @cached_property
    def config(self) -> ArticleConfig:
        """Return the parsed ``index.toml``, or an empty dict if the file is absent."""
        config_file = self.directory / "index.toml"
        return toml.load(config_file) if config_file.exists() else {}

    @cached_property
    def slug(self) -> str:
        """Return the unique identifier of this article.

        Taken from the ``name`` key of the configuration, falling back to the
        directory name.

        Raises:
            SystemExit: with exit code 1 when another article is already
                registered under the same slug; the clash is logged first.
        """
        # NOTE(review): this reads the "name" key although ArticleConfig also
        # declares "slug" - confirm which key is intended.
        slug = self.config.get("name", self.directory.name)
        if slug in ARTICLE_LAKE:
            logger.error("two articles have the same name at %s and %r", ARTICLE_LAKE[slug].directory, self.directory)
            raise SystemExit(1)
        return slug

    @cached_property
    def name(self) -> str:
        """Return the ``name`` key of the configuration, falling back to the slug."""
        return self.config.get("name", self.slug)

    article_path: List[Article]

    @cached_property
    def slug_path(self) -> List[str]:
        """Return the slugs from below the root article down to this article."""
        # The first entry of article_path is skipped, so it never shows up in URLs.
        return [a.slug for a in self.article_path[1:]]

    @cached_property
    def url(self) -> str:
        """Return the site-absolute URL built from ``slug_path``."""
        return "/" + "/".join(self.slug_path)

    @cached_property
    def dist_path(self) -> Path:
        """Return the output directory of this article inside the dist directory."""
        return Path(config.setup.dist_directory, *self.slug_path)

    context: ArticleContext
    context_shared: Dict[str, Any]
    cross_article_context: Dict[str, Any]

    def __init__(self, directory: Path, article_path: Optional[List[Article]] = None, is_root: bool = False, parent: Optional[Article] = None):
        """Scan *directory* and build the sub-tree rooted at this article.

        Args:
            directory: Directory holding the article.
            article_path: Ancestors of this article, outermost first. The
                list is extended in place with this article.
            is_root: Accepted for callers; not used by this class.
            parent: Accepted for callers; not used by this class.
        """
        self.directory = directory

        self.article_path: List[Article] = article_path or []
        self.article_path.append(self)

        self.context: ArticleContext = {}
        self.context_shared = {}
        # Accessing self.slug here runs the duplicate check before this
        # article is itself registered in ARTICLE_LAKE.
        self.cross_article_context = CROSS_ARTICLE_CONTEXT[self.slug] = {}

        ARTICLE_LAKE[self.slug] = self

        # build the tree
        self.child_articles: List[Article] = []
        self.article_translations_list: List[ArticleTranslation] = []
        self.article_translations_map: Dict[str, ArticleTranslation] = {}

        # iterdir() yields entries in arbitrary order; sorting keeps the
        # order of children (and of equal-priority translations) stable
        # between builds.
        for entry in sorted(self.directory.iterdir()):
            if entry.name == "index.toml":
                continue

            if entry.is_file():
                translation = ArticleTranslation(entry, self)
                self.article_translations_list.append(translation)
                self.article_translations_map[translation.language_code] = translation
            elif entry.is_dir():
                self.child_articles.append(Article(
                    directory=entry,
                    article_path=self.article_path.copy(),
                    parent=self,
                ))

        # Highest language priority first.
        self.article_translations_list.sort(key=lambda a: a.priority, reverse=True)

        logger.info("found %s at %s with the translations %s", self.slug, ".".join(self.slug_path), ",".join(self.article_translations_map.keys()))

    def __init_context__(self):
        """Fill the template contexts of this article and, recursively, of its sub-tree."""
        self.context_shared["url"] = self.url
        self.context_shared["slug"] = self.slug

        if "datetime" in self.config:
            configured = self.config["datetime"]
            # The value may be a string or a date/datetime object that the
            # TOML parser already produced; both forms are accepted.
            if isinstance(configured, datetime):
                modified_at = configured
            else:
                modified_at = datetime.fromisoformat(str(configured))
        else:
            # No configured date: use the directory's modification time.
            modified_at = datetime.fromtimestamp(self.directory.stat().st_mtime)
        self.context_shared["date"] = modified_at.strftime(config.formatting.datetime_format)
        self.context_shared["iso_date"] = modified_at.isoformat()

        self.context.update(self.context_shared)

        self.cross_article_context.update(self.context_shared)
        self.cross_article_context["title"] = self.context_shared["slug"]
        self.cross_article_context["article_url"] = self.context_shared["url"]
        compile_cross_article_context(self.cross_article_context)

        # recursive context structures
        # The nested context dictionaries are linked in by reference and are
        # filled by the recursive calls below.
        translation_list = self.context["translations"] = []
        child_article_list = self.context["children"] = []

        for article_translation in self.article_translations_list:
            self.context[article_translation.language_code] = article_translation.context
            translation_list.append(article_translation.context)

        for child_article in self.child_articles:
            child_article_list.append(child_article.context)

        # recursively build context
        for article_translation in self.article_translations_list:
            article_translation.__init_context__()
        for child_article in self.child_articles:
            child_article.__init_context__()

    def __init_content_context__(self):
        """Run the content-dependent context pass on all translations, then on all children."""
        for article_translation in self.article_translations_list:
            article_translation.__init_content_context__()
        for child_article in self.child_articles:
            child_article.__init_content_context__()

    def build(self):
        """Write ``index.html`` for this article, then build its translations and children."""
        self.dist_path.mkdir(parents=True, exist_ok=True)

        # Render before opening the file so a template error does not leave
        # an empty index.html behind.
        rendered = TEMPLATE["article"].render(self.context)
        with Path(self.dist_path, "index.html").open("w", encoding="utf-8") as f:
            f.write(rendered)

        for article_translation in self.article_translations_list:
            article_translation.build()

        for child_article in self.child_articles:
            child_article.build()


class ContextDict(jinja2.runtime.Context):
    """Jinja context that records every key it is asked to resolve.

    Each looked-up key is appended to the ``accessed_keys`` list stored on the
    environment; whoever installs this class has to create that list first.
    """

    def resolve_or_missing(self, key: str) -> Any:
        # Record the key first, then let the regular resolution run.
        recorded_keys = self.environment.accessed_keys
        recorded_keys.append(key)
        return super().resolve_or_missing(key)


# GLOBALS
# These names are defined below the classes that use them; that works because
# they are only looked up when the methods run, not when the classes are defined.

# Logger used by every function and class in this module.
logger = logging.getLogger("stsg.build")
# article slug -> language-independent context; entries are created in Article.__init__.
CROSS_ARTICLE_CONTEXT: Dict[str, Dict[str, Any]] = {}
# language code -> article slug -> context; entries are created in ArticleTranslation.__init__.
TRANSLATED_CROSS_ARTICLE_CONTEXT: Dict[str, Dict[str, Dict[str, Any]]] = defaultdict(dict)
# article slug -> Article; also used to detect two articles with the same slug.
ARTICLE_LAKE: Dict[str, Article] = {}
# NOTE(review): not referenced anywhere in the visible part of this file.
ARTICLE_REFERENCE_VALUES: DefaultDict[str, Dict[str, str]] = defaultdict(dict)


def build():
    """Run a complete build.

    Steps, in order: copy the static folder into the dist directory, scan the
    ``articles`` directory into an article tree, compile the contexts, dump
    the compiled contexts as JSON files into the current working directory,
    and finally write all pages.
    """
    logger.info("starting build process...")

    logger.info("copying static folder...")
    static_source = Path(config.setup.source_directory, "static")
    static_target = Path(config.setup.dist_directory, "static")
    shutil.copytree(static_source, static_target, dirs_exist_ok=True)

    logger.info("building page tree...")
    articles_root = Path(config.setup.source_directory, "articles")
    tree = Article(directory=articles_root, is_root=True)

    logger.info("compiling tree context...")
    tree.__init_context__()
    tree.__init_content_context__()

    import json

    # Snapshots of the compiled contexts, written one after another.
    context_dumps = (
        ("context.json", tree.context),
        ("cross_article_context.json", CROSS_ARTICLE_CONTEXT),
        ("t_cross_article_context.json", TRANSLATED_CROSS_ARTICLE_CONTEXT),
    )
    for file_name, payload in context_dumps:
        with Path(file_name).open("w") as handle:
            json.dump(payload, handle, indent=4)

    logger.info("dumping page tree...")
    tree.build()