"""AutomaticEDA document model — format-independent blocks and chapters. This is the intermediate layer between *content* (what an EDA chapter wants to say) and *output format* (PDF for mobile reading, PPTX for sharing). A document is an ordered list of :class:`Chapter`. A chapter is ``{id, title, version, blocks}``. A block is one of a small, closed set of presentation primitives (heading, markdown, key/value table, data table, figure, image, caption, note). Neither renderer knows anything about the EDA profile: they only know how to lay out blocks so that **nothing is ever cut** — long text wraps to whole lines, long tables split by rows repeating the header, figures and images are scaled to fit entirely. Each chapter declares its own ``version`` so every page/slide can be stamped `` · v`` and tracked in a manifest for continuous, per-chapter improvement. Reading is defensive throughout (the ``eda`` group "dict-no-throw" style): the normalizers accept dataclass blocks *or* plain dicts, coerce anything unknown into a readable :class:`Note` instead of raising, and the renderers degrade a malformed block to text rather than crashing the whole document. """ from __future__ import annotations import json import os from dataclasses import dataclass, field from typing import Any, Callable, Optional # Global engine version. Bump when the document model or a renderer changes in a # way that affects output. Individual chapters carry their own CHAPTER_VERSION. ENGINE_VERSION = "1.0.0" ENGINE_NAME = "AutomaticEDA" # --------------------------------------------------------------------------- # # Block primitives. Each carries a stable ``kind`` string so renderers can # dispatch by kind (works for dataclass instances and for plain dicts alike). # --------------------------------------------------------------------------- # @dataclass class Heading: """A section heading. ``level`` 1 (largest) .. 3 (smallest).""" text: str = "" level: int = 1 kind: str = field(default="heading", init=False) @dataclass class Markdown: """A block of light markdown text. Supported subset (everything else is rendered verbatim, never dropped): ``#``/``##``/``###`` headings, ``-``/``*`` bullet lists, ``| a | b |`` tables (consecutive pipe lines become a data table), blank lines as paragraph breaks, and ``**bold**`` inline markers (markers are stripped, the text is kept). Text is wrapped to whole lines so it is never cut mid-line. """ text: str = "" kind: str = field(default="markdown", init=False) @dataclass class KVTable: """A two-column key/value table. ``rows`` is a list of ``(label, value)``.""" rows: list = field(default_factory=list) title: Optional[str] = None kind: str = field(default="kv_table", init=False) @dataclass class DataTable: """A tabular block with a header row. If it does not fit in the remaining page/slide space it is split by rows, **repeating the header** on each continuation. Long cell text wraps inside its column (the row grows taller) so no cell content is ever lost. """ header: list = field(default_factory=list) rows: list = field(default_factory=list) # list[list[Any]] title: Optional[str] = None note: Optional[str] = None kind: str = field(default="data_table", init=False) @dataclass class Figure: """A matplotlib figure, scaled to fit entirely (never cropped). Provide either an already-built ``fig`` (a ``matplotlib.figure.Figure``) or a zero-arg ``make`` callable that returns one (lazy: only built when the renderer needs it). ``height_in`` is an optional hint for the target height on the page; renderers clamp it to the available space preserving aspect. """ fig: Any = None make: Optional[Callable[[], Any]] = None caption: Optional[str] = None height_in: Optional[float] = None kind: str = field(default="figure", init=False) @dataclass class Image: """A raster image (PNG/JPG) by path, scaled to fit entirely.""" path: str = "" caption: Optional[str] = None height_in: Optional[float] = None kind: str = field(default="image", init=False) @dataclass class Caption: """Small auxiliary text rendered under a figure/table.""" text: str = "" kind: str = field(default="caption", init=False) @dataclass class Note: """Small auxiliary note (italic). Also the fallback for unknown content.""" text: str = "" kind: str = field(default="note", init=False) @dataclass class Group: """A keep-together unit: its blocks render on the SAME page/slide. Renderers measure the whole group first; if it does not fit in the remaining space they move it *whole* to the next page (PDF) or slide (PPTX) before drawing anything — so a heading never gets stranded apart from the figure and text it introduces. If the group is taller than a full page even on its own, it starts on a fresh page and flows (honest degradation, never cut). Use it to bind ``Heading`` + ``Markdown`` + ``Figure`` of one idea together (see the DISTR NUM / AGREGACION chapters). When ``page_break_before`` is True the renderer additionally forces the group to *start* on a fresh page/slide (unless the current one is already empty), so a chapter can give each unit its own page — e.g. one categorical column per page (see CAT DISTR). It is purely additive: the default False keeps the plain keep-together behaviour for every existing chapter. """ blocks: list = field(default_factory=list) title: Optional[str] = None page_break_before: bool = False kind: str = field(default="group", init=False) @dataclass class GlossaryEntry: """One glossary term: a clickable destination at the end of the document. Rendered as the term ``label`` (heading) plus its ``definition`` (markdown). The renderers register its page/slide position as the link target so every in-text appearance of the same ``key`` becomes a real clickable jump (PDF link annotation via PyMuPDF; PPTX internal slide jump).""" key: str = "" label: str = "" definition: str = "" kind: str = field(default="glossary_entry", init=False) @dataclass class Chapter: """An ordered set of blocks with an id, a title and a generation version.""" id: str = "" title: str = "" version: str = "1.0.0" blocks: list = field(default_factory=list) # --------------------------------------------------------------------------- # # Defensive normalizers — accept dataclasses OR plain dicts, never raise. # --------------------------------------------------------------------------- # _BLOCK_BY_KIND = { "heading": Heading, "markdown": Markdown, "kv_table": KVTable, "data_table": DataTable, "figure": Figure, "image": Image, "caption": Caption, "note": Note, "group": Group, "glossary_entry": GlossaryEntry, } def as_block(obj: Any): """Coerce a value into a block dataclass. Unknown values become a Note.""" if isinstance(obj, (Heading, Markdown, KVTable, DataTable, Figure, Image, Caption, Note, Group, GlossaryEntry)): if isinstance(obj, Group): obj.blocks = as_blocks(obj.blocks) return obj if isinstance(obj, dict): kind = obj.get("kind") cls = _BLOCK_BY_KIND.get(kind) if cls is None: return Note(text=_safe_str(obj)) # Build only with fields the dataclass accepts (ignore extras). try: if cls is Heading: return Heading(text=_safe_str(obj.get("text")), level=int(obj.get("level", 1) or 1)) if cls is Markdown: return Markdown(text=_safe_str(obj.get("text"))) if cls is KVTable: return KVTable(rows=list(obj.get("rows") or []), title=obj.get("title")) if cls is DataTable: return DataTable(header=list(obj.get("header") or []), rows=list(obj.get("rows") or []), title=obj.get("title"), note=obj.get("note")) if cls is Figure: return Figure(fig=obj.get("fig"), make=obj.get("make"), caption=obj.get("caption"), height_in=obj.get("height_in")) if cls is Image: return Image(path=_safe_str(obj.get("path")), caption=obj.get("caption"), height_in=obj.get("height_in")) if cls is Caption: return Caption(text=_safe_str(obj.get("text"))) if cls is Note: return Note(text=_safe_str(obj.get("text"))) if cls is Group: return Group(blocks=as_blocks(obj.get("blocks")), title=obj.get("title"), page_break_before=bool( obj.get("page_break_before", False))) if cls is GlossaryEntry: return GlossaryEntry(key=_safe_str(obj.get("key")), label=_safe_str(obj.get("label")), definition=_safe_str(obj.get("definition"))) except Exception: # noqa: BLE001 — never raise on a malformed block. return Note(text=_safe_str(obj)) return Note(text=_safe_str(obj)) def as_blocks(seq: Any) -> list: """Normalize an arbitrary sequence into a list of block dataclasses.""" if seq is None: return [] if not isinstance(seq, (list, tuple)): return [as_block(seq)] return [as_block(b) for b in seq] def as_chapter(obj: Any) -> Optional[Chapter]: """Coerce a value into a Chapter (or None). Accepts a dict or a Chapter.""" if obj is None: return None if isinstance(obj, Chapter): obj.blocks = as_blocks(obj.blocks) return obj if isinstance(obj, dict): return Chapter( id=_safe_str(obj.get("id")), title=_safe_str(obj.get("title")) or _safe_str(obj.get("id")), version=_safe_str(obj.get("version")) or "1.0.0", blocks=as_blocks(obj.get("blocks")), ) return None def as_chapters(seq: Any) -> list: """Normalize a sequence of chapters, dropping anything that can't coerce.""" if seq is None: return [] if isinstance(seq, Chapter): return [as_chapter(seq)] if not isinstance(seq, (list, tuple)): return [] out = [] for c in seq: ch = as_chapter(c) if ch is not None: out.append(ch) return out def _safe_str(v: Any) -> str: """str() that never raises and maps None to ''.""" if v is None: return "" try: return str(v) except Exception: # noqa: BLE001 return "" # --------------------------------------------------------------------------- # # Glossary collector — chapters register the terms they use; the glosario # chapter renders them at the end and the renderers wire the clickable links. # --------------------------------------------------------------------------- # class GlossaryCollector: """Accumulates glossary terms registered by chapters during document build. A single instance is created by :func:`build_document` and passed to every chapter via ``ctx['glossary']``. A chapter calls ``add(key, label, definition)`` to declare a term it explains (e.g. ``"entropia"`` → "Entropía"), and marks each in-text appearance with the inline span ``[[term:key]]texto visible[[/term]]`` (see ``text_layout.parse_inline_rich``). The ``glosario`` chapter reads ``terms()`` to emit one :class:`GlossaryEntry` per term; the renderers turn every marked appearance into a real click that jumps to that entry. First registration of a key wins (idempotent); never raises.""" def __init__(self): self._terms: dict = {} self._order: list = [] def add(self, key: Any, label: Any = None, definition: Any = "") -> str: """Register a term and return its normalized key (''. if invalid).""" try: k = _safe_str(key).strip() if not k: return "" if k not in self._terms: self._terms[k] = { "key": k, "label": _safe_str(label).strip() or k, "definition": _safe_str(definition), } self._order.append(k) return k except Exception: # noqa: BLE001 — collecting a term never breaks a build. return "" def has(self, key: Any) -> bool: return _safe_str(key).strip() in self._terms def get(self, key: Any) -> Optional[dict]: return self._terms.get(_safe_str(key).strip()) def terms(self, by: str = "label") -> list: """Return the registered terms as dicts. ``by='label'`` (default) sorts alphabetically by visible label; ``by='order'`` keeps first-appearance order.""" if by == "order": return [self._terms[k] for k in self._order] return sorted(self._terms.values(), key=lambda t: _safe_str(t.get("label")).lower()) def __len__(self) -> int: return len(self._terms) def __bool__(self) -> bool: return bool(self._terms) # --------------------------------------------------------------------------- # # Manifest — per-chapter versions and page/slide counts for tracking. # --------------------------------------------------------------------------- # def merge_manifest(manifest_path: str, renderer: str, chapters_meta: list, generated_at: str, engine_version: str = ENGINE_VERSION) -> dict: """Read-modify-write the AutomaticEDA manifest, merging one renderer's run. The manifest lives next to the outputs as ``automatic_eda_manifest.json`` and records, per chapter, its version plus the page count (PDF) and slide count (PPTX). Calling either renderer creates or updates it. Never raises: on any error returns the in-memory manifest without writing. Args: manifest_path: path to the JSON manifest to create or update. renderer: "pdf" or "pptx" — selects which count key is written. chapters_meta: list of ``{"id", "version", "n_pages"|"n_slides"}``. generated_at: ISO-ish timestamp string for this run. engine_version: AutomaticEDA engine version. Returns: The merged manifest dict (also written to disk on success). """ data: dict = {} try: if manifest_path and os.path.exists(manifest_path): with open(manifest_path, "r", encoding="utf-8") as fh: loaded = json.load(fh) if isinstance(loaded, dict): data = loaded except Exception: # noqa: BLE001 — a corrupt manifest is overwritten. data = {} data["engine"] = ENGINE_NAME data["engine_version"] = engine_version data["generated_at"] = generated_at chapters = data.get("chapters") if not isinstance(chapters, dict): chapters = {} count_key = "n_slides" if renderer == "pptx" else "n_pages" for cm in chapters_meta or []: if not isinstance(cm, dict): continue cid = cm.get("id") if not cid: continue entry = chapters.get(cid) if not isinstance(entry, dict): entry = {} entry["version"] = cm.get("version") or entry.get("version") or "1.0.0" entry[count_key] = cm.get(count_key, cm.get("n_pages", cm.get("n_slides"))) chapters[cid] = entry data["chapters"] = chapters try: parent = os.path.dirname(os.path.abspath(manifest_path)) os.makedirs(parent, exist_ok=True) with open(manifest_path, "w", encoding="utf-8") as fh: json.dump(data, fh, ensure_ascii=False, indent=2, default=str) except Exception: # noqa: BLE001 — never raise from the manifest writer. pass return data