"""AutomaticEDA document model — format-independent blocks and chapters. This is the intermediate layer between *content* (what an EDA chapter wants to say) and *output format* (PDF for mobile reading, PPTX for sharing). A document is an ordered list of :class:`Chapter`. A chapter is ``{id, title, version, blocks}``. A block is one of a small, closed set of presentation primitives (heading, markdown, key/value table, data table, figure, image, caption, note). Neither renderer knows anything about the EDA profile: they only know how to lay out blocks so that **nothing is ever cut** — long text wraps to whole lines, long tables split by rows repeating the header, figures and images are scaled to fit entirely. Each chapter declares its own ``version`` so every page/slide can be stamped `` · v`` and tracked in a manifest for continuous, per-chapter improvement. Reading is defensive throughout (the ``eda`` group "dict-no-throw" style): the normalizers accept dataclass blocks *or* plain dicts, coerce anything unknown into a readable :class:`Note` instead of raising, and the renderers degrade a malformed block to text rather than crashing the whole document. """ from __future__ import annotations import json import os from dataclasses import dataclass, field from typing import Any, Callable, Optional # Global engine version. Bump when the document model or a renderer changes in a # way that affects output. Individual chapters carry their own CHAPTER_VERSION. ENGINE_VERSION = "1.0.0" ENGINE_NAME = "AutomaticEDA" # --------------------------------------------------------------------------- # # Block primitives. Each carries a stable ``kind`` string so renderers can # dispatch by kind (works for dataclass instances and for plain dicts alike). # --------------------------------------------------------------------------- # @dataclass class Heading: """A section heading. ``level`` 1 (largest) .. 3 (smallest).""" text: str = "" level: int = 1 kind: str = field(default="heading", init=False) @dataclass class Markdown: """A block of light markdown text. Supported subset (everything else is rendered verbatim, never dropped): ``#``/``##``/``###`` headings, ``-``/``*`` bullet lists, ``| a | b |`` tables (consecutive pipe lines become a data table), blank lines as paragraph breaks, and ``**bold**`` inline markers (markers are stripped, the text is kept). Text is wrapped to whole lines so it is never cut mid-line. """ text: str = "" kind: str = field(default="markdown", init=False) @dataclass class KVTable: """A two-column key/value table. ``rows`` is a list of ``(label, value)``.""" rows: list = field(default_factory=list) title: Optional[str] = None kind: str = field(default="kv_table", init=False) @dataclass class DataTable: """A tabular block with a header row. If it does not fit in the remaining page/slide space it is split by rows, **repeating the header** on each continuation. Long cell text wraps inside its column (the row grows taller) so no cell content is ever lost. """ header: list = field(default_factory=list) rows: list = field(default_factory=list) # list[list[Any]] title: Optional[str] = None note: Optional[str] = None kind: str = field(default="data_table", init=False) @dataclass class Figure: """A matplotlib figure, scaled to fit entirely (never cropped). Provide either an already-built ``fig`` (a ``matplotlib.figure.Figure``) or a zero-arg ``make`` callable that returns one (lazy: only built when the renderer needs it). ``height_in`` is an optional hint for the target height on the page; renderers clamp it to the available space preserving aspect. """ fig: Any = None make: Optional[Callable[[], Any]] = None caption: Optional[str] = None height_in: Optional[float] = None kind: str = field(default="figure", init=False) @dataclass class Image: """A raster image (PNG/JPG) by path, scaled to fit entirely.""" path: str = "" caption: Optional[str] = None height_in: Optional[float] = None kind: str = field(default="image", init=False) @dataclass class Caption: """Small auxiliary text rendered under a figure/table.""" text: str = "" kind: str = field(default="caption", init=False) @dataclass class Note: """Small auxiliary note (italic). Also the fallback for unknown content.""" text: str = "" kind: str = field(default="note", init=False) @dataclass class Chapter: """An ordered set of blocks with an id, a title and a generation version.""" id: str = "" title: str = "" version: str = "1.0.0" blocks: list = field(default_factory=list) # --------------------------------------------------------------------------- # # Defensive normalizers — accept dataclasses OR plain dicts, never raise. # --------------------------------------------------------------------------- # _BLOCK_BY_KIND = { "heading": Heading, "markdown": Markdown, "kv_table": KVTable, "data_table": DataTable, "figure": Figure, "image": Image, "caption": Caption, "note": Note, } def as_block(obj: Any): """Coerce a value into a block dataclass. Unknown values become a Note.""" if isinstance(obj, (Heading, Markdown, KVTable, DataTable, Figure, Image, Caption, Note)): return obj if isinstance(obj, dict): kind = obj.get("kind") cls = _BLOCK_BY_KIND.get(kind) if cls is None: return Note(text=_safe_str(obj)) # Build only with fields the dataclass accepts (ignore extras). try: if cls is Heading: return Heading(text=_safe_str(obj.get("text")), level=int(obj.get("level", 1) or 1)) if cls is Markdown: return Markdown(text=_safe_str(obj.get("text"))) if cls is KVTable: return KVTable(rows=list(obj.get("rows") or []), title=obj.get("title")) if cls is DataTable: return DataTable(header=list(obj.get("header") or []), rows=list(obj.get("rows") or []), title=obj.get("title"), note=obj.get("note")) if cls is Figure: return Figure(fig=obj.get("fig"), make=obj.get("make"), caption=obj.get("caption"), height_in=obj.get("height_in")) if cls is Image: return Image(path=_safe_str(obj.get("path")), caption=obj.get("caption"), height_in=obj.get("height_in")) if cls is Caption: return Caption(text=_safe_str(obj.get("text"))) if cls is Note: return Note(text=_safe_str(obj.get("text"))) except Exception: # noqa: BLE001 — never raise on a malformed block. return Note(text=_safe_str(obj)) return Note(text=_safe_str(obj)) def as_blocks(seq: Any) -> list: """Normalize an arbitrary sequence into a list of block dataclasses.""" if seq is None: return [] if not isinstance(seq, (list, tuple)): return [as_block(seq)] return [as_block(b) for b in seq] def as_chapter(obj: Any) -> Optional[Chapter]: """Coerce a value into a Chapter (or None). Accepts a dict or a Chapter.""" if obj is None: return None if isinstance(obj, Chapter): obj.blocks = as_blocks(obj.blocks) return obj if isinstance(obj, dict): return Chapter( id=_safe_str(obj.get("id")), title=_safe_str(obj.get("title")) or _safe_str(obj.get("id")), version=_safe_str(obj.get("version")) or "1.0.0", blocks=as_blocks(obj.get("blocks")), ) return None def as_chapters(seq: Any) -> list: """Normalize a sequence of chapters, dropping anything that can't coerce.""" if seq is None: return [] if isinstance(seq, Chapter): return [as_chapter(seq)] if not isinstance(seq, (list, tuple)): return [] out = [] for c in seq: ch = as_chapter(c) if ch is not None: out.append(ch) return out def _safe_str(v: Any) -> str: """str() that never raises and maps None to ''.""" if v is None: return "" try: return str(v) except Exception: # noqa: BLE001 return "" # --------------------------------------------------------------------------- # # Manifest — per-chapter versions and page/slide counts for tracking. # --------------------------------------------------------------------------- # def merge_manifest(manifest_path: str, renderer: str, chapters_meta: list, generated_at: str, engine_version: str = ENGINE_VERSION) -> dict: """Read-modify-write the AutomaticEDA manifest, merging one renderer's run. The manifest lives next to the outputs as ``automatic_eda_manifest.json`` and records, per chapter, its version plus the page count (PDF) and slide count (PPTX). Calling either renderer creates or updates it. Never raises: on any error returns the in-memory manifest without writing. Args: manifest_path: path to the JSON manifest to create or update. renderer: "pdf" or "pptx" — selects which count key is written. chapters_meta: list of ``{"id", "version", "n_pages"|"n_slides"}``. generated_at: ISO-ish timestamp string for this run. engine_version: AutomaticEDA engine version. Returns: The merged manifest dict (also written to disk on success). """ data: dict = {} try: if manifest_path and os.path.exists(manifest_path): with open(manifest_path, "r", encoding="utf-8") as fh: loaded = json.load(fh) if isinstance(loaded, dict): data = loaded except Exception: # noqa: BLE001 — a corrupt manifest is overwritten. data = {} data["engine"] = ENGINE_NAME data["engine_version"] = engine_version data["generated_at"] = generated_at chapters = data.get("chapters") if not isinstance(chapters, dict): chapters = {} count_key = "n_slides" if renderer == "pptx" else "n_pages" for cm in chapters_meta or []: if not isinstance(cm, dict): continue cid = cm.get("id") if not cid: continue entry = chapters.get(cid) if not isinstance(entry, dict): entry = {} entry["version"] = cm.get("version") or entry.get("version") or "1.0.0" entry[count_key] = cm.get(count_key, cm.get("n_pages", cm.get("n_slides"))) chapters[cid] = entry data["chapters"] = chapters try: parent = os.path.dirname(os.path.abspath(manifest_path)) os.makedirs(parent, exist_ok=True) with open(manifest_path, "w", encoding="utf-8") as fh: json.dump(data, fh, ensure_ascii=False, indent=2, default=str) except Exception: # noqa: BLE001 — never raise from the manifest writer. pass return data