merge(eda): portada v2 (sin Criterios, descripcion LLM, resumen a la derecha) + zebra global PDF + nombre PPTX grande/subrayado

2026-06-30 22:53:46 +02:00
parent ecc22d6d57 80d10010f5
commit f2eb782a5f
4 changed files with 77 additions and 26 deletions
@@ -26,7 +26,7 @@ from datetime import datetime, timezone

 from .. import model

-CHAPTER_VERSION = "1.2.0"
+CHAPTER_VERSION = "1.3.0"
 CHAPTER_ID = "portada"
 CHAPTER_TITLE = "Portada"

@@ -35,12 +35,9 @@ CHAPTER_TITLE = "Portada"
 # row represents) from it when the LLM layer ran (``run_llm``).
 _LLM_KEY = "llm"

-# Default human description of what the table quality score measures. Chapters
-# can override it via ctx["quality_criteria"].
-_DEFAULT_QUALITY_CRITERIA = (
-    "media de los scores por columna (0–100): completitud (sin nulos/vacíos), "
-    "validez (tipo y rango coherentes) y consistencia (sin duplicados/constantes)."
-)
+# Font size (pt) for the dataset name on the PPTX cover slide — notably larger
+# than the default H1 so the dataset name stands out (shown underlined too).
+_PPTX_TITLE_PT = 44.0


 def _storage_from_source(source: str) -> str:
@@ -120,7 +117,8 @@ def _summary_blocks(summary) -> list:

    blocks = [model.Heading(text="Resumen del análisis", level=2)]
    if rows:
-        blocks.append(model.KVTable(rows=rows))
+        # Values pinned to the right margin (numbers flush right, label left).
+        blocks.append(model.KVTable(rows=rows, value_align="right"))
    if titles:
        bullets = "\n".join(f"- {model._safe_str(t)}" for t in titles)
        blocks.append(model.Markdown(
@@ -213,9 +211,7 @@ def _derive_description(profile: dict, ctx: dict) -> str:
    score = profile.get("quality_score")
    if score is not None:
        parts.append(f"Calidad media estimada: {score}/100.")
-    parts.append(
-        "Resumen derivado del perfil; active la interpretación LLM (`run_llm`) "
-        "para una descripción de negocio más rica.")
+    parts.append("Resumen derivado del perfil.")
    return " ".join(parts)


@@ -259,7 +255,6 @@ def build_portada(profile: dict, ctx: dict):
    shape = f"{_fmt_int(n_rows)} filas × {_fmt_int(n_cols)} columnas"

    score = profile.get("quality_score")
-    quality_criteria = ctx.get("quality_criteria") or _DEFAULT_QUALITY_CRITERIA
    quality_value = "—" if score is None else f"{score} / 100"

    llm = _llm_block(profile, ctx)
@@ -282,8 +277,11 @@ def build_portada(profile: dict, ctx: dict):

    # Title + dataset size shown together and BIG (Heading) at the top, kept on
    # the same page (Group). The size is no longer buried in the metadata table.
+    # The dataset name is shown big and underlined on the PPTX cover slide
+    # (size_pt/underline are honoured by the PPTX renderer; the PDF ignores them).
    cover = [
-        model.Heading(text=str(dataset_name), level=1),
+        model.Heading(text=str(dataset_name), level=1, underline=True,
+                      size_pt=_PPTX_TITLE_PT),
        model.Markdown(text="**Automatic-EDA** · informe exploratorio automático"),
        model.Heading(text=shape, level=2),
    ]
@@ -295,7 +293,6 @@ def build_portada(profile: dict, ctx: dict):
            ("Almacenamiento", storage),
            ("Generado", when),
            ("Calidad", quality_value),
-            ("Criterios de calidad", quality_criteria),
        ]),
        model.Heading(text="Descripción", level=2),
        model.Markdown(text=str(description)),
@@ -38,10 +38,18 @@ ENGINE_NAME = "AutomaticEDA"
 # --------------------------------------------------------------------------- #
@dataclass
 class Heading:
-    """A section heading. ``level`` 1 (largest) .. 3 (smallest)."""
+    """A section heading. ``level`` 1 (largest) .. 3 (smallest).
+
+    ``underline`` and ``size_pt`` are optional emphasis hints honoured by the
+    PPTX renderer (the cover uses them to show the dataset name big and
+    underlined). ``size_pt`` overrides the per-level font size when set; the PDF
+    renderer ignores both so its layout is unchanged.
+    """

    text: str = ""
    level: int = 1
+    underline: bool = False
+    size_pt: Optional[float] = None
    kind: str = field(default="heading", init=False)


@@ -62,10 +70,17 @@ class Markdown:

@dataclass
 class KVTable:
-    """A two-column key/value table. ``rows`` is a list of ``(label, value)``."""
+    """A two-column key/value table. ``rows`` is a list of ``(label, value)``.
+
+    ``value_align`` controls the horizontal alignment of the value column in the
+    PDF renderer: ``"left"`` (default) keeps values next to the label column;
+    ``"right"`` pins them to the right margin (used by the cover's analysis
+    summary so the numbers line up flush right).
+    """

    rows: list = field(default_factory=list)
    title: Optional[str] = None
+    value_align: str = "left"
    kind: str = field(default="kv_table", init=False)


@@ -210,13 +225,20 @@ def as_block(obj: Any):
        # Build only with fields the dataclass accepts (ignore extras).
        try:
            if cls is Heading:
+                size_pt = obj.get("size_pt")
                return Heading(text=_safe_str(obj.get("text")),
-                               level=int(obj.get("level", 1) or 1))
+                               level=int(obj.get("level", 1) or 1),
+                               underline=bool(obj.get("underline", False)),
+                               size_pt=(float(size_pt)
+                                        if isinstance(size_pt, (int, float))
+                                        else None))
            if cls is Markdown:
                return Markdown(text=_safe_str(obj.get("text")))
            if cls is KVTable:
                return KVTable(rows=list(obj.get("rows") or []),
-                               title=obj.get("title"))
+                               title=obj.get("title"),
+                               value_align=_safe_str(
+                                   obj.get("value_align")) or "left")
            if cls is DataTable:
                return DataTable(header=list(obj.get("header") or []),
                                 rows=list(obj.get("rows") or []),
@@ -317,10 +317,18 @@ def _place_kv_table(st: _PdfState, block) -> None:
    if title:
        _place_heading(st, model.Heading(title, level=2))
    rows = getattr(block, "rows", []) or []
+    # ``value_align="right"`` pins the value column to the right margin (label
+    # left, number flush right) — used by the cover's analysis summary.
+    right = str(getattr(block, "value_align", "left")).lower() == "right"
    key_w = 1.9  # inches reserved for the label column.
+    # Both alignments wrap values against the same width (usable width minus
+    # the label column); "right" only moves the anchor to the right margin.
    val_chars = tl.chars_per_line(_USABLE_W - key_w - 0.1, _FS_BODY)
    lh = tl.line_height_in(_FS_BODY)
-    for row in rows:
+    # ``data_idx`` is the 0-based logical row index: even rows (1-based) are
+    # zebra-shaded → 0-based odd indices, matching the data-table convention so
+    # every table in the document carries the same striping.
+    for data_idx, row in enumerate(rows):
        try:
            label, value = row[0], row[1]
        except Exception:  # noqa: BLE001
@@ -329,11 +337,25 @@ def _place_kv_table(st: _PdfState, block) -> None:
        row_h = lh * len(v_lines) + _ROW_VPAD
        _ensure_space(st, row_h)
        y0 = st.y
+        # Faint zebra fill for even rows, drawn first (zorder 0) so striping
+        # never hides the text/value drawn on top.
+        if data_idx % 2 == 1:
+            st.fig.add_artist(Rectangle(
+                (_xf(_ML), _yf(y0 + row_h)), _xf(_ML + _USABLE_W) - _xf(_ML),
+                _yf(y0) - _yf(y0 + row_h), transform=st.fig.transFigure,
+                color=_ZEBRA, lw=0, zorder=0))
        st.fig.text(_xf(_ML), _yf(y0), tl.strip_inline_md(model._safe_str(label)),
-                    fontsize=_FS_BODY, color=_MUTED, ha="left", va="top")
+                    fontsize=_FS_BODY, color=_MUTED, ha="left", va="top",
+                    zorder=2)
        for k, vl in enumerate(v_lines):
-            st.fig.text(_xf(_ML + key_w), _yf(y0 + k * lh), vl,
-                        fontsize=_FS_BODY, color=_INK, ha="left", va="top")
+            if right:
+                st.fig.text(_xf(_ML + _USABLE_W), _yf(y0 + k * lh), vl,
+                            fontsize=_FS_BODY, color=_INK, ha="right",
+                            va="top", zorder=2)
+            else:
+                st.fig.text(_xf(_ML + key_w), _yf(y0 + k * lh), vl,
+                            fontsize=_FS_BODY, color=_INK, ha="left",
+                            va="top", zorder=2)
        st.y = y0 + row_h
    st.y += _GAP

@@ -135,7 +135,7 @@ def _ensure(st: _PptxState, height: float) -> None:


 def _add_text(st: _PptxState, lines: list, fs: float, color, bold=False,
-              italic=False, indent=0.0, bullet=False) -> None:
+              italic=False, indent=0.0, bullet=False, underline=False) -> None:
    lh = tl.line_height_in(fs)
    height = lh * len(lines) + 0.05
    _ensure(st, height)
@@ -153,6 +153,7 @@ def _add_text(st: _PptxState, lines: list, fs: float, color, bold=False,
        run.font.size = Pt(fs)
        run.font.bold = bold
        run.font.italic = italic
+        run.font.underline = underline
        run.font.color.rgb = _rgb(color)
    st.y += height

@@ -206,10 +207,16 @@ def _add_rich_text(st: _PptxState, rich_lines: list, fs: float, color,
 def _place_heading(st: _PptxState, block) -> None:
    level = max(1, min(3, int(getattr(block, "level", 1) or 1)))
    fs = {1: _FS_H1, 2: _FS_H2, 3: _FS_H3}[level]
+    # Optional per-heading emphasis (cover dataset name): a larger font and an
+    # underline. ``size_pt`` overrides the per-level size when set.
+    size_override = getattr(block, "size_pt", None)
+    if isinstance(size_override, (int, float)) and size_override > 0:
+        fs = float(size_override)
+    underline = bool(getattr(block, "underline", False))
    text = tl.strip_inline_md(getattr(block, "text", ""))
    st.last_heading = text or st.last_heading
    lines = tl.wrap(text, tl.chars_per_line(_USABLE_W, fs))
-    _add_text(st, lines, fs, _INK, bold=True)
+    _add_text(st, lines, fs, _INK, bold=True, underline=underline)
    st.y += 0.04


@@ -552,9 +559,11 @@ def _place_note(st: _PptxState, block) -> None:
 # WITHOUT drawing it so a Group can move whole to the next slide before drawing.
 # Over-estimating only triggers an earlier slide break, never a content cut.
 # --------------------------------------------------------------------------- #
-def _measure_heading_text(text: str, level: int) -> float:
+def _measure_heading_text(text: str, level: int, size_pt=None) -> float:
    level = max(1, min(3, int(level or 1)))
    fs = {1: _FS_H1, 2: _FS_H2, 3: _FS_H3}[level]
+    if isinstance(size_pt, (int, float)) and size_pt > 0:
+        fs = float(size_pt)
    lines = tl.wrap(tl.strip_inline_md(text), tl.chars_per_line(_USABLE_W, fs))
    return tl.line_height_in(fs) * len(lines) + 0.05 + 0.04

@@ -679,7 +688,8 @@ def _measure_block(st: _PptxState, block) -> float:
    try:
        if kind == "heading":
            return _measure_heading_text(getattr(block, "text", ""),
-                                         getattr(block, "level", 1))
+                                         getattr(block, "level", 1),
+                                         size_pt=getattr(block, "size_pt", None))
        if kind == "markdown":
            return _measure_markdown(block)
        if kind in ("figure", "image"):