feat(eda): portada — tamaño grande + descripción/granularidad reales

El capítulo PORTADA ahora muestra SIEMPRE el tamaño del dataset (N filas × M columnas) en grande, como heading junto al nombre y agrupado con él (Group keep-together), en lugar de enterrarlo en la tabla de metadatos. La Descripción y la Granularidad ya no salen vacías ni con placeholders: se resuelven por cascada — ctx explícito > bloque LLM (profile['llm'].summary / row_meaning de eda_llm_insights) > derivación del propio perfil (forma, mezcla de tipos y score de calidad para la descripción; columnas key_candidates o la forma de la tabla para una frase 'Cada fila es…'). Las derivaciones son honestas (declaran que vienen del perfil) y nunca inventan significado de negocio.

Añade chapters/portada_test.py: golden (tamaño grande + textos del LLM, sin fila 'Tamaño' duplicada), fallbacks sin LLM (keys / forma), prioridad de ctx, edge de perfil vacío sin lanzar, y render a PDF + PPTX.

Además, este commit deja de generar `head_rows` (df.head) para el capítulo OVERVIEW: elimina su cálculo en `profile_table` y en `build_eda_render_ctx` (parámetro `head_n`), simplifica la nota de la tabla a «primeras N filas», devuelve el `CHAPTER_VERSION` de overview a 1.0.0 y borra los tests de ese capítulo.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
2026-06-30 18:04:05 +02:00
6 changed files with 323 additions and 254 deletions
@@ -20,7 +20,7 @@ from __future__ import annotations

 from .. import model

-CHAPTER_VERSION = "1.1.0"
+CHAPTER_VERSION = "1.0.0"
 CHAPTER_ID = "overview"
 CHAPTER_TITLE = "Overview"

@@ -90,14 +90,8 @@ def _head_block(profile: dict, ctx: dict):
        if not cols:
            cols = list(head[0].keys())
        rows = [[model._safe_str(r.get(c)) for c in cols] for r in head[:10]]
-        # Honest note: how many rows are shown and, when known, out of how many
-        # rows the dataset has (so "primeras 10 filas de 891" gives context).
-        note = f"primeras {len(rows)} filas"
-        n_rows = profile.get("n_rows")
-        if isinstance(n_rows, int) and not isinstance(n_rows, bool) \
-                and n_rows > len(rows):
-            note += f" de {n_rows:,}".replace(",", ".")
-        return model.DataTable(header=cols, rows=rows, note=note)
+        return model.DataTable(header=cols, rows=rows,
+                               note=f"primeras {len(rows)} filas")
    return model.Note(
        "df.head no disponible: el TableProfile no incluye 'head_rows'. La fase "
        "de cálculo debe añadir profile['head_rows'] (lista de dicts fila) o "
@@ -1,187 +0,0 @@
-"""Tests for the OVERVIEW chapter — DoD: golden + edges + degradation.
-
-Self-contained: builds synthetic TableProfiles (no DuckDB) so the suite is fast
-and deterministic. Verifies that ``build_overview`` renders the raw first rows
-(``df.head``) as a DataTable when ``head_rows`` is present — both when it arrives
-via ``profile['head_rows']`` (populated by ``profile_table``) and via
-``ctx['head_rows']`` (populated by ``build_eda_render_ctx``) — that the chapter
-also renders the column dictionary and the numeric describe, that the full
-document renders to PDF and PPTX showing the head values, and that a profile with
-NO head data degrades to an honest note instead of raising or inventing rows.
-"""
-
-import os
-import re
-import tempfile
-
-from pypdf import PdfReader
-from pptx import Presentation
-
-from datascience.automatic_eda.model import DataTable, Note
-from datascience.automatic_eda.chapters.overview import (
-    CHAPTER_ID, CHAPTER_VERSION, build_overview,
-)
-from datascience.render_automatic_eda_pdf import render_automatic_eda_pdf
-from datascience.render_automatic_eda_pptx import render_automatic_eda_pptx
-
-
-def _columns() -> list:
-    return [
-        {"name": "PassengerId", "inferred_type": "numeric", "null_pct": 0.0,
-         "null_count": 0, "numeric": {"mean": 2.0, "median": 2.0, "min": 1.0,
-                                      "max": 3.0, "std": 1.0}},
-        {"name": "Survived", "inferred_type": "numeric", "null_pct": 0.0,
-         "null_count": 0, "numeric": {"mean": 0.33, "median": 0.0, "min": 0.0,
-                                      "max": 1.0, "std": 0.58}},
-        {"name": "Pclass", "inferred_type": "numeric", "null_pct": 0.0,
-         "null_count": 0, "numeric": {"mean": 2.33, "median": 3.0, "min": 1.0,
-                                      "max": 3.0, "std": 1.15}},
-        {"name": "Name", "inferred_type": "categorical", "null_pct": 0.0,
-         "null_count": 0, "distinct_count": 3},
-        {"name": "Sex", "inferred_type": "categorical", "null_pct": 0.0,
-         "null_count": 0, "distinct_count": 2,
-         "categorical": {"top": [{"value": "male", "count": 2},
-                                 {"value": "female", "count": 1}]}},
-    ]
-
-
-def _head_rows() -> list:
-    return [
-        {"PassengerId": 1, "Survived": 0, "Pclass": 3,
-         "Name": "Braund Owen", "Sex": "male"},
-        {"PassengerId": 2, "Survived": 1, "Pclass": 1,
-         "Name": "Cumings Florence", "Sex": "female"},
-        {"PassengerId": 3, "Survived": 1, "Pclass": 3,
-         "Name": "Heikkinen Laina", "Sex": "female"},
-    ]
-
-
-def _profile(with_head: bool = True) -> dict:
-    prof = {
-        "table": "titanic",
-        "source": "/data/titanic.csv",
-        "profiled_at": "2026-06-30T10:00:00+00:00",
-        "n_rows": 891,
-        "n_cols": 5,
-        "quality_score": 88.0,
-        "columns": _columns(),
-    }
-    if with_head:
-        prof["head_rows"] = _head_rows()
-    return prof
-
-
-def _pdf_text(path: str) -> str:
-    txt = "".join((pg.extract_text() or "") for pg in PdfReader(path).pages)
-    return re.sub(r"\s+", " ", txt)
-
-
-def _pptx_text(path: str) -> str:
-    prs = Presentation(path)
-    parts = []
-    for sl in prs.slides:
-        for sh in sl.shapes:
-            if sh.has_text_frame:
-                parts.append(sh.text_frame.text)
-            if sh.has_table:
-                tb = sh.table
-                for r in range(len(tb.rows)):
-                    for c in range(len(tb.columns)):
-                        parts.append(tb.cell(r, c).text)
-    return re.sub(r"\s+", " ", " ".join(parts))
-
-
-def _flatten(blocks):
-    """Recursively flatten Group blocks into a flat list (none here today)."""
-    out = []
-    for b in blocks:
-        inner = getattr(b, "blocks", None)
-        if inner is not None and getattr(b, "kind", None) == "group":
-            out.extend(_flatten(inner))
-        else:
-            out.append(b)
-    return out
-
-
-def test_golden_build_overview_muestra_head_desde_profile():
-    ch = build_overview(_profile(), {})
-    assert ch is not None
-    assert ch.id == CHAPTER_ID
-    assert ch.version == CHAPTER_VERSION
-    blocks = _flatten(ch.blocks)
-    # The first DataTable is df.head: its header is the column names and the
-    # real first rows are present (not a placeholder note).
-    tables = [b for b in blocks if isinstance(b, DataTable)]
-    assert tables, "overview must emit at least the df.head DataTable"
-    head_tbl = tables[0]
-    assert head_tbl.header == ["PassengerId", "Survived", "Pclass",
-                               "Name", "Sex"]
-    assert len(head_tbl.rows) == 3
-    flat = [str(c) for row in head_tbl.rows for c in row]
-    assert "Braund Owen" in flat and "Cumings Florence" in flat
-    # Honest note carries how many rows shown out of the dataset total.
-    assert head_tbl.note is not None
-    assert "primeras 3 filas" in head_tbl.note and "891" in head_tbl.note
-    # No "df.head no disponible" placeholder when head_rows is present.
-    assert not any(isinstance(b, Note) and "no disponible" in b.text
-                   for b in blocks)
-
-
-def test_golden_head_desde_ctx_tambien_funciona():
-    # head_rows absent in profile but present in ctx (build_eda_render_ctx path).
-    prof = _profile(with_head=False)
-    ch = build_overview(prof, {"head_rows": _head_rows()})
-    assert ch is not None
-    tables = [b for b in _flatten(ch.blocks) if isinstance(b, DataTable)]
-    flat = [str(c) for row in tables[0].rows for c in row]
-    assert "Braund Owen" in flat
-
-
-def test_golden_render_pdf_muestra_head():
-    with tempfile.TemporaryDirectory() as d:
-        out = os.path.join(d, "eda.pdf")
-        res = render_automatic_eda_pdf(_profile(), out, {"title": "EDA"})
-        assert res["path"] == out and os.path.exists(out)
-        assert CHAPTER_ID in [c["id"] for c in res["chapters"]]
-        txt = _pdf_text(out)
-        assert "Braund" in txt and "male" in txt
-        assert "primeras" in txt          # head note rendered.
-        assert "df.head" in txt           # chapter heading rendered.
-        assert "no disponible" not in txt  # placeholder NOT shown.
-
-
-def test_golden_render_pptx_muestra_head():
-    with tempfile.TemporaryDirectory() as d:
-        out = os.path.join(d, "eda.pptx")
-        res = render_automatic_eda_pptx(_profile(), out, {"title": "EDA"})
-        assert res["path"] == out and os.path.exists(out)
-        assert CHAPTER_ID in [c["id"] for c in res["chapters"]]
-        txt = _pptx_text(out)
-        assert "Braund" in txt and "Cumings" in txt
-
-
-def test_edge_sin_head_rows_degrada_a_nota_honesta():
-    # No head data anywhere: chapter still builds (columns exist), shows the
-    # honest placeholder note, and never invents rows nor raises.
-    prof = _profile(with_head=False)
-    ch = build_overview(prof, {})
-    assert ch is not None
-    blocks = _flatten(ch.blocks)
-    assert any(isinstance(b, Note) and "no disponible" in b.text
-               for b in blocks)
-    # The first DataTable now is the column dictionary, not df.head rows.
-    tables = [b for b in blocks if isinstance(b, DataTable)]
-    assert all("Braund" not in str(c)
-               for tbl in tables for row in tbl.rows for c in row)
-
-
-def test_edge_none_y_vacio_no_rompen():
-    # Nothing to render at all -> None, no raise.
-    assert build_overview(None, None) is None
-    assert build_overview({}, {}) is None
-    assert build_overview({"columns": []}, {}) is None
-    # Only head_rows (no columns) still yields a chapter with the head table.
-    ch = build_overview({"columns": []}, {"head_rows": _head_rows()})
-    assert ch is not None
-    tables = [b for b in _flatten(ch.blocks) if isinstance(b, DataTable)]
-    assert tables and len(tables[0].rows) == 3
@@ -2,8 +2,17 @@

 Builds the document cover from a TableProfile plus an optional ``ctx`` of
 presentation metadata. Reads everything defensively (``.get``) and degrades
-honestly: a field that is neither in the profile nor in ``ctx`` is shown as a
-placeholder rather than invented, leaving a hook for the LLM layer to fill it.
+honestly.
+
+The dataset size (N rows x M columns) is always shown big, as a heading right
+under the dataset name (kept together in a ``Group``), not buried in the
+metadata table. The Description and Granularity are resolved through a cascade
+so they are never empty: an explicit ``ctx`` value wins; otherwise the LLM block
+(``profile['llm']`` from ``eda_llm_insights``) provides ``summary`` /
+``row_meaning``; otherwise a short summary is derived from the profile itself
+(shape, column-type mix, quality score) and a "Cada fila es…" sentence from the
+key-candidate columns or the table shape. Nothing is invented: the derived
+fallbacks state that they come from the profile.

 Contract for chapter authors (see ``docs/capabilities/automatic_eda.md``):
    build_<id>(profile: dict, ctx: dict) -> Chapter | None
@@ -17,10 +26,15 @@ from datetime import datetime, timezone

 from .. import model

-CHAPTER_VERSION = "1.1.0"
+CHAPTER_VERSION = "1.2.0"
 CHAPTER_ID = "portada"
 CHAPTER_TITLE = "Portada"

+# Key under which eda_llm_insights stores its interpretive block in the profile.
+# The cover reads ``summary`` (what the table is) and ``row_meaning`` (what one
+# row represents) from it when the LLM layer ran (``run_llm``).
+_LLM_KEY = "llm"
+
 # Default human description of what the table quality score measures. Chapters
 # can override it via ctx["quality_criteria"].
 _DEFAULT_QUALITY_CRITERIA = (
@@ -142,6 +156,88 @@ def _fmt_date_eu(value) -> str:
        return s


+def _llm_block(profile: dict, ctx: dict) -> dict:
+    """Return the interpretive LLM block (``eda_llm_insights`` output), or {}.
+
+    It is stored under ``profile['llm']`` by ``profile_table(run_llm=True)`` and
+    may also be forwarded in ``ctx['llm']``. Read defensively: anything that is
+    not a dict degrades to an empty dict so the cover never raises.
+    """
+    block = profile.get(_LLM_KEY)
+    if not isinstance(block, dict):
+        block = ctx.get(_LLM_KEY)
+    return block if isinstance(block, dict) else {}
+
+
+def _count_column_types(profile: dict, ctx: dict):
+    """Best-effort (n_numeric, n_categorical) for the dataset.
+
+    Prefers the aggregated ``ctx['document_summary']`` (computed by the engine
+    over the whole body); falls back to counting the profile columns directly so
+    the cover still has the numbers when no summary was passed.
+    """
+    summary = ctx.get("document_summary")
+    if isinstance(summary, dict):
+        n_num = summary.get("n_numeric")
+        n_cat = summary.get("n_categorical")
+        if n_num is not None or n_cat is not None:
+            return n_num, n_cat
+    cols = profile.get("columns") or []
+    n_num = sum(1 for c in cols if isinstance(c, dict)
+                and c.get("inferred_type") == "numeric")
+    n_cat = sum(1 for c in cols if isinstance(c, dict)
+                and isinstance(c.get("categorical"), dict)
+                and c.get("categorical", {}).get("top")
+                and c.get("inferred_type") != "numeric")
+    return n_num, n_cat
+
+
+def _derive_description(profile: dict, ctx: dict) -> str:
+    """A short, honest description of the dataset from the profile.
+
+    Used only when no explicit ``ctx['description']`` and no LLM ``summary`` are
+    available. Summarizes shape, column-type mix and quality score; never empty,
+    never invents business meaning (it states the description was derived)."""
+    n_rows = profile.get("n_rows")
+    n_cols = profile.get("n_cols")
+    n_num, n_cat = _count_column_types(profile, ctx)
+    head = f"Conjunto de datos con {_fmt_int(n_rows)} filas y {_fmt_int(n_cols)} columnas"
+    type_bits = []
+    if n_num:
+        type_bits.append(f"{_fmt_int(n_num)} numéricas")
+    if n_cat:
+        type_bits.append(f"{_fmt_int(n_cat)} categóricas")
+    if type_bits:
+        head += " (" + ", ".join(type_bits) + ")"
+    parts = [head + "."]
+    score = profile.get("quality_score")
+    if score is not None:
+        parts.append(f"Calidad media estimada: {score}/100.")
+    parts.append(
+        "Resumen derivado del perfil; active la interpretación LLM (`run_llm`) "
+        "para una descripción de negocio más rica.")
+    return " ".join(parts)
+
+
+def _derive_granularity(profile: dict, dataset_name: str) -> str:
+    """A ``Cada fila es…`` granularity sentence from the profile.
+
+    Prefers the key-candidate columns (a row is identified by them); when no key
+    is detected, falls back to the table shape so the line is always meaningful
+    and starts with ``Cada fila es`` as the user requested."""
+    keys = profile.get("key_candidates") or []
+    if keys:
+        shown = ", ".join(str(k) for k in keys[:3])
+        more = "" if len(keys) <= 3 else f" (y {len(keys) - 3} más)"
+        return (f"Cada fila es un registro identificado por {shown}{more}, "
+                "candidata(s) a clave por ser únicas y sin nulos.")
+    n_rows = profile.get("n_rows")
+    tail = f" El dataset tiene {_fmt_int(n_rows)} filas en total." if n_rows else ""
+    return (f"Cada fila es un registro de «{dataset_name}». No se detectó una "
+            "columna identificadora única, así que la granularidad se infiere "
+            "de la forma de la tabla." + tail)
+
+
 def build_portada(profile: dict, ctx: dict):
    """Build the cover Chapter, or None if there is truly nothing to show."""
    profile = profile or {}
@@ -166,30 +262,38 @@ def build_portada(profile: dict, ctx: dict):
    quality_criteria = ctx.get("quality_criteria") or _DEFAULT_QUALITY_CRITERIA
    quality_value = "—" if score is None else f"{score} / 100"

-    # Granularity: ctx wins; else derive from key candidates; else be honest.
+    llm = _llm_block(profile, ctx)
+
+    # Granularity: explicit ctx wins; then the LLM "row_meaning"; then the key
+    # candidates; finally a shape-based fallback. Always a real "Cada fila es…".
    granularity = ctx.get("granularity")
    if not granularity:
-        keys = profile.get("key_candidates") or []
-        if keys:
-            granularity = ("Cada fila parece identificada por "
-                           + ", ".join(str(k) for k in keys[:3]) + ".")
-        else:
-            granularity = ("Cada fila es… (granularidad no determinada — "
-                           "pendiente de la capa de cálculo/LLM).")
+        granularity = (llm.get("row_meaning") or "").strip() or None
+    if not granularity:
+        granularity = _derive_granularity(profile, str(dataset_name))

+    # Description: explicit ctx wins; then the LLM "summary"; finally a short
+    # profile-derived summary. Never the old empty placeholder.
    description = ctx.get("description")
    if not description:
-        description = ("Descripción no provista — pendiente de la capa LLM "
-                       "(`run_llm`) o de `ctx['description']`.")
+        description = (llm.get("summary") or "").strip() or None
+    if not description:
+        description = _derive_description(profile, ctx)

-    blocks = [
+    # Title + dataset size shown together and BIG (Heading) at the top, kept on
+    # the same page (Group). The size is no longer buried in the metadata table.
+    cover = [
        model.Heading(text=str(dataset_name), level=1),
        model.Markdown(text="**Automatic-EDA** · informe exploratorio automático"),
+        model.Heading(text=shape, level=2),
+    ]
+
+    blocks = [
+        model.Group(blocks=cover),
        model.KVTable(rows=[
            ("Fuente", source_origin),
            ("Almacenamiento", storage),
            ("Generado", when),
-            ("Tamaño", shape),
            ("Calidad", quality_value),
            ("Criterios de calidad", quality_criteria),
        ]),
@@ -0,0 +1,197 @@
+"""Tests for the PORTADA (cover) chapter — DoD: golden + edges + render.
+
+Self-contained: builds synthetic TableProfiles (no DuckDB) so the suite is fast
+and deterministic. Verifies the Fase 4b improvements:
+
+1. The dataset size (N rows x M columns) is always shown BIG — as a level-2
+   heading kept together with the dataset name in a ``Group`` — and is no longer
+   a row of the metadata table.
+2. Description and Granularity are resolved through a real cascade and are never
+   the old empty placeholders: an explicit ``ctx`` value wins; otherwise the LLM
+   block (``profile['llm']``) provides ``summary`` / ``row_meaning``; otherwise a
+   short summary is derived from the profile and a "Cada fila es…" sentence from
+   the key-candidate columns or the table shape.
+3. The chapter degrades without raising on empty/None input.
+4. It renders inside the full document to both PDF and PPTX showing that content.
+"""
+
+import os
+import re
+import tempfile
+
+from pypdf import PdfReader
+from pptx import Presentation
+
+from datascience.automatic_eda.model import Group, Heading, KVTable, Markdown
+from datascience.automatic_eda.chapters.portada import (
+    CHAPTER_ID, CHAPTER_VERSION, build_portada,
+)
+from datascience.render_automatic_eda_pdf import render_automatic_eda_pdf
+from datascience.render_automatic_eda_pptx import render_automatic_eda_pptx
+
+
+def _profile(with_llm: bool = True, with_keys: bool = True) -> dict:
+    prof = {
+        "table": "titanic",
+        "source": "/data/titanic.csv",
+        "profiled_at": "2026-06-30T10:00:00+00:00",
+        "n_rows": 891,
+        "n_cols": 12,
+        "quality_score": 78.0,
+        "columns": [
+            {"name": "PassengerId", "inferred_type": "numeric",
+             "null_pct": 0.0, "numeric": {"mean": 446.0, "min": 1.0,
+                                          "max": 891.0, "std": 257.0}},
+            {"name": "Survived", "inferred_type": "numeric",
+             "null_pct": 0.0, "numeric": {"mean": 0.38, "min": 0.0,
+                                          "max": 1.0, "std": 0.49}},
+            {"name": "Sex", "inferred_type": "categorical", "null_pct": 0.0,
+             "categorical": {"top": [{"value": "male", "count": 577, "pct": 0.65},
+                                     {"value": "female", "count": 314,
+                                      "pct": 0.35}],
+                             "mode": "male", "n_distinct": 2, "entropy": 0.93}},
+        ],
+    }
+    if with_keys:
+        prof["key_candidates"] = ["PassengerId"]
+    if with_llm:
+        prof["llm"] = {
+            "summary": "Pasajeros del Titanic con su supervivencia y datos de viaje.",
+            "row_meaning": "Cada fila es un pasajero del Titanic.",
+            "dictionary": [], "pii": [], "cleaning": [], "analyses": [],
+        }
+    return prof
+
+
+def _pdf_text(path: str) -> str:
+    txt = "".join((pg.extract_text() or "") for pg in PdfReader(path).pages)
+    return re.sub(r"\s+", " ", txt)
+
+
+def _pptx_text(path: str) -> str:
+    prs = Presentation(path)
+    parts = []
+    for sl in prs.slides:
+        for sh in sl.shapes:
+            if sh.has_text_frame:
+                parts.append(sh.text_frame.text)
+            if sh.has_table:
+                tb = sh.table
+                for r in range(len(tb.rows)):
+                    for c in range(len(tb.columns)):
+                        parts.append(tb.cell(r, c).text)
+    return re.sub(r"\s+", " ", " ".join(parts))
+
+
+def _markdown_after(blocks, heading_text):
+    """Return the Markdown block that follows a Heading whose text matches."""
+    for i, b in enumerate(blocks):
+        if isinstance(b, Heading) and heading_text.lower() in b.text.lower():
+            for nb in blocks[i + 1:]:
+                if isinstance(nb, Markdown):
+                    return nb
+    return None
+
+
+def test_golden_tamano_grande_y_textos_llm():
+    ch = build_portada(_profile(), {})
+    assert ch is not None
+    assert ch.id == CHAPTER_ID
+    assert ch.version == CHAPTER_VERSION
+
+    # 1) Title + size kept together in a Group; size is a BIG level-2 heading.
+    group = next(b for b in ch.blocks if isinstance(b, Group))
+    inner = group.blocks
+    assert isinstance(inner[0], Heading) and inner[0].level == 1
+    assert inner[0].text == "titanic"
+    size_h = next(b for b in inner if isinstance(b, Heading) and b.level == 2)
+    assert "891" in size_h.text and "12" in size_h.text
+    assert "filas" in size_h.text and "columnas" in size_h.text
+
+    # 2) Size is no longer a row of the metadata table.
+    kv = next(b for b in ch.blocks if isinstance(b, KVTable))
+    labels = [r[0] for r in kv.rows]
+    assert "Tamaño" not in labels
+    assert "Fuente" in labels and "Calidad" in labels
+
+    # 3) Description and Granularity come from the LLM block.
+    desc = _markdown_after(ch.blocks, "Descripción")
+    gran = _markdown_after(ch.blocks, "Granularidad")
+    assert desc is not None and "Titanic" in desc.text
+    assert gran is not None and gran.text.startswith("Cada fila es")
+    assert "pasajero" in gran.text.lower()
+
+
+def test_fallback_sin_llm_usa_keys_y_perfil():
+    # No LLM block: description derived from the profile, granularity from keys.
+    ch = build_portada(_profile(with_llm=False, with_keys=True), {})
+    desc = _markdown_after(ch.blocks, "Descripción")
+    gran = _markdown_after(ch.blocks, "Granularidad")
+    # Description is the derived summary, never the old "pendiente" placeholder.
+    assert "pendiente" not in desc.text.lower()
+    assert "891" in desc.text and "columnas" in desc.text
+    assert "numéricas" in desc.text or "categóricas" in desc.text
+    # Granularity mentions the key candidate and starts with "Cada fila es".
+    assert gran.text.startswith("Cada fila es")
+    assert "PassengerId" in gran.text
+    assert "…" not in gran.text  # the old ellipsis placeholder is gone.
+
+
+def test_fallback_sin_llm_sin_keys_usa_forma():
+    ch = build_portada(_profile(with_llm=False, with_keys=False), {})
+    gran = _markdown_after(ch.blocks, "Granularidad")
+    assert gran.text.startswith("Cada fila es")
+    assert "titanic" in gran.text.lower()
+    assert "pendiente" not in gran.text.lower()
+
+
+def test_ctx_explicito_gana_sobre_llm():
+    ctx = {"description": "Descripción manual.",
+           "granularity": "Cada fila es una unidad manual."}
+    ch = build_portada(_profile(), ctx)
+    desc = _markdown_after(ch.blocks, "Descripción")
+    gran = _markdown_after(ch.blocks, "Granularidad")
+    assert desc.text == "Descripción manual."
+    assert gran.text == "Cada fila es una unidad manual."
+
+
+def test_edge_perfil_vacio_no_lanza():
+    # Empty / None never raise; the cover still shows a size and real texts.
+    for prof, ctx in (({}, {}), (None, None)):
+        ch = build_portada(prof, ctx)
+        assert ch is not None
+        group = next(b for b in ch.blocks if isinstance(b, Group))
+        size_h = next(b for b in group.blocks
+                      if isinstance(b, Heading) and b.level == 2)
+        assert "filas" in size_h.text and "columnas" in size_h.text
+        desc = _markdown_after(ch.blocks, "Descripción")
+        gran = _markdown_after(ch.blocks, "Granularidad")
+        assert desc.text and "pendiente" not in desc.text.lower()
+        assert gran.text.startswith("Cada fila es")
+
+
+def test_golden_render_pdf_muestra_portada():
+    prof = _profile()
+    with tempfile.TemporaryDirectory() as d:
+        out = os.path.join(d, "eda.pdf")
+        res = render_automatic_eda_pdf(prof, out, {"title": "EDA"})
+        assert res["path"] == out and os.path.exists(out)
+        assert CHAPTER_ID in [c["id"] for c in res["chapters"]]
+        txt = _pdf_text(out)
+        assert "titanic" in txt.lower()
+        assert "891" in txt and "filas" in txt and "columnas" in txt
+        assert "Titanic" in txt          # LLM summary in the Description.
+        assert "Cada fila es" in txt     # granularity sentence.
+
+
+def test_golden_render_pptx_muestra_portada():
+    prof = _profile()
+    with tempfile.TemporaryDirectory() as d:
+        out = os.path.join(d, "eda.pptx")
+        res = render_automatic_eda_pptx(prof, out, {"title": "EDA"})
+        assert res["path"] == out and os.path.exists(out)
+        assert CHAPTER_ID in [c["id"] for c in res["chapters"]]
+        txt = _pptx_text(out)
+        assert "titanic" in txt.lower()
+        assert "891" in txt and "columnas" in txt
+        assert "Cada fila es" in txt
@@ -20,10 +20,6 @@ vacia y el resto del ctx se construye igual. Ante un fallo global devuelve al
 menos ``{**base_ctx, "db_path": db_path, "table": table}``.

 Claves de DATOS que produce (las consumen los capitulos):
-  - ``head_rows``      : [ {col: valor, ...}, ... ] primeras filas CRUDAS de la
-                         tabla (``SELECT * LIMIT head_n``), una entrada por fila.
-                         La lee el capitulo OVERVIEW para mostrar df.head real en
-                         lugar del placeholder "df.head no disponible".
  - ``raw_numeric``    : {col: [float|None, ...]} muestra cruda de las columnas
                         numericas, ALINEADA POR FILA (una entrada por fila aunque
                         sea None). La leen modelos (clustering 2D en vivo) y
@@ -60,7 +56,7 @@ def _to_float(value):
        return None


-def build_eda_render_ctx(db_path, table, profile, backend="duckdb", sample=5000, base_ctx=None, head_n=10):
+def build_eda_render_ctx(db_path, table, profile, backend="duckdb", sample=5000, base_ctx=None):
    """Construye el ctx de datos crudos para los renderers de AutomaticEDA.

    Args:
@@ -81,15 +77,13 @@ def build_eda_render_ctx(db_path, table, profile, backend="duckdb", sample=5000,
        base_ctx: dict opcional con claves de presentacion ya preparadas
            (dataset_name, source_origin, ...). Se parte de una copia y NO se
            pisan sus claves; solo se añaden las de datos. Default None -> {}.
-        head_n: numero de filas crudas a muestrear para ``ctx["head_rows"]``
-            (df.head del capitulo OVERVIEW). Default 10. <=0 omite la clave.

    Returns:
        El dict ``ctx`` directamente (NO un wrapper {status,...}): se pasa tal
        cual como ``meta={"ctx": <ese dict>}`` a render_automatic_eda_pdf/pptx.
-        Nunca lanza. Claves que puede contener: head_rows, raw_numeric,
-        timeseries_raw, geo_points (omitidas si no aplican o fallan), y siempre
-        db_path + table para backends validos.
+        Nunca lanza. Claves que puede contener: raw_numeric, timeseries_raw,
+        geo_points (omitidas si no aplican o fallan), y siempre db_path + table
+        para backends validos.
    """
    # Copia de base_ctx: nunca mutamos el dict del caller. Las claves de
    # presentacion que ya traiga se conservan; las de datos se añaden encima.
@@ -123,24 +117,6 @@ def build_eda_render_ctx(db_path, table, profile, backend="duckdb", sample=5000,
        ctx["db_path"] = db_path
        ctx["table"] = table

-        # 1.5) head_rows: primeras filas CRUDAS de la tabla (SELECT * LIMIT n)
-        # para que el capitulo OVERVIEW muestre df.head real en vez del
-        # placeholder. Una sola query, dict-no-throw: si falla, se omite la
-        # clave (el capitulo degrada a su nota honesta). No se pisa una clave
-        # head_rows que ya viniera en base_ctx (presentacion).
-        if head_n and int(head_n) > 0 and "head_rows" not in ctx:
-            try:
-                hq = query_fn(f'SELECT * FROM "{table}" LIMIT {int(head_n)}')
-                if isinstance(hq, dict) and hq.get("status") == "ok":
-                    hrows = [
-                        dict(r) for r in (hq.get("rows") or [])
-                        if isinstance(r, dict)
-                    ]
-                    if hrows:
-                        ctx["head_rows"] = hrows
-            except Exception:  # noqa: BLE001 - dict-no-throw: omitir la clave
-                pass
-
        # 2) Columnas del perfil agregado (lectura defensiva).
        cols = profile.get("columns") if isinstance(profile, dict) else None
        cols = cols or []
@@ -536,21 +536,6 @@ def profile_table(
                type_breakdown[it] += 1
        prof["type_breakdown"] = type_breakdown

-        # 8.1) Primeras filas crudas (df.head) para el capitulo OVERVIEW del motor
-        # AutomaticEDA: una muestra SELECT col1,col2,... LIMIT 10 alineada por fila.
-        # Se reusa _sample_rows (mismo lector read-only). Estilo dict-no-throw: si
-        # falla, head_rows queda None y el capitulo degrada a su nota honesta. El
-        # capitulo lo recoge via profile["head_rows"]; build_eda_render_ctx ademas
-        # lo replica en ctx["head_rows"] cuando se construye el contexto de render.
-        try:
-            head_names = [c.get("name") for c in cols if c.get("name")]
-            head_rows = _sample_rows(_q, table, head_names, 10)
-            prof["head_rows"] = [
-                dict(r) for r in head_rows if isinstance(r, dict)
-            ] or None
-        except Exception:  # noqa: BLE001
-            prof["head_rows"] = None
-
        # 8.5) Matriz de correlacion/asociacion sobre una muestra de filas
        # alineadas. Elige la metrica por par de tipos (Pearson/Spearman,
        # Cramer's V/Theil's U, correlation ratio, MI) via association_matrix.