feat(eda): series temporales + rigor anti-data-mining + PDF movil + /eda + benchmark issues

Bloque del grupo eda (sesion ausente EDA-benchmark): - 8 funciones nuevas: adf_kpss_stationarity, acf_pacf, stl_decompose, to_returns, fdr_correction, suggest_reexpression, exploratory_caveats, render_eda_pdf - integracion: profile_table (run_series, emit_pdf), association_matrix (FDR Benjamini-Hochberg), render_eda_markdown (secciones series/reexpresion/caveats) - slash commands /eda y /capitulos - issues 0173-0177: mejoras del /eda derivadas del benchmark sobre 12 datasets reales (outlier_pct x100, periodo estacional, FK inference, render models, tipos id-like) Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
2026-06-29 03:34:01 +02:00
parent 02301aaed3
commit 7ac69ab4fb
33 changed files with 3995 additions and 51 deletions
@@ -0,0 +1,626 @@
+"""render_eda_pdf — Portable, mobile-readable PDF report of a TableProfile (eda group).
+
+Impure function (writes a file): takes a TableProfile dict from the `eda`
+capability group and renders a MULTI-PAGE PDF designed to be read and explored
+on a phone screen. It is the 4th output of the eda workflow, next to the
+markdown report, the JSON sidecar and the executed Jupyter notebook.
+
+Design follows Edward Tufte, "The Visual Display of Quantitative Information":
+high data-ink ratio (no chartjunk, despined axes, light grids), small multiples
+for per-column histograms, and graphical integrity (y-axes start at 0, no
+misleading truncation). Pages are A5 portrait, single column, with a large,
+legible typeface so the report stays readable on a small display.
+
+Every key of the profile is read defensively with ``.get(...)`` and only the
+sections actually present are rendered. The function is forward-compatible: if
+the profile carries blocks this renderer does not know about (e.g. ``models``,
+time series, ``caveats`` added by sibling functions), they are dumped generically
+on a final page instead of being ignored or crashing the render.
+
+dict-no-throw contract of the eda group: it NEVER raises. Any failure of a single
+section is caught and noted; the function always returns a dict with the path,
+the page count and a human note.
+
+Engine: matplotlib ``PdfPages`` (already in ``python/.venv``) — zero new deps.
+"""
+
+import os
+import textwrap
+from datetime import datetime, timezone
+
+import matplotlib
+
+# Headless backend: this runs in agents/CI without a display.
+matplotlib.use("Agg")
+
+import matplotlib.pyplot as plt  # noqa: E402
+import numpy as np  # noqa: E402
+from matplotlib.backends.backend_pdf import PdfPages  # noqa: E402
+
+# A5 portrait in inches (148 x 210 mm). Single column, tall, phone-friendly.
+# Passed as ``figsize`` to every figure created in this module.
+_A5_PORTRAIT = (5.83, 8.27)
+
+# Number of per-column small multiples stacked vertically on one page.
+# Used as the chunk size when the numeric / categorical columns are paginated.
+_NUMERIC_PER_PAGE = 3
+_CATEGORICAL_PER_PAGE = 3
+
+# Top-of-profile keys this renderer handles explicitly. Anything else found at
+# the top level of the profile is dumped on the forward-compat "Otros" page so
+# new sections added by sibling functions still reach the reader.
+# Membership in this set is what ``_generic_pages`` uses to decide what is
+# "unknown"; add a key here whenever a dedicated page builder starts reading it.
+_KNOWN_TOP_KEYS = {
+    "table", "source", "profiled_at", "n_rows", "n_cols", "size_bytes",
+    "duplicate_rows", "duplicate_pct", "null_cell_pct", "constant_cols",
+    "all_null_cols", "quality_score", "type_breakdown", "key_candidates",
+    "columns", "correlations", "llm",
+}
+
+# Restrained, high-contrast palette: a single accent reads cleanly on a phone.
+# _INK: primary text and reference lines; _ACCENT: bars and highlighted
+# figures; _MUTED: secondary text, spines, ticks and grid lines.
+_INK = "#1b1b1b"
+_ACCENT = "#2a6f97"
+_MUTED = "#8a8a8a"
+
+
+# --------------------------------------------------------------------------- #
+# Small formatting + Tufte helpers
+# --------------------------------------------------------------------------- #
+def _fmt_num(value, decimals: int = 3) -> str:
+    """Format a number compactly; fall back to ``str`` for everything else.
+
+    Args:
+        value: the value to render. ``None`` becomes an em-dash placeholder,
+            ``bool`` is rendered via ``str`` (it is checked before ``int``
+            because ``bool`` is an ``int`` subclass), ``int`` gets thousands
+            separators and ``float`` is rounded to ``decimals`` places with
+            trailing zeros removed.
+        decimals: maximum number of decimal places kept for floats.
+
+    Returns:
+        A short display string.
+    """
+    if value is None:
+        return "—"
+    if isinstance(value, bool):
+        return str(value)
+    if isinstance(value, int):
+        return f"{value:,}"
+    if isinstance(value, float):
+        if value != value:  # NaN is the only value not equal to itself.
+            return "NaN"
+        if value in (float("inf"), float("-inf")):
+            return str(value)
+        text = f"{value:.{decimals}f}"
+        # Only strip zeros that sit after a decimal point. With decimals=0 the
+        # text has no point and stripping would eat significant digits
+        # ("100" -> "1").
+        if "." in text:
+            text = text.rstrip("0").rstrip(".")
+        # A tiny negative rounds to "-0"; show it as a plain zero.
+        if text in ("", "-0"):
+            return "0"
+        return text
+    return str(value)
+
+
+def _fmt_pct(value, decimals: int = 1) -> str:
+    """Render a 0-1 fraction as a percentage string such as ``'12.5%'``.
+
+    ``None`` yields the em-dash placeholder. A value that ``float()`` rejects
+    is handed back through ``str()`` instead of raising.
+    """
+    if value is None:
+        return "—"
+    try:
+        percent = float(value) * 100
+    except (TypeError, ValueError):
+        return str(value)
+    return f"{percent:.{decimals}f}%"
+
+
+def _despine(ax) -> None:
+    """Hide the top/right spines and mute what remains (higher data-ink ratio).
+
+    The left and bottom spines are kept but recoloured and thinned; ticks are
+    shortened and muted; the axes title is set to the ink colour.
+    """
+    spines = ax.spines
+    spines["top"].set_visible(False)
+    spines["right"].set_visible(False)
+    for kept in (spines["left"], spines["bottom"]):
+        kept.set_color(_MUTED)
+        kept.set_linewidth(0.6)
+    ax.tick_params(colors=_MUTED, labelsize=7, length=2)
+    ax.title.set_color(_INK)
+
+
+def _truncate(text, width: int = 22) -> str:
+    """Shorten any value to at most ``width`` characters for tight layouts.
+
+    ``None`` becomes an em-dash; other values go through ``str()``. Text that
+    is too long keeps its first ``width - 1`` characters plus an ellipsis.
+    """
+    label = "—" if text is None else str(text)
+    if len(label) > width:
+        return label[: width - 1] + "…"
+    return label
+
+
+def _escape_mathtext(text) -> str:
+    """Escape ``$`` so matplotlib draws data text literally, not as mathtext."""
+    return str(text).replace("$", r"\$")
+
+
+def _text_page(pdf, title: str, lines: list, subtitle: str = None) -> int:
+    """Render one text page (monospace body) and return 1 (pages written).
+
+    Args:
+        pdf: open ``PdfPages`` object the page is appended to.
+        title: bold heading drawn at the top of the page.
+        lines: body lines; each item is converted with ``str()`` and the lines
+            are joined with newlines into a single monospace text block.
+        subtitle: optional muted line drawn between title and body.
+
+    Returns:
+        Always 1, the number of pages appended.
+    """
+    fig = plt.figure(figsize=_A5_PORTRAIT)
+    try:
+        fig.text(0.08, 0.94, _escape_mathtext(title), fontsize=16,
+                 fontweight="bold", color=_INK)
+        if subtitle:
+            fig.text(0.08, 0.905, _escape_mathtext(subtitle), fontsize=9,
+                     color=_MUTED)
+        # The body is arbitrary profile data: coerce every line to str so a
+        # stray number cannot break the join, and escape "$" so a pair of
+        # dollar signs (prices, shell variables) is not parsed as a formula.
+        body = _escape_mathtext("\n".join(str(line) for line in lines))
+        fig.text(
+            0.08, 0.88, body, fontsize=9.5, color=_INK, family="monospace",
+            va="top", ha="left", linespacing=1.5,
+        )
+        pdf.savefig(fig)
+    finally:
+        # pyplot keeps every figure alive until it is closed explicitly, so
+        # close it even when rendering/saving fails.
+        plt.close(fig)
+    return 1
+
+
+def _kv_lines(rows: list, key_width: int = 18) -> list:
+    """Turn ``[label, value]`` pairs into aligned ``'label : value'`` lines.
+
+    Each label is converted with ``str()`` and left-padded to ``key_width``
+    characters so the colons line up in a monospace font.
+    """
+    return [f"{str(label):<{key_width}}: {value}" for label, value in rows]
+
+
+# --------------------------------------------------------------------------- #
+# Page builders (each fully defensive, each returns the number of pages it made)
+# --------------------------------------------------------------------------- #
+def _cover_page(pdf, profile: dict, title: str) -> int:
+    """Cover: table name, date, shape and an oversized quality score.
+
+    Args:
+        pdf: open ``PdfPages`` object the page is appended to.
+        profile: profile dict; reads ``table``, ``source``, ``profiled_at``,
+            ``n_rows``, ``n_cols`` and ``quality_score``, all optional.
+        title: heading override; when falsy the heading is built from the
+            table name.
+
+    Returns:
+        Always 1, the number of pages appended.
+    """
+    fig = plt.figure(figsize=_A5_PORTRAIT)
+    try:
+        table = profile.get("table") or "(tabla sin nombre)"
+        heading = title or f"EDA — {table}"
+        fig.text(0.08, 0.82, heading, fontsize=22, fontweight="bold",
+                 color=_INK, wrap=True)
+
+        sub = []
+        src = profile.get("source")
+        if src:
+            sub.append(f"fuente: {_truncate(src, 40)}")
+        # Fall back to the render time when the profile carries no timestamp.
+        when = profile.get("profiled_at") or datetime.now(
+            timezone.utc
+        ).strftime("%Y-%m-%d %H:%M UTC")
+        sub.append(f"generado: {when}")
+        fig.text(0.08, 0.76, "\n".join(sub), fontsize=10, color=_MUTED,
+                 va="top")
+
+        n_rows = profile.get("n_rows")
+        n_cols = profile.get("n_cols")
+        shape = f"{_fmt_num(n_rows)} filas  ×  {_fmt_num(n_cols)} columnas"
+        fig.text(0.08, 0.60, shape, fontsize=15, color=_ACCENT,
+                 fontweight="bold")
+
+        # The score block is drawn only when the profile provides a score.
+        score = profile.get("quality_score")
+        if score is not None:
+            fig.text(0.08, 0.42, "calidad", fontsize=12, color=_MUTED)
+            fig.text(0.08, 0.31, _fmt_num(score), fontsize=60,
+                     fontweight="bold", color=_INK)
+            fig.text(0.08, 0.25, "sobre 100", fontsize=12, color=_MUTED)
+
+        fig.text(0.08, 0.06,
+                 "Tufte · alta densidad de datos · lectura en móvil",
+                 fontsize=8, color=_MUTED, style="italic")
+        pdf.savefig(fig)
+    finally:
+        # Always release the figure: pyplot keeps it registered until closed,
+        # so an exception above would otherwise leak it.
+        plt.close(fig)
+    return 1
+
+
+def _overview_page(pdf, profile: dict) -> int:
+    """Overview key/value page: types, duplicates, nulls, constants, keys.
+
+    Only metrics present in ``profile`` (not ``None`` / not empty) get a row.
+    When nothing is present a single placeholder row is rendered, so the page
+    is never blank.
+
+    Returns:
+        The number of pages written by ``_text_page``.
+    """
+
+    def _name_list(names) -> str:
+        """Join names into one clipped string, tolerating non-str items."""
+        if isinstance(names, (list, tuple, set, frozenset)):
+            items = names
+        else:
+            items = [names]
+        # Column names may be ints and key candidates may be nested lists
+        # (composite keys); str() keeps the join from raising TypeError.
+        return _truncate(", ".join(str(item) for item in items), 40)
+
+    rows = []
+    if profile.get("n_rows") is not None:
+        rows.append(["Filas", _fmt_num(profile.get("n_rows"))])
+    if profile.get("n_cols") is not None:
+        rows.append(["Columnas", _fmt_num(profile.get("n_cols"))])
+    if profile.get("size_bytes") is not None:
+        rows.append(["Tamaño (bytes)", _fmt_num(profile.get("size_bytes"))])
+    if profile.get("duplicate_rows") is not None:
+        dup = _fmt_num(profile.get("duplicate_rows"))
+        if profile.get("duplicate_pct") is not None:
+            dup += f" ({_fmt_pct(profile.get('duplicate_pct'))})"
+        rows.append(["Filas duplicadas", dup])
+    if profile.get("null_cell_pct") is not None:
+        rows.append(["Celdas nulas", _fmt_pct(profile.get("null_cell_pct"))])
+    if profile.get("quality_score") is not None:
+        rows.append(["Calidad", _fmt_num(profile.get("quality_score"))])
+
+    type_breakdown = profile.get("type_breakdown") or {}
+    if isinstance(type_breakdown, dict):
+        # Types with a zero/empty count are left out of the summary.
+        tb = ", ".join(f"{k}: {v}" for k, v in type_breakdown.items() if v)
+    else:
+        # Unexpected shape: show it verbatim rather than lose the section.
+        tb = str(type_breakdown)
+    if tb:
+        rows.append(["Tipos", tb])
+
+    constant_cols = profile.get("constant_cols") or []
+    if constant_cols:
+        rows.append(["Columnas constantes", _name_list(constant_cols)])
+    all_null_cols = profile.get("all_null_cols") or []
+    if all_null_cols:
+        rows.append(["Columnas all-null", _name_list(all_null_cols)])
+    key_candidates = profile.get("key_candidates") or []
+    if key_candidates:
+        rows.append(["Candidatos a clave", _name_list(key_candidates)])
+
+    if not rows:
+        rows.append(["(sin métricas de overview)", ""])
+
+    return _text_page(pdf, "Overview", _kv_lines(rows, key_width=20))
+
+
+def _numeric_pages(pdf, columns: list) -> int:
+    """Small multiples: a real histogram per numeric column, several per page.
+
+    A column qualifies when it is a dict whose ``numeric`` entry is a dict
+    with a non-empty ``histogram``. Columns are laid out
+    ``_NUMERIC_PER_PAGE`` per page, one panel per row.
+
+    Returns:
+        The number of pages appended (0 when no column qualifies).
+    """
+    numeric_cols = [
+        c for c in columns
+        if isinstance(c, dict)
+        and isinstance(c.get("numeric"), dict)
+        and c["numeric"].get("histogram")
+    ]
+    if not numeric_cols:
+        return 0
+
+    pages = 0
+    for start in range(0, len(numeric_cols), _NUMERIC_PER_PAGE):
+        chunk = numeric_cols[start:start + _NUMERIC_PER_PAGE]
+        # Always build the full grid so every panel has the same height on
+        # every page; a short last chunk leaves its spare rows blank instead
+        # of stretching the remaining histograms.
+        fig, axes = plt.subplots(
+            _NUMERIC_PER_PAGE, 1, figsize=_A5_PORTRAIT, squeeze=False,
+        )
+        try:
+            fig.suptitle("Distribuciones numéricas", fontsize=14,
+                         fontweight="bold", color=_INK, x=0.08, ha="left",
+                         y=0.98)
+            for ax, col in zip(axes[:, 0], chunk):
+                _draw_histogram(ax, col)
+            # Hide unused axes if the chunk is short (keeps spacing even).
+            for ax in axes[len(chunk):, 0]:
+                ax.axis("off")
+            fig.tight_layout(rect=[0, 0, 1, 0.95])
+            pdf.savefig(fig)
+        finally:
+            # Release the figure even if drawing or saving raised.
+            plt.close(fig)
+        pages += 1
+    return pages
+
+
+def _draw_histogram(ax, col: dict) -> None:
+    """Draw one column's real histogram from its {lo, hi, count} bins.
+
+    Args:
+        ax: matplotlib axes to draw on.
+        col: column dict. Reads ``name`` and, under ``numeric``, the keys
+            ``histogram`` (list of ``{lo, hi, count}`` dicts), ``median``,
+            ``mean``, ``std`` and ``outlier_pct``; all are optional.
+
+    When no usable bin is found the axes are switched off and a short
+    placeholder message is drawn instead of a chart.
+    """
+    num = col.get("numeric") or {}
+    hist = num.get("histogram") or []
+    lefts, widths, counts = [], [], []
+    for b in hist:
+        # Skip malformed entries instead of failing the whole chart.
+        if not isinstance(b, dict):
+            continue
+        lo = b.get("lo")
+        hi = b.get("hi")
+        cnt = b.get("count") or 0
+        if lo is None or hi is None:
+            continue
+        w = hi - lo
+        # A zero/negative-width bin (e.g. a constant column) would be
+        # invisible; give it a tiny positive width relative to its position.
+        if w <= 0:
+            w = max(abs(lo) * 1e-6, 1e-6)
+        lefts.append(lo)
+        widths.append(w)
+        counts.append(cnt)
+
+    name = col.get("name") or "(col)"
+    if not counts:
+        ax.axis("off")
+        ax.text(0.5, 0.5, f"{name}: sin datos numéricos", ha="center",
+                va="center", fontsize=8, color=_MUTED, transform=ax.transAxes)
+        return
+
+    # align="edge" places each bar's left side at ``lo`` so bars span [lo, hi).
+    ax.bar(lefts, counts, width=widths, align="edge", color=_ACCENT,
+           edgecolor="white", linewidth=0.3)
+    # Graphical integrity: count axis starts at 0, never truncated.
+    ax.set_ylim(bottom=0)
+    _despine(ax)
+    ax.set_title(_truncate(name, 28), fontsize=10, loc="left", pad=4)
+    ax.grid(axis="y", color=_MUTED, alpha=0.15, linewidth=0.5)
+    ax.set_axisbelow(True)
+
+    # Median reference line (a single light marker, no chartjunk).
+    # bool is excluded explicitly because it is a subclass of int.
+    median = num.get("median")
+    if isinstance(median, (int, float)) and not isinstance(median, bool):
+        ax.axvline(median, color=_INK, linewidth=0.8, alpha=0.5)
+
+    # One compact annotation line: mean / std / outliers.
+    # NOTE(review): outlier_pct is printed as-is followed by "%" (no x100),
+    # whereas _fmt_pct treats its inputs as 0-1 fractions — confirm that the
+    # profile stores outlier_pct as a percentage and not as a fraction.
+    bits = []
+    if num.get("mean") is not None:
+        bits.append(f"μ={_fmt_num(num.get('mean'))}")
+    if num.get("std") is not None:
+        bits.append(f"σ={_fmt_num(num.get('std'))}")
+    if num.get("outlier_pct") is not None:
+        bits.append(f"outliers={_fmt_num(num.get('outlier_pct'), 1)}%")
+    if bits:
+        ax.text(0.99, 0.92, "  ".join(bits), transform=ax.transAxes,
+                ha="right", va="top", fontsize=7, color=_MUTED)
+
+
+def _categorical_pages(pdf, columns: list) -> int:
+    """Top-k horizontal bars per categorical column, several per page.
+
+    A column qualifies when it is a dict whose ``categorical`` entry is a dict
+    with a non-empty ``top`` list. Columns are laid out
+    ``_CATEGORICAL_PER_PAGE`` per page, one panel per row.
+
+    Returns:
+        The number of pages appended (0 when no column qualifies).
+    """
+    cat_cols = [
+        c for c in columns
+        if isinstance(c, dict)
+        and isinstance(c.get("categorical"), dict)
+        and c["categorical"].get("top")
+    ]
+    if not cat_cols:
+        return 0
+
+    pages = 0
+    for start in range(0, len(cat_cols), _CATEGORICAL_PER_PAGE):
+        chunk = cat_cols[start:start + _CATEGORICAL_PER_PAGE]
+        # Always build the full grid so panels keep one size across pages; a
+        # short last chunk leaves its spare rows blank instead of stretching.
+        fig, axes = plt.subplots(
+            _CATEGORICAL_PER_PAGE, 1, figsize=_A5_PORTRAIT, squeeze=False,
+        )
+        try:
+            fig.suptitle("Categóricas (top-k)", fontsize=14,
+                         fontweight="bold", color=_INK, x=0.08, ha="left",
+                         y=0.98)
+            for ax, col in zip(axes[:, 0], chunk):
+                _draw_topk_bars(ax, col)
+            # Hide the rows the short chunk did not use.
+            for ax in axes[len(chunk):, 0]:
+                ax.axis("off")
+            fig.tight_layout(rect=[0, 0, 1, 0.95])
+            pdf.savefig(fig)
+        finally:
+            # Release the figure even if drawing or saving raised.
+            plt.close(fig)
+        pages += 1
+    return pages
+
+
+def _draw_topk_bars(ax, col: dict) -> None:
+    """Draw top-k counts for one categorical column as horizontal bars.
+
+    Args:
+        ax: matplotlib axes to draw on.
+        col: column dict. Reads ``name`` and, under ``categorical``, the keys
+            ``top`` (list of ``{value, count}`` dicts) and ``entropy``.
+
+    At most the first 10 entries of ``top`` are drawn. When none is usable the
+    axes are switched off and a short placeholder message is drawn instead.
+    """
+    cat = col.get("categorical") or {}
+    top = cat.get("top") or []
+    labels, values = [], []
+    for item in top[:10]:
+        # Skip malformed entries instead of failing the whole chart.
+        if not isinstance(item, dict):
+            continue
+        labels.append(_truncate(item.get("value"), 20))
+        values.append(item.get("count") or 0)
+
+    name = col.get("name") or "(col)"
+    if not values:
+        ax.axis("off")
+        ax.text(0.5, 0.5, f"{name}: sin categorías", ha="center", va="center",
+                fontsize=8, color=_MUTED, transform=ax.transAxes)
+        return
+
+    # Largest on top: reverse so barh reads naturally top-to-bottom.
+    # (barh puts index 0 at the bottom; this assumes ``top`` arrives sorted
+    # by descending count — presumably guaranteed by the profiler, verify.)
+    labels = labels[::-1]
+    values = values[::-1]
+    y = np.arange(len(values))
+    ax.barh(y, values, color=_ACCENT, edgecolor="white", linewidth=0.3)
+    ax.set_yticks(y)
+    ax.set_yticklabels(labels, fontsize=7)
+    ax.set_xlim(left=0)  # bars start at 0 — honest length encoding.
+    _despine(ax)
+    ax.set_title(_truncate(name, 28), fontsize=10, loc="left", pad=4)
+    ax.grid(axis="x", color=_MUTED, alpha=0.15, linewidth=0.5)
+    ax.set_axisbelow(True)
+    # Entropy, when present, is annotated just above the top-right corner.
+    if cat.get("entropy") is not None:
+        ax.text(0.99, 1.02, f"entropía={_fmt_num(cat.get('entropy'))}",
+                transform=ax.transAxes, ha="right", va="bottom", fontsize=7,
+                color=_MUTED)
+
+
+def _quality_page(pdf, columns: list) -> int:
+    """Worst-quality columns first, with their issues/flags.
+
+    Only columns that are dicts with a non-``None`` ``quality_score`` are
+    listed, sorted ascending by score. The table is split across as many
+    pages as needed, repeating the header on each, so wide tables do not run
+    off the bottom of the page.
+
+    Returns:
+        The number of pages appended (0 when no column carries a score).
+    """
+    scored = [
+        c for c in columns
+        if isinstance(c, dict) and c.get("quality_score") is not None
+    ]
+    if not scored:
+        return 0
+    scored = sorted(scored, key=lambda c: c.get("quality_score"))
+
+    header = [f"{'columna':<20} {'score':>6}  problemas", "-" * 52]
+    rows = []
+    for col in scored:
+        issues = col.get("issues") or col.get("flags") or []
+        if isinstance(issues, (list, tuple)):
+            # Items may be non-str (codes, dicts); str() keeps join safe.
+            issues_s = ", ".join(str(issue) for issue in issues)
+        else:
+            issues_s = str(issues)
+        rows.append(
+            f"{_truncate(col.get('name'), 20):<20} "
+            f"{_fmt_num(col.get('quality_score'), 1):>6}  {_truncate(issues_s, 24)}"
+        )
+
+    # Same 34-line page budget as _paginate_text, minus the 2 header lines
+    # that are repeated on every page.
+    rows_per_page = 32
+    pages = 0
+    for start in range(0, len(rows), rows_per_page):
+        page_title = "Calidad" if pages == 0 else "Calidad (cont.)"
+        pages += _text_page(
+            pdf, page_title, header + rows[start:start + rows_per_page],
+            subtitle="ordenado de peor a mejor calidad",
+        )
+    return pages
+
+
+def _correlations_page(pdf, correlations) -> int:
+    """Heatmap of the association matrix reconstructed from the pairs list.
+
+    Args:
+        pdf: open ``PdfPages`` object the page is appended to.
+        correlations: either a list of pair dicts, or a dict holding that list
+            under ``pairs`` (preferred) or ``strong``. Each pair names its two
+            columns under ``a``/``b`` or ``col_a``/``col_b`` and its strength
+            under ``value`` or ``corr``.
+
+    Returns:
+        1 when a heatmap page was written, 0 when there is nothing to plot
+        (no pairs, or fewer than two distinct column labels).
+    """
+    if not correlations:
+        return 0
+    pairs = correlations
+    if isinstance(correlations, dict):
+        pairs = correlations.get("pairs") or correlations.get("strong") or []
+    if not pairs:
+        return 0
+
+    def _endpoint(pair: dict, short: str, long_: str):
+        """Column name under ``short``, else under ``long_`` (None if absent)."""
+        name = pair.get(short)
+        return pair.get(long_) if name is None else name
+
+    # Build the symmetric label set and a value matrix from the pairs.
+    labels = []
+    for p in pairs:
+        if not isinstance(p, dict):
+            continue
+        for key in ("a", "col_a", "b", "col_b"):
+            v = p.get(key)
+            if v is not None and v not in labels:
+                labels.append(v)
+    if len(labels) < 2:
+        return 0
+    idx = {lab: i for i, lab in enumerate(labels)}
+    n = len(labels)
+    # Unknown associations stay NaN so they render as blank cells.
+    mat = np.full((n, n), np.nan)
+    np.fill_diagonal(mat, 1.0)
+    for p in pairs:
+        if not isinstance(p, dict):
+            continue
+        # Resolve endpoints with "is None" (not truthiness) so falsy column
+        # names such as 0 or "" — which did get a label above — are not
+        # silently dropped from the matrix.
+        a = _endpoint(p, "a", "col_a")
+        b = _endpoint(p, "b", "col_b")
+        val = p.get("value")
+        if val is None:
+            val = p.get("corr")
+        if a in idx and b in idx and val is not None:
+            try:
+                fv = float(val)
+            except (TypeError, ValueError):
+                continue
+            mat[idx[a], idx[b]] = fv
+            mat[idx[b], idx[a]] = fv
+
+    fig, ax = plt.subplots(figsize=_A5_PORTRAIT)
+    try:
+        fig.suptitle("Correlaciones / asociación", fontsize=14,
+                     fontweight="bold", color=_INK, x=0.08, ha="left", y=0.97)
+        # Fixed [-1, 1] colour scale so colours are comparable across reports.
+        im = ax.imshow(mat, cmap="RdBu_r", vmin=-1, vmax=1, aspect="auto")
+        ax.set_xticks(np.arange(n))
+        ax.set_yticks(np.arange(n))
+        ax.set_xticklabels([_truncate(lab, 12) for lab in labels],
+                           rotation=60, ha="right", fontsize=7, color=_INK)
+        ax.set_yticklabels([_truncate(lab, 14) for lab in labels],
+                           fontsize=7, color=_INK)
+        ax.tick_params(length=0)
+        for side in ("top", "right", "left", "bottom"):
+            ax.spines[side].set_visible(False)
+        # Annotate cells only when few columns (keeps it legible on a phone).
+        if n <= 8:
+            for i in range(n):
+                for j in range(n):
+                    if not np.isnan(mat[i, j]):
+                        # White text on the saturated ends of the colormap.
+                        ax.text(j, i, _fmt_num(mat[i, j], 2), ha="center",
+                                va="center", fontsize=6,
+                                color=_INK if abs(mat[i, j]) < 0.6 else "white")
+        cbar = fig.colorbar(im, ax=ax, fraction=0.046, pad=0.04)
+        cbar.ax.tick_params(labelsize=7)
+        fig.tight_layout(rect=[0, 0, 1, 0.94])
+        pdf.savefig(fig)
+    finally:
+        # Release the figure even if drawing or saving raised.
+        plt.close(fig)
+    return 1
+
+
+def _llm_pages(pdf, llm) -> int:
+    """Write the LLM block (data dictionary / summary) as paginated text.
+
+    A dict is rendered one ``## key`` section per non-``None`` entry, each
+    followed by a blank line; any other value is wrapped as a single block.
+    Returns the number of pages written, 0 when there is nothing to show.
+    """
+    if not llm:
+        return 0
+    if isinstance(llm, dict):
+        lines = []
+        for key, value in llm.items():
+            if value is not None:
+                lines += [f"## {key}", *_wrap_value(value), ""]
+    else:
+        lines = list(_wrap_value(llm))
+    if not lines:
+        return 0
+    return _paginate_text(pdf, "Análisis LLM", lines)
+
+
+def _generic_pages(pdf, profile: dict) -> int:
+    """Forward-compat dump of top-level sections this renderer does not know.
+
+    Every profile key outside ``_KNOWN_TOP_KEYS`` with a non-``None`` value
+    becomes a ``## key`` section followed by its flattened value and a blank
+    line. Returns the number of pages written, 0 when there are no extras.
+    """
+    lines = []
+    for key, value in profile.items():
+        if key in _KNOWN_TOP_KEYS or value is None:
+            continue
+        lines.append(f"## {key}")
+        lines.extend(_wrap_value(value))
+        lines.append("")
+    if not lines:
+        return 0
+    return _paginate_text(
+        pdf,
+        "Otras secciones",
+        lines,
+        subtitle="bloques nuevos del profile (forward-compat)",
+    )
+
+
+def _wrap_value(value, width: int = 78) -> list:
+    """Flatten an arbitrary value into wrapped, readable text lines.
+
+    Args:
+        value: a dict (one ``- key: value`` bullet per item), a list/tuple
+            (one bullet per element; dict elements become ``k=v`` lists), or
+            anything else (converted with ``str()`` and word-wrapped).
+        width: maximum line width used when word-wrapping plain text.
+
+    Returns:
+        A list of text lines. Plain text keeps its own line breaks: each
+        source line is wrapped on its own and blank lines are preserved, so
+        multi-paragraph text does not collapse into one block.
+    """
+    # NOTE(review): 78 monospace columns may be wider than the A5 text column
+    # drawn by _text_page — confirm against a rendered page.
+    out = []
+    if isinstance(value, dict):
+        for k, v in value.items():
+            out.append(f"- {k}: {_truncate(_scalar(v), 64)}")
+    elif isinstance(value, (list, tuple)):
+        for item in value:
+            if isinstance(item, dict):
+                out.append("- " + _truncate(
+                    ", ".join(f"{k}={_scalar(v)}" for k, v in item.items()), 70))
+            else:
+                out.append(f"- {_truncate(_scalar(item), 72)}")
+    else:
+        # textwrap.wrap() turns newlines into spaces, so wrap line by line;
+        # "or ['']" keeps empty input and blank separator lines as one line.
+        for source_line in str(value).splitlines() or [""]:
+            out.extend(textwrap.wrap(source_line, width=width) or [""])
+    return out
+
+
+def _scalar(v) -> str:
+    """Return a compact single-line string for a scalar or nested value.
+
+    Containers (dict/list/tuple) are stringified and clipped to 60
+    characters, floats go through ``_fmt_num``, everything else through
+    ``str()``.
+    """
+    if isinstance(v, (dict, list, tuple)):
+        return _truncate(str(v), 60)
+    if isinstance(v, float):
+        return _fmt_num(v)
+    return str(v)
+
+
+def _paginate_text(pdf, title: str, lines: list, subtitle: str = None,
+                   per_page: int = 34) -> int:
+    """Spread ``lines`` over consecutive text pages of ``per_page`` lines each.
+
+    The first page carries ``title`` and ``subtitle``; follow-up pages carry
+    ``"<title> (cont.)"`` and no subtitle. Returns the number of pages written
+    (0 for an empty ``lines``).
+    """
+    written = 0
+    for offset in range(0, len(lines), per_page):
+        is_first = written == 0
+        written += _text_page(
+            pdf,
+            title if is_first else f"{title} (cont.)",
+            lines[offset:offset + per_page],
+            subtitle=subtitle if is_first else None,
+        )
+    return written
+
+
+# --------------------------------------------------------------------------- #
+# Public entry point
+# --------------------------------------------------------------------------- #
+def render_eda_pdf(profile: dict, out_path: str, title: str = None) -> dict:
+    """Render a TableProfile dict into a portable, mobile-readable multi-page PDF.
+
+    The report is laid out for reading on a phone: A5 portrait pages, single
+    column, large type, Tufte-style high data-ink charts (real histograms as
+    small multiples, top-k bars, an association heatmap). Every profile key is
+    read defensively and only present sections are rendered; unknown top-level
+    blocks are dumped on a forward-compat page rather than dropped.
+
+    Args:
+        profile: TableProfile dict from the `eda` capability group (the dict
+            returned by ``profile_table`` under ``profile``). May have many keys
+            absent or None; a None/empty profile still yields a 1-page PDF.
+        out_path: filesystem path (``str`` or ``os.PathLike``) where the PDF
+            is written. Parent directories are created if missing.
+        title: optional report title for the cover. Defaults to
+            ``"EDA — <table>"``.
+
+    Returns:
+        dict (never raises): {"pdf_path": str, "n_pages": int, "note": str}.
+        ``n_pages`` is the number of pages actually present in the file.
+        On an invalid ``out_path`` or a fatal write error, ``pdf_path`` is
+        None and ``note`` explains why.
+    """
+    if profile is None:
+        profile = {}
+    if not isinstance(profile, dict):
+        return {"pdf_path": None, "n_pages": 0,
+                "note": f"profile no es dict: {type(profile).__name__}"}
+
+    columns = profile.get("columns") or []
+    if not isinstance(columns, list):
+        columns = []
+
+    # Validate the path type up front: os.path.abspath(None) raises TypeError,
+    # which would otherwise escape and break the never-raises contract.
+    try:
+        out_path = os.fspath(out_path)
+    except TypeError as e:
+        return {"pdf_path": None, "n_pages": 0,
+                "note": f"out_path no es una ruta válida: {e}"}
+
+    try:
+        parent = os.path.dirname(os.path.abspath(out_path))
+        os.makedirs(parent, exist_ok=True)
+    except (OSError, TypeError, ValueError) as e:
+        # ValueError covers paths with an embedded NUL byte.
+        return {"pdf_path": None, "n_pages": 0,
+                "note": f"no se pudo crear el directorio destino: {e}"}
+
+    notes = []
+    n_pages = 0
+
+    # Tufte-ish defaults scoped to this render only.
+    rc = {
+        "font.size": 10,
+        "font.family": "sans-serif",
+        "axes.titlesize": 11,
+        "axes.edgecolor": _MUTED,
+        "figure.facecolor": "white",
+        "savefig.facecolor": "white",
+        "pdf.fonttype": 42,  # embed TrueType so text stays selectable on mobile.
+    }
+
+    # Each section is isolated: a failure in one never aborts the whole PDF.
+    builders = [
+        ("cover", lambda p: _cover_page(p, profile, title)),
+        ("overview", lambda p: _overview_page(p, profile)),
+        ("numeric", lambda p: _numeric_pages(p, columns)),
+        ("categorical", lambda p: _categorical_pages(p, columns)),
+        ("quality", lambda p: _quality_page(p, columns)),
+        ("correlations", lambda p: _correlations_page(p, profile.get("correlations"))),
+        ("llm", lambda p: _llm_pages(p, profile.get("llm"))),
+        ("generic", lambda p: _generic_pages(p, profile)),
+    ]
+
+    try:
+        with plt.rc_context(rc):
+            with PdfPages(out_path) as pdf:
+                for name, build in builders:
+                    try:
+                        build(pdf)
+                    except Exception as e:  # noqa: BLE001 — one bad section never aborts.
+                        notes.append(f"sección '{name}' omitida: {e}")
+                # Guarantee at least one page so the PDF is always valid.
+                if pdf.get_pagecount() == 0:
+                    _text_page(
+                        pdf, title or "EDA", ["(perfil vacío — sin secciones)"]
+                    )
+                # Ask the file for its page count instead of summing builder
+                # return values: a builder that fails after saving some pages
+                # returns nothing, yet those pages are in the PDF.
+                n_pages = pdf.get_pagecount()
+    except Exception as e:  # noqa: BLE001
+        return {"pdf_path": None, "n_pages": 0,
+                "note": f"fallo al escribir el PDF: {e}"}
+
+    note = f"{n_pages} páginas"
+    if notes:
+        note += " · " + "; ".join(notes)
+    return {"pdf_path": out_path, "n_pages": n_pages, "note": note}