"""render_eda_pdf — Portable, mobile-readable PDF report of a TableProfile (eda group). Impure function (writes a file): takes a TableProfile dict from the `eda` capability group and renders a MULTI-PAGE PDF designed to be read and explored on a phone screen. It is the 4th output of the eda workflow, next to the markdown report, the JSON sidecar and the executed Jupyter notebook. Design follows Edward Tufte, "The Visual Display of Quantitative Information": high data-ink ratio (no chartjunk, despined axes, light grids), small multiples for per-column histograms, and graphical integrity (y-axes start at 0, no misleading truncation). Pages are A5 portrait, single column, with a large, legible typeface so the report stays readable on a small display. Every key of the profile is read defensively with ``.get(...)`` and only the sections actually present are rendered. The function is forward-compatible: if the profile carries blocks this renderer does not know about (e.g. ``models``, time series, ``caveats`` added by sibling functions), they are dumped generically on a final page instead of being ignored or crashing the render. dict-no-throw contract of the eda group: it NEVER raises. Any failure of a single section is caught and noted; the function always returns a dict with the path, the page count and a human note. Engine: matplotlib ``PdfPages`` (already in ``python/.venv``) — zero new deps. """ import os import textwrap from datetime import datetime, timezone import matplotlib # Headless backend: this runs in agents/CI without a display. matplotlib.use("Agg") import matplotlib.pyplot as plt # noqa: E402 import numpy as np # noqa: E402 from matplotlib.backends.backend_pdf import PdfPages # noqa: E402 # A5 portrait in inches (148 x 210 mm). Single column, tall, phone-friendly. _A5_PORTRAIT = (5.83, 8.27) # Number of per-column small multiples stacked vertically on one page. 
_NUMERIC_PER_PAGE = 3
_CATEGORICAL_PER_PAGE = 3

# Top-level profile keys this renderer handles explicitly. Anything else found
# at the top level of the profile is dumped on the forward-compat
# "Otras secciones" page so new sections added by sibling functions still
# reach the reader.
_KNOWN_TOP_KEYS = {
    "table", "source", "profiled_at", "n_rows", "n_cols", "size_bytes",
    "duplicate_rows", "duplicate_pct", "null_cell_pct", "constant_cols",
    "all_null_cols", "quality_score", "type_breakdown", "key_candidates",
    "columns", "correlations", "llm",
    # Blocks with a dedicated builder (they must not fall through to the
    # generic str(dict) dump).
    "models", "series", "caveats",
}

# Restrained, high-contrast palette: a single accent reads cleanly on a phone.
_INK = "#1b1b1b"
_ACCENT = "#2a6f97"
_MUTED = "#8a8a8a"

# Tufte-ish render defaults shared by both public entry points.
_RC = {
    "font.size": 10,
    "font.family": "sans-serif",
    "axes.titlesize": 11,
    "axes.edgecolor": _MUTED,
    "figure.facecolor": "white",
    "savefig.facecolor": "white",
    "pdf.fonttype": 42,  # embed TrueType so text stays selectable on mobile.
}


# --------------------------------------------------------------------------- #
# Small formatting + Tufte helpers
# --------------------------------------------------------------------------- #
def _fmt_num(value, decimals: int = 3) -> str:
    """Format a number compactly; fall back to ``str`` for anything else.

    Args:
        value: the value to format. ``None`` becomes an em dash, bools are
            printed as ``True``/``False``, ints get thousands separators and
            floats are rounded to ``decimals`` places with trailing zeros
            removed.
        decimals: number of decimal places used for floats.

    Returns:
        The formatted text.
    """
    if value is None:
        return "—"
    if isinstance(value, bool):  # bool is an int subclass: test it first.
        return str(value)
    if isinstance(value, int):
        return f"{value:,}"
    if isinstance(value, float):
        if value != value:  # NaN is the only value unequal to itself.
            return "NaN"
        if value in (float("inf"), float("-inf")):
            return str(value)
        text = f"{value:.{decimals}f}"
        # Only strip zeros that sit after a decimal point; without this
        # guard ``decimals=0`` would turn "100" into "1".
        if "." in text:
            text = text.rstrip("0").rstrip(".")
        # A tiny negative rounds to "-0"; show it as plain zero.
        return "0" if text in ("", "-0") else text
    return str(value)


def _fmt_pct(value, decimals: int = 1) -> str:
    """Format a fraction (0-1) as 'NN.N%'. Returns '—' for None.

    Values that cannot be converted to ``float`` are returned as ``str(value)``.
    """
    if value is None:
        return "—"
    try:
        num = float(value)
    except (TypeError, ValueError):
        return str(value)
    return f"{num * 100:.{decimals}f}%"


def _despine(ax) -> None:
    """Strip top/right spines and soften the rest — raise the data-ink ratio."""
    for side in ("top", "right"):
        ax.spines[side].set_visible(False)
    for side in ("left", "bottom"):
        ax.spines[side].set_color(_MUTED)
        ax.spines[side].set_linewidth(0.6)
    ax.tick_params(colors=_MUTED, labelsize=7, length=2)
    ax.title.set_color(_INK)


def _truncate(text, width: int = 22) -> str:
    """Clip an arbitrary value to a short label for tight phone layouts.

    ``None`` becomes an em dash; longer text is cut to ``width`` characters
    with a trailing ellipsis.
    """
    s = str(text) if text is not None else "—"
    return s if len(s) <= width else s[: width - 1] + "…"


def _mpl_literal(text) -> str:
    r"""Escape ``$`` so matplotlib renders the text literally.

    Matplotlib treats text holding an even number of unescaped dollar signs as
    mathtext; data-derived labels such as ``"$10-$20"`` would be typeset as
    math (or fail to parse when drawn). ``\$`` is shown as a plain dollar.
    """
    return str(text).replace("$", r"\$")


def _join_labels(items, sep: str = ", ") -> str:
    """Join arbitrary items into one string, coercing each item with ``str``.

    Column names and key candidates are not guaranteed to be strings, and a
    bare ``sep.join(items)`` raises ``TypeError`` on the first non-string.
    """
    return sep.join(str(item) for item in items)


def _dict_or_empty(value) -> dict:
    """Return ``value`` when it is a dict, otherwise an empty dict."""
    return value if isinstance(value, dict) else {}


def _text_page(pdf, title: str, lines: list, subtitle: str = None) -> int:
    """Render one text page (monospace body) and return 1 (pages written).

    Args:
        pdf: open ``PdfPages`` object the page is appended to.
        title: bold page heading.
        lines: body lines, joined with newlines and drawn in monospace.
        subtitle: optional muted line under the heading.

    Returns:
        Always 1.
    """
    fig = plt.figure(figsize=_A5_PORTRAIT)
    try:
        fig.text(0.08, 0.94, _mpl_literal(title), fontsize=16,
                 fontweight="bold", color=_INK)
        if subtitle:
            fig.text(0.08, 0.905, _mpl_literal(subtitle), fontsize=9,
                     color=_MUTED)
        # NOTE(review): callers wrap body text at up to 78 characters; at
        # 9.5 pt monospace that looks wider than an A5 page — confirm the
        # right edge is not clipped on real output.
        body = _mpl_literal("\n".join(lines))
        fig.text(
            0.08, 0.88, body,
            fontsize=9.5, color=_INK, family="monospace",
            va="top", ha="left", linespacing=1.5,
        )
        pdf.savefig(fig)
    finally:
        # Always release the figure, even when drawing/saving fails, so
        # pyplot does not accumulate open figures.
        plt.close(fig)
    return 1


def _kv_lines(rows: list, key_width: int = 18) -> list:
    """Format [label, value] rows as aligned 'label : value' monospace lines."""
    return [f"{str(label):<{key_width}}: {value}" for label, value in rows]


# --------------------------------------------------------------------------- #
# Page builders (each fully defensive, each returns the number of pages it made)
# --------------------------------------------------------------------------- #
def _cover_page(pdf, profile: dict, title: str) -> int:
    """Cover: table name, date, shape and an oversized quality score.

    Args:
        pdf: open ``PdfPages`` object.
        profile: TableProfile dict; every key is optional.
        title: cover heading; when falsy, ``"EDA — <table>"`` is used.

    Returns:
        Always 1.
    """
    fig = plt.figure(figsize=_A5_PORTRAIT)
    try:
        table = profile.get("table") or "(tabla sin nombre)"
        heading = title or f"EDA — {table}"
        fig.text(0.08, 0.82, _mpl_literal(heading), fontsize=22,
                 fontweight="bold", color=_INK, wrap=True)

        sub = []
        src = profile.get("source")
        if src:
            sub.append(f"fuente: {_truncate(src, 40)}")
        when = profile.get("profiled_at") or datetime.now(
            timezone.utc).strftime("%Y-%m-%d %H:%M UTC")
        sub.append(f"generado: {when}")
        fig.text(0.08, 0.76, _mpl_literal("\n".join(sub)), fontsize=10,
                 color=_MUTED, va="top")

        n_rows = profile.get("n_rows")
        n_cols = profile.get("n_cols")
        shape = f"{_fmt_num(n_rows)} filas × {_fmt_num(n_cols)} columnas"
        fig.text(0.08, 0.60, shape, fontsize=15, color=_ACCENT,
                 fontweight="bold")

        score = profile.get("quality_score")
        if score is not None:
            fig.text(0.08, 0.42, "calidad", fontsize=12, color=_MUTED)
            fig.text(0.08, 0.31, _mpl_literal(_fmt_num(score)), fontsize=60,
                     fontweight="bold", color=_INK)
            fig.text(0.08, 0.25, "sobre 100", fontsize=12, color=_MUTED)

        fig.text(0.08, 0.06,
                 "Tufte · alta densidad de datos · lectura en móvil",
                 fontsize=8, color=_MUTED, style="italic")
        pdf.savefig(fig)
    finally:
        plt.close(fig)
    return 1


def _overview_page(pdf, profile: dict) -> int:
    """Overview key/value page: types, duplicates, nulls, constants, keys.

    Only metrics present in ``profile`` are listed; when none is present a
    single placeholder row is rendered. Returns the number of pages written.
    """
    rows = []
    if profile.get("n_rows") is not None:
        rows.append(["Filas", _fmt_num(profile.get("n_rows"))])
    if profile.get("n_cols") is not None:
        rows.append(["Columnas", _fmt_num(profile.get("n_cols"))])
    if profile.get("size_bytes") is not None:
        rows.append(["Tamaño (bytes)", _fmt_num(profile.get("size_bytes"))])
    if profile.get("duplicate_rows") is not None:
        dup = _fmt_num(profile.get("duplicate_rows"))
        if profile.get("duplicate_pct") is not None:
            dup += f" ({_fmt_pct(profile.get('duplicate_pct'))})"
        rows.append(["Filas duplicadas", dup])
    if profile.get("null_cell_pct") is not None:
        rows.append(["Celdas nulas", _fmt_pct(profile.get("null_cell_pct"))])
    if profile.get("quality_score") is not None:
        rows.append(["Calidad", _fmt_num(profile.get("quality_score"))])

    type_breakdown = profile.get("type_breakdown") or {}
    tb = ", ".join(f"{k}: {v}" for k, v in type_breakdown.items() if v)
    if tb:
        rows.append(["Tipos", tb])

    # Names are coerced with str(): they may be ints or composite keys.
    constant_cols = profile.get("constant_cols") or []
    if constant_cols:
        rows.append(["Columnas constantes",
                     _truncate(_join_labels(constant_cols), 40)])
    all_null_cols = profile.get("all_null_cols") or []
    if all_null_cols:
        rows.append(["Columnas all-null",
                     _truncate(_join_labels(all_null_cols), 40)])
    key_candidates = profile.get("key_candidates") or []
    if key_candidates:
        rows.append(["Candidatos a clave",
                     _truncate(_join_labels(key_candidates), 40)])

    if not rows:
        rows.append(["(sin métricas de overview)", ""])
    return _text_page(pdf, "Overview", _kv_lines(rows, key_width=20))


def _small_multiple_pages(pdf, items: list, per_page: int, heading: str,
                          draw) -> int:
    """Lay ``items`` out as stacked small multiples, ``per_page`` per page.

    Every page allocates ``per_page`` rows so panels keep the same size on a
    short last page; the unused rows are switched off.

    Args:
        pdf: open ``PdfPages`` object.
        items: column dicts to draw, one panel each.
        per_page: number of panel rows on every page.
        heading: figure title repeated on each page.
        draw: callable ``draw(ax, item)`` that renders one panel.

    Returns:
        Number of pages written.
    """
    pages = 0
    for start in range(0, len(items), per_page):
        chunk = items[start:start + per_page]
        fig, axes = plt.subplots(per_page, 1, figsize=_A5_PORTRAIT,
                                 squeeze=False)
        try:
            fig.suptitle(heading, fontsize=14, fontweight="bold", color=_INK,
                         x=0.08, ha="left", y=0.98)
            slots = axes[:, 0]
            for ax, item in zip(slots, chunk):
                draw(ax, item)
            # Hide unused axes if the chunk is short (keeps spacing even).
            for ax in slots[len(chunk):]:
                ax.axis("off")
            fig.tight_layout(rect=[0, 0, 1, 0.95])
            pdf.savefig(fig)
        finally:
            plt.close(fig)
        pages += 1
    return pages


def _numeric_pages(pdf, columns: list) -> int:
    """Small multiples: a real histogram per numeric column, several per page.

    Only dict columns carrying a non-empty ``numeric.histogram`` are drawn.
    Returns the number of pages written (0 when there is nothing to draw).
    """
    numeric_cols = [
        c for c in columns
        if isinstance(c, dict) and c.get("numeric")
        and c["numeric"].get("histogram")
    ]
    if not numeric_cols:
        return 0
    return _small_multiple_pages(pdf, numeric_cols, _NUMERIC_PER_PAGE,
                                 "Distribuciones numéricas", _draw_histogram)


def _draw_histogram(ax, col: dict) -> None:
    """Draw one column's real histogram from its {lo, hi, count} bins.

    Bins that are not dicts or lack ``lo``/``hi`` are skipped. When no usable
    bin remains the axes are replaced by a short "no data" note.
    """
    num = col.get("numeric") or {}
    hist = num.get("histogram") or []
    lefts, widths, counts = [], [], []
    for b in hist:
        if not isinstance(b, dict):
            continue
        lo = b.get("lo")
        hi = b.get("hi")
        cnt = b.get("count") or 0
        if lo is None or hi is None:
            continue
        w = hi - lo
        if w <= 0:
            # Degenerate bin: give it a sliver of width so it is still drawn.
            w = max(abs(lo) * 1e-6, 1e-6)
        lefts.append(lo)
        widths.append(w)
        counts.append(cnt)

    name = col.get("name") or "(col)"
    if not counts:
        ax.axis("off")
        ax.text(0.5, 0.5, _mpl_literal(f"{name}: sin datos numéricos"),
                ha="center", va="center", fontsize=8, color=_MUTED,
                transform=ax.transAxes)
        return

    ax.bar(lefts, counts, width=widths, align="edge", color=_ACCENT,
           edgecolor="white", linewidth=0.3)
    # Graphical integrity: count axis starts at 0, never truncated.
    ax.set_ylim(bottom=0)
    _despine(ax)
    # The colour is passed here because the left-aligned title is a separate
    # artist from ``ax.title`` (the centre title that _despine recolours).
    ax.set_title(_mpl_literal(_truncate(name, 28)), fontsize=10, loc="left",
                 pad=4, color=_INK)
    ax.grid(axis="y", color=_MUTED, alpha=0.15, linewidth=0.5)
    ax.set_axisbelow(True)

    # Median reference line (a single light marker, no chartjunk).
    median = num.get("median")
    if isinstance(median, (int, float)) and not isinstance(median, bool):
        ax.axvline(median, color=_INK, linewidth=0.8, alpha=0.5)

    # One compact annotation line: mean / std / outliers.
    bits = []
    if num.get("mean") is not None:
        bits.append(f"μ={_fmt_num(num.get('mean'))}")
    if num.get("std") is not None:
        bits.append(f"σ={_fmt_num(num.get('std'))}")
    if num.get("outlier_pct") is not None:
        bits.append(f"outliers={_fmt_num(num.get('outlier_pct'), 1)}%")
    if bits:
        ax.text(0.99, 0.92, _mpl_literal(" ".join(bits)),
                transform=ax.transAxes, ha="right", va="top", fontsize=7,
                color=_MUTED)


def _categorical_pages(pdf, columns: list) -> int:
    """Top-k horizontal bars per categorical column, several per page.

    Only dict columns carrying a non-empty ``categorical.top`` are drawn.
    Returns the number of pages written (0 when there is nothing to draw).
    """
    cat_cols = [
        c for c in columns
        if isinstance(c, dict) and c.get("categorical")
        and c["categorical"].get("top")
    ]
    if not cat_cols:
        return 0
    return _small_multiple_pages(pdf, cat_cols, _CATEGORICAL_PER_PAGE,
                                 "Categóricas (top-k)", _draw_topk_bars)


def _draw_topk_bars(ax, col: dict) -> None:
    """Draw top-k counts for one categorical column as horizontal bars.

    At most the first 10 entries of ``categorical.top`` are used; non-dict
    entries are skipped.
    """
    cat = col.get("categorical") or {}
    top = cat.get("top") or []
    labels, values = [], []
    for item in top[:10]:
        if not isinstance(item, dict):
            continue
        labels.append(_mpl_literal(_truncate(item.get("value"), 20)))
        values.append(item.get("count") or 0)

    name = col.get("name") or "(col)"
    if not values:
        ax.axis("off")
        ax.text(0.5, 0.5, _mpl_literal(f"{name}: sin categorías"),
                ha="center", va="center", fontsize=8, color=_MUTED,
                transform=ax.transAxes)
        return

    # Largest on top: reverse so barh reads naturally top-to-bottom.
    labels = labels[::-1]
    values = values[::-1]
    y = np.arange(len(values))
    ax.barh(y, values, color=_ACCENT, edgecolor="white", linewidth=0.3)
    ax.set_yticks(y)
    ax.set_yticklabels(labels, fontsize=7)
    ax.set_xlim(left=0)  # bars start at 0 — honest length encoding.
    _despine(ax)
    ax.set_title(_mpl_literal(_truncate(name, 28)), fontsize=10, loc="left",
                 pad=4, color=_INK)
    ax.grid(axis="x", color=_MUTED, alpha=0.15, linewidth=0.5)
    ax.set_axisbelow(True)
    if cat.get("entropy") is not None:
        ax.text(0.99, 1.02, f"entropía={_fmt_num(cat.get('entropy'))}",
                transform=ax.transAxes, ha="right", va="bottom", fontsize=7,
                color=_MUTED)


def _quality_page(pdf, columns: list) -> int:
    """Worst-quality columns first, with their issues/flags.

    Columns without a ``quality_score`` are left out. The table is paginated
    so a wide table does not run off the bottom of a single page.

    Returns:
        Number of pages written (0 when no column has a score).
    """
    scored = [
        c for c in columns
        if isinstance(c, dict) and c.get("quality_score") is not None
    ]
    if not scored:
        return 0
    scored = sorted(scored, key=lambda c: c.get("quality_score"))
    lines = [f"{'columna':<20} {'score':>6} problemas", "-" * 52]
    for col in scored:
        issues = col.get("issues") or col.get("flags") or []
        # Items are coerced with str(): issues may be dicts or codes.
        issues_s = (_join_labels(issues) if isinstance(issues, list)
                    else str(issues))
        lines.append(
            f"{_truncate(col.get('name'), 20):<20} "
            f"{_fmt_num(col.get('quality_score'), 1):>6} "
            f"{_truncate(issues_s, 24)}"
        )
    return _paginate_text(pdf, "Calidad", lines,
                          subtitle="ordenado de peor a mejor calidad")


def _correlation_matrix(pairs) -> tuple:
    """Rebuild the symmetric association matrix from a list of pair dicts.

    Each pair names its endpoints under ``a``/``col_a`` and ``b``/``col_b``
    and its strength under ``value``/``corr``. Endpoints are resolved with an
    explicit ``is not None`` test so falsy labels (``0``, ``""``) are kept.

    Returns:
        ``(labels, mat)``: labels in first-seen order and an ``n x n`` float
        array with 1.0 on the diagonal and NaN where no pair was given.
    """
    def first_present(mapping: dict, keys: tuple):
        for key in keys:
            found = mapping.get(key)
            if found is not None:
                return found
        return None

    labels = []
    for p in pairs:
        if not isinstance(p, dict):
            continue
        for key in ("a", "col_a", "b", "col_b"):
            v = p.get(key)
            if v is not None and v not in labels:
                labels.append(v)

    n = len(labels)
    idx = {lab: i for i, lab in enumerate(labels)}
    mat = np.full((n, n), np.nan)
    for i in range(n):
        mat[i, i] = 1.0
    for p in pairs:
        if not isinstance(p, dict):
            continue
        a = first_present(p, ("a", "col_a"))
        b = first_present(p, ("b", "col_b"))
        val = first_present(p, ("value", "corr"))
        if a in idx and b in idx and val is not None:
            try:
                fv = float(val)
            except (TypeError, ValueError):
                continue
            mat[idx[a], idx[b]] = fv
            mat[idx[b], idx[a]] = fv
    return labels, mat


def _correlations_page(pdf, correlations) -> int:
    """Heatmap of the association matrix reconstructed from the pairs list.

    ``correlations`` is either the list of pairs itself or a dict holding it
    under ``pairs`` or ``strong``. Returns 1 when a page was written, 0 when
    fewer than two labels could be found.
    """
    if not correlations:
        return 0
    pairs = correlations
    if isinstance(correlations, dict):
        pairs = correlations.get("pairs") or correlations.get("strong") or []
    if not pairs:
        return 0

    labels, mat = _correlation_matrix(pairs)
    n = len(labels)
    if n < 2:
        return 0

    fig, ax = plt.subplots(figsize=_A5_PORTRAIT)
    try:
        fig.suptitle("Correlaciones / asociación", fontsize=14,
                     fontweight="bold", color=_INK, x=0.08, ha="left", y=0.97)
        im = ax.imshow(mat, cmap="RdBu_r", vmin=-1, vmax=1, aspect="auto")
        ax.set_xticks(np.arange(n))
        ax.set_yticks(np.arange(n))
        ax.set_xticklabels([_mpl_literal(_truncate(lab, 12)) for lab in labels],
                           rotation=60, ha="right", fontsize=7, color=_INK)
        ax.set_yticklabels([_mpl_literal(_truncate(lab, 14)) for lab in labels],
                           fontsize=7, color=_INK)
        ax.tick_params(length=0)
        for side in ("top", "right", "left", "bottom"):
            ax.spines[side].set_visible(False)

        # Annotate cells only when few columns (keeps it legible on a phone).
        if n <= 8:
            for i in range(n):
                for j in range(n):
                    cell = float(mat[i, j])
                    if np.isnan(cell):
                        continue
                    # White text on the saturated ends of the colormap.
                    ax.text(j, i, _fmt_num(cell, 2), ha="center", va="center",
                            fontsize=6,
                            color=_INK if abs(cell) < 0.6 else "white")

        cbar = fig.colorbar(im, ax=ax, fraction=0.046, pad=0.04)
        cbar.ax.tick_params(labelsize=7)
        fig.tight_layout(rect=[0, 0, 1, 0.94])
        pdf.savefig(fig)
    finally:
        plt.close(fig)
    return 1


def _llm_pages(pdf, llm) -> int:
    """Render the LLM block (data dictionary / summary) as wrapped text pages.

    A dict is rendered one ``## key`` section per non-None value; anything
    else is wrapped as a single value. Returns the number of pages written.
    """
    if not llm:
        return 0
    lines = []
    if isinstance(llm, dict):
        for key, value in llm.items():
            if value is None:
                continue
            lines.append(f"## {key}")
            lines.extend(_wrap_value(value))
            lines.append("")
    else:
        lines.extend(_wrap_value(llm))
    if not lines:
        return 0
    return _paginate_text(pdf, "Análisis LLM", lines)


def _generic_pages(pdf, profile: dict) -> int:
    """Forward-compat: dump unknown top-level sections so they still reach the reader.

    Every non-None top-level key outside ``_KNOWN_TOP_KEYS`` becomes a
    ``## key`` section. Returns the number of pages written.
    """
    extras = {
        k: v for k, v in profile.items()
        if k not in _KNOWN_TOP_KEYS and v is not None
    }
    if not extras:
        return 0
    lines = []
    for key, value in extras.items():
        lines.append(f"## {key}")
        lines.extend(_wrap_value(value))
        lines.append("")
    if not lines:
        return 0
    return _paginate_text(pdf, "Otras secciones", lines,
                          subtitle="bloques nuevos del profile (forward-compat)")


def _wrap_value(value, width: int = 78) -> list:
    """Flatten an arbitrary value into wrapped, readable text lines.

    Dicts give one ``- key: value`` bullet per entry, lists/tuples one bullet
    per item (dict items as ``k=v`` pairs), and anything else is wrapped as
    prose at ``width`` characters.
    """
    out = []
    if isinstance(value, dict):
        for k, v in value.items():
            out.append(f"- {k}: {_truncate(_scalar(v), 64)}")
    elif isinstance(value, (list, tuple)):
        for item in value:
            if isinstance(item, dict):
                joined = ", ".join(
                    f"{k}={_scalar(v)}" for k, v in item.items())
                out.append("- " + _truncate(joined, 70))
            else:
                out.append(f"- {_truncate(_scalar(item), 72)}")
    else:
        # textwrap.wrap returns [] for empty text; keep one blank line then.
        out.extend(textwrap.wrap(str(value), width=width) or [""])
    return out


def _scalar(v) -> str:
    """Compact one-line representation of a scalar/nested value."""
    if isinstance(v, float):
        return _fmt_num(v)
    if isinstance(v, (dict, list, tuple)):
        return _truncate(str(v), 60)
    return str(v)


def _paginate_text(pdf, title: str, lines: list, subtitle: str = None,
                   per_page: int = 34) -> int:
    """Split a long list of text lines across several text pages.

    The first page carries ``title`` and ``subtitle``; later pages are titled
    ``"<title> (cont.)"`` without subtitle. Returns the number of pages
    written (0 for an empty ``lines``).
    """
    pages = 0
    for start in range(0, len(lines), per_page):
        chunk = lines[start:start + per_page]
        first = pages == 0
        page_title = title if first else f"{title} (cont.)"
        pages += _text_page(pdf, page_title, chunk,
                            subtitle=subtitle if first else None)
    return pages


# --------------------------------------------------------------------------- #
# Dedicated builders for forward-compat blocks (models / series / caveats).
# Before these existed, ``models``/``series``/``caveats`` fell to the generic
# dump and were rendered as truncated ``str(dict)``. Each builder is fully
# defensive, reads with ``.get`` and returns the number of pages it produced.
# --------------------------------------------------------------------------- #
def _models_pages(pdf, models) -> int:
    """Render the cheap-models block (PCA / KMeans / outliers / normality).

    Each sub-block is rendered only when it is a dict. Returns the number of
    pages written (0 when ``models`` is not a dict or yields no text).
    """
    if not isinstance(models, dict):
        return 0
    lines = []

    pca = models.get("pca")
    if isinstance(pca, dict):
        lines.append("## PCA")
        n_used = pca.get("n_rows_used")
        n_feat = pca.get("n_features")
        if n_used is not None or n_feat is not None:
            lines.append(
                f" {pca.get('n_components')} comp · "
                f"{_fmt_num(n_used)} filas · {_fmt_num(n_feat)} features"
            )
        evr = pca.get("explained_variance_ratio") or []
        cum = pca.get("cumulative") or []
        for i, var in enumerate(evr):
            acc = cum[i] if i < len(cum) else None
            lines.append(
                f" PC{i + 1}: var {_fmt_pct(var)} acum {_fmt_pct(acc)}")
        loadings = pca.get("top_loadings") or []
        if loadings:
            lines.append(" cargas principales:")
            for ld in loadings[:8]:
                if not isinstance(ld, dict):
                    continue
                comp = ld.get("component")
                # Integer components are shown 1-based as "PC<n>".
                comp_label = (f"PC{comp + 1}" if isinstance(comp, int)
                              else str(comp))
                lines.append(
                    f" {comp_label} {_truncate(ld.get('feature'), 18)}: "
                    f"{_fmt_num(ld.get('loading'), 3)}"
                )
        lines.append("")

    km = models.get("kmeans")
    if isinstance(km, dict):
        lines.append("## KMeans")
        head = f" mejor k = {_fmt_num(km.get('best_k'))}"
        if km.get("silhouette") is not None:
            head += f" silhouette {_fmt_num(km.get('silhouette'), 3)}"
        lines.append(head)
        sizes = km.get("cluster_sizes") or []
        if sizes:
            lines.append(" tamaños cluster: " + ", ".join(
                _fmt_num(s) for s in sizes))
        for sc in km.get("scores_by_k") or []:
            if not isinstance(sc, dict):
                continue
            lines.append(
                f" k={sc.get('k')}: silhouette "
                f"{_fmt_num(sc.get('silhouette'), 3)}"
                f" inertia {_fmt_num(sc.get('inertia'), 1)}"
            )
        lines.append("")

    out = models.get("outliers")
    if isinstance(out, dict):
        lines.append("## Outliers multivariante (Isolation Forest)")
        # The model's outlier_pct already comes on a 0-100 scale.
        line = f" {_fmt_num(out.get('n_outliers'))} outliers"
        if out.get("outlier_pct") is not None:
            line += f" ({_fmt_num(out.get('outlier_pct'), 2)}%)"
        if out.get("threshold") is not None:
            line += f" umbral {_fmt_num(out.get('threshold'), 3)}"
        lines.append(line)
        lines.append("")

    normality = models.get("normality")
    if isinstance(normality, dict):
        lines.append("## Normalidad (Jarque-Bera)")
        for col_name, res in normality.items():
            if not isinstance(res, dict):
                continue
            jb = _dict_or_empty(res.get("jarque_bera"))
            lines.append(
                f" {_truncate(col_name, 18):<18} "
                f"normal={res.get('is_normal')}"
                f" JB p={_fmt_num(jb.get('p'), 4)}"
            )
        lines.append("")

    note = models.get("note")
    if note:
        lines.append(f"nota: {note}")

    if not any(ln.strip() for ln in lines):
        return 0
    return _paginate_text(pdf, "Modelos", lines)


def _series_pages(pdf, series) -> int:
    """Render the time-series block: one compact summary per series column.

    ``series`` maps a column name to a dict of analyses; non-dict entries and
    non-dict sub-blocks are ignored. Returns the number of pages written.
    """
    if not isinstance(series, dict) or not series:
        return 0
    lines = []
    for col, s in series.items():
        if not isinstance(s, dict):
            continue
        lines.append(f"## {col}")

        stat = _dict_or_empty(s.get("stationarity"))
        if stat.get("verdict") is not None:
            lines.append(
                f" estacionariedad (ADF+KPSS): {stat.get('verdict')}")

        acf = _dict_or_empty(s.get("acf_pacf"))
        if acf.get("is_autocorrelated") is not None:
            lines.append(
                " autocorrelada (Ljung-Box): "
                + ("sí" if acf.get("is_autocorrelated") else "no")
            )

        stl = _dict_or_empty(s.get("stl"))
        if stl.get("trend_strength") is not None:
            lines.append(
                f" fuerza tendencia (STL): "
                f"{_fmt_num(stl.get('trend_strength'), 3)}")
        if stl.get("seasonal_strength") is not None:
            extra = (f" (periodo {stl.get('period')})"
                     if stl.get("period") is not None else "")
            lines.append(
                f" fuerza estacional (STL): "
                f"{_fmt_num(stl.get('seasonal_strength'), 3)}{extra}")
        elif stl.get("note"):
            lines.append(f" STL: {_truncate(stl.get('note'), 60)}")

        if s.get("levels_suggested"):
            kind = s.get("levels_kind")
            if kind == "returns":
                lines.append(
                    " sugerencia: convertir a retornos (serie financiera)")
            elif kind == "differences":
                lines.append(
                    " sugerencia: trabajar sobre diferencias (serie física)")
            else:
                lines.append(
                    " sugerencia: retornos o diferencias (serie de niveles)")
        lines.append("")

    if not any(ln.strip() for ln in lines):
        return 0
    return _paginate_text(pdf, "Series temporales", lines)


def _caveats_pages(pdf, caveats) -> int:
    """Render the exploratory caveats block as a wrapped, readable list.

    ``caveats`` is either a list or a dict holding the list under
    ``caveats``. Dict items contribute a ``## topic`` heading (``topic`` or
    ``id``) plus their wrapped ``message``; plain-string items are wrapped as
    they are; other item types are skipped. Returns the number of pages
    written.
    """
    cav_list = []
    if isinstance(caveats, dict):
        cav_list = caveats.get("caveats") or []
    elif isinstance(caveats, list):
        cav_list = caveats

    lines = []
    for cav in cav_list:
        if isinstance(cav, str):
            # A bare message still has to reach the reader.
            if cav.strip():
                lines.extend(textwrap.wrap(cav, width=78) or [""])
                lines.append("")
            continue
        if not isinstance(cav, dict):
            continue
        topic = cav.get("topic") or cav.get("id") or ""
        msg = cav.get("message") or ""
        lines.append(f"## {topic}")
        lines.extend(textwrap.wrap(str(msg), width=78) or [""])
        lines.append("")

    if not any(ln.strip() for ln in lines):
        return 0
    return _paginate_text(pdf, "Avisos exploratorios", lines,
                          subtitle="el EDA genera hipótesis, no conclusiones")


# --------------------------------------------------------------------------- #
# DB-level (relational) page builders — used by render_eda_pdf_relational.
# --------------------------------------------------------------------------- # def _db_cover_page(pdf, db_profile: dict, title: str) -> int: """Cover for a DatabaseProfile: name, date, table count, FK count.""" fig = plt.figure(figsize=_A5_PORTRAIT) db_path = db_profile.get("db_path") or "(base sin nombre)" heading = title or f"EDA base — {os.path.basename(str(db_path))}" fig.text(0.08, 0.82, heading, fontsize=20, fontweight="bold", color=_INK, wrap=True) sub = [f"fuente: {_truncate(db_path, 44)}"] when = db_profile.get("profiled_at") or datetime.now(timezone.utc).strftime( "%Y-%m-%d %H:%M UTC") sub.append(f"generado: {when}") fig.text(0.08, 0.74, "\n".join(sub), fontsize=10, color=_MUTED, va="top") n_tables = db_profile.get("n_tables") fig.text(0.08, 0.58, f"{_fmt_num(n_tables)} tablas", fontsize=16, color=_ACCENT, fontweight="bold") n_fk = len(db_profile.get("fk_candidates") or []) fig.text(0.08, 0.51, f"{_fmt_num(n_fk)} relaciones FK candidatas", fontsize=12, color=_INK) fig.text(0.08, 0.06, "Tufte · alta densidad de datos · lectura en móvil", fontsize=8, color=_MUTED, style="italic") pdf.savefig(fig) plt.close(fig) return 1 def _db_tables_page(pdf, db_profile: dict) -> int: """One text page summarising every table (rows / cols / quality).""" tables = db_profile.get("tables") or [] if not isinstance(tables, list) or not tables: return 0 lines = [f"{'tabla':<24}{'filas':>9}{'cols':>6}{'cal':>6}", "-" * 45] for t in tables: if not isinstance(t, dict): continue lines.append( f"{_truncate(t.get('table'), 24):<24}" f"{_fmt_num(t.get('n_rows')):>9}" f"{_fmt_num(t.get('n_cols')):>6}" f"{_fmt_num(t.get('quality_score'), 1):>6}" ) return _paginate_text(pdf, "Tablas", lines, subtitle="resumen por tabla") def _db_fk_page(pdf, db_profile: dict) -> int: """FK candidates table + the join-graph mermaid text.""" fks = db_profile.get("fk_candidates") or [] lines = [] if isinstance(fks, list) and fks: lines.append(f"{'from':<26}{'to':<26}{'incl':>7}") lines.append("-" * 59) for 
fk in fks: if not isinstance(fk, dict): continue frm = f"{fk.get('from_table')}.{fk.get('from_col')}" to = f"{fk.get('to_table')}.{fk.get('to_col')}" inc = fk.get("inclusion") inc_s = (_fmt_num(inc, 3) if isinstance(inc, (int, float)) and not isinstance(inc, bool) else str(inc)) lines.append( f"{_truncate(frm, 25):<26}{_truncate(to, 25):<26}{inc_s:>7}") else: lines.append("(sin relaciones FK candidatas detectadas)") mermaid = (db_profile.get("join_graph") or {}).get("mermaid") if mermaid: lines.append("") lines.append("## join graph (mermaid)") for raw in str(mermaid).splitlines(): lines.append(_truncate(raw, 72)) return _paginate_text(pdf, "Relaciones inter-tabla", lines, subtitle="FK candidatas + join graph") # --------------------------------------------------------------------------- # # Public entry point # --------------------------------------------------------------------------- # def render_eda_pdf(profile: dict, out_path: str, title: str = None) -> dict: """Render a TableProfile dict into a portable, mobile-readable multi-page PDF. The report is laid out for reading on a phone: A5 portrait pages, single column, large type, Tufte-style high data-ink charts (real histograms as small multiples, top-k bars, an association heatmap). Every profile key is read defensively and only present sections are rendered; unknown top-level blocks are dumped on a forward-compat page rather than dropped. Args: profile: TableProfile dict from the `eda` capability group (the dict returned by ``profile_table`` under ``profile``). May have many keys absent or None; a None/empty profile still yields a 1-page PDF. out_path: filesystem path where the PDF is written. Parent directories are created if missing. title: optional report title for the cover. Defaults to ``"EDA —