diff --git a/python/functions/datascience/automatic_eda/md_completeness_test.py b/python/functions/datascience/automatic_eda/md_completeness_test.py new file mode 100644 index 00000000..c1c910bc --- /dev/null +++ b/python/functions/datascience/automatic_eda/md_completeness_test.py @@ -0,0 +1,253 @@ +"""Tests for the Markdown completeness appendix (report 2053). + +The AutomaticEDA Markdown is the output meant to be *pasted into an LLM*, so it +must carry EVERYTHING the engine computed — even the numbers the human-facing +chapters (shared with the PDF/PPTX) drop for readability. ``render_md`` appends a +full-data appendix built from ``meta['profile']`` that closes the six losses the +evaluation found: + +1. the complete association matrix (every pair, incl. correlation_ratio / + cramers_v) — not just the top extremes; +2. every numeric statistic for every numeric column (skew/kurtosis/percentiles); +3. the concrete recommended re-expression; +4. KMeans ``scores_by_k``; +5. the normality test statistics; +6. correct headers for bar/scree figure tables (not ``Desde/Hasta/Frecuencia``). + +Self-contained: a synthetic profile, no DuckDB, no heavy renderer. +""" + +import os +import sys + +import pytest # noqa: F401 + +_HERE = os.path.dirname(os.path.abspath(__file__)) +_FUNCTIONS = os.path.abspath(os.path.join(_HERE, "..", "..", "..")) # python/functions +if _FUNCTIONS not in sys.path: + sys.path.insert(0, _FUNCTIONS) + +from datascience.automatic_eda import model # noqa: E402 +from datascience.automatic_eda.render_md_impl import ( # noqa: E402 + _bars_table, + _is_histogram_caption, + _profile_appendix, + render_md, +) + + +# --------------------------------------------------------------------------- # +# Synthetic profile fixtures. 
+# --------------------------------------------------------------------------- #
+def _numeric(skew, kurtosis):
+    """A numeric stat block with every key the appendix serializes."""
+    return {
+        "count": 100, "min": 0.0, "max": 10.0, "mean": 5.0, "median": 5.0,
+        "mode": 4.0, "std": 2.0, "variance": 4.0, "cv": 0.4,
+        "p1": 0.1, "p5": 0.5, "p25": 2.5, "p50": 5.0, "p75": 7.5,
+        "p95": 9.5, "p99": 9.9, "iqr": 5.0, "skew": skew, "kurtosis": kurtosis,
+        "n_outliers": 1, "distribution_type": "normal",
+    }
+
+
+def _profile():
+    """A small but structurally faithful TableProfile (3 numeric, 2 categorical)."""
+    pairs = [
+        {"a": "A", "b": "B", "a_type": "numeric", "b_type": "numeric",
+         "method": "pearson/spearman", "value": 0.8,
+         "p_value": 1e-9, "p_value_adjusted": 2e-9, "significant": True},
+        {"a": "A", "b": "C", "a_type": "numeric", "b_type": "numeric",
+         "method": "pearson/spearman", "value": -0.3,
+         "p_value": 0.01, "p_value_adjusted": 0.02, "significant": True},
+        {"a": "A", "b": "Cat1", "a_type": "numeric", "b_type": "categorical",
+         "method": "correlation_ratio", "value": 0.45,
+         "p_value": 0.001, "p_value_adjusted": 0.002, "significant": True},
+        # The single cat-cat pair the human chapter never shows.
+        {"a": "Cat1", "b": "Cat2", "a_type": "categorical",
+         "b_type": "categorical", "method": "cramers_v", "value": 0.11,
+         "p_value": 0.04, "p_value_adjusted": 0.05, "significant": False},
+    ]
+    return {
+        "correlations": {
+            "pairs": pairs,
+            "multiple_testing": {"method": "bh", "n_tests": 4, "n_rejected": 3},
+        },
+        "columns": [
+            {"name": "A", "count": 100, "numeric": _numeric(0.0, -1.2),
+             "reexpression": {"recommended": "none", "ladder_power": 1.0,
+                              "reason": "symmetric", "alternatives": []}},
+            {"name": "B", "count": 100, "numeric": _numeric(4.77, 33.1),
+             "reexpression": {"recommended": "log1p", "ladder_power": 0.0,
+                              "reason": "skew 4.77 with zeros",
+                              "alternatives": [{"transform": "yeo-johnson"},
+                                               {"transform": "sqrt"}]}},
+            {"name": "C", "count": 100, "numeric": _numeric(-0.6, 0.2)},
+            {"name": "Cat1", "categorical": {"top": [], "mode": "x"}},
+            {"name": "Cat2", "categorical": {"top": [], "mode": "y"}},
+        ],
+        "models": {
+            "kmeans": {
+                "best_k": 3,
+                "scores_by_k": [
+                    {"k": 2, "silhouette": 0.46, "inertia": 900.0},
+                    {"k": 3, "silhouette": 0.50, "inertia": 550.0},
+                    {"k": 4, "silhouette": 0.38, "inertia": 430.0},
+                ],
+                "cluster_sizes": [40, 35, 25],
+            },
+            "normality": {
+                "A": {"n": 100,
+                      "jarque_bera": {"stat": 18.7, "p": 8e-5, "normal": False},
+                      "dagostino": {"stat": 18.1, "p": 1e-4, "normal": False},
+                      "shapiro": {"stat": 0.98, "p": 7e-8, "normal": False},
+                      "is_normal": False},
+                "C": {"n": 100,
+                      "jarque_bera": {"stat": 2.1, "p": 0.35, "normal": True},
+                      "dagostino": {"stat": 1.9, "p": 0.38, "normal": True},
+                      "shapiro": {"stat": 0.99, "p": 0.12, "normal": True},
+                      "is_normal": True},
+            },
+        },
+    }
+
+
+def _dummy_chapters():
+    """A minimal one-chapter document so render_md does not early-return empty."""
+    return model.as_chapters([
+        {"id": "intro", "title": "Intro",
+         "blocks": [{"kind": "markdown", "text": "cuerpo del informe"}]},
+    ])
+
+
+def _render(tmp_path, profile):
+    """Render the dummy document plus ``profile`` to a file and return its text."""
+    out = os.path.join(str(tmp_path), "out.md")
+    res = render_md(_dummy_chapters(), out, {"title": "EDA — t", "profile": profile})
+    assert res["path"] == out
+    # Close the handle deterministically instead of leaving it to the GC.
+    with open(out, encoding="utf-8") as fh:
+        return fh.read()
+
+
+def _table_rows(md, section_title):
+    """Count data rows of the first Markdown table under ``section_title``."""
+    # Fail with a readable message rather than a bare IndexError from split().
+    assert section_title in md, f"section {section_title!r} missing from the .md"
+    seg = md.split(section_title, 1)[1]
+    rows, in_t, seen_sep = 0, False, False
+    for ln in seg.splitlines():
+        if ln.startswith("|"):
+            in_t = True
+            stripped = ln.replace("|", "").replace(" ", "")
+            if stripped and set(stripped) == {"-"}:
+                seen_sep = True
+                continue
+            if seen_sep:
+                rows += 1
+        elif in_t and not ln.strip():
+            # First blank line after the table closes it.
+            break
+    return rows
+
+
+# --------------------------------------------------------------------------- #
+# Golden: every datum the profile holds reaches the .md.
+# --------------------------------------------------------------------------- #
+def test_appendix_lists_all_correlation_pairs(tmp_path):
+    md = _render(tmp_path, _profile())
+    assert "## Apéndice — Datos completos del perfil" in md
+    # All 4 pairs (the real titanic profile has 28; here 4 synthetic).
+    assert _table_rows(md, "### Matriz de asociación") == 4
+    # The cat-cat Cramér's V pair the human chapter drops is present.
+    assert "Cat1 ↔ Cat2" in md
+    assert "cramers_v" in md
+    assert "correlation_ratio" in md
+
+
+def test_appendix_has_skew_kurtosis_for_every_numeric(tmp_path):
+    md = _render(tmp_path, _profile())
+    seg = md.split("### Estadísticos numéricos completos", 1)[1].split("###", 1)[0]
+    lines = [ln for ln in seg.splitlines() if ln.startswith("|")]
+    header = [h.strip() for h in lines[0].strip("|").split("|")]
+    assert "skew" in header and "kurtosis" in header
+    ski, kui = header.index("skew"), header.index("kurtosis")
+    data = lines[2:]  # skip header + separator
+    assert len(data) == 3  # exactly the 3 numeric columns
+    for row in data:
+        cells = [c.strip() for c in row.strip("|").split("|")]
+        assert cells[ski] != "", f"missing skew in {cells[0]}"
+        assert cells[kui] != "", f"missing kurtosis in {cells[0]}"
+
+
+def test_appendix_has_extended_percentiles(tmp_path):
+    md = _render(tmp_path, _profile())
+    seg = md.split("### Estadísticos numéricos completos", 1)[1]
+    # Find the header by shape (first table line) rather than a fixed line
+    # offset, so an extra blank or note line cannot silently shift it.
+    header_line = next(ln for ln in seg.splitlines() if ln.startswith("|"))
+    header = [h.strip() for h in header_line.strip("|").split("|")]
+    for p in ("p1", "p5", "p25", "p75", "p95", "p99"):
+        assert p in header, f"percentile {p} missing from describe header"
+
+
+def test_appendix_names_concrete_reexpression(tmp_path):
+    md = _render(tmp_path, _profile())
+    assert "### Re-expresión recomendada" in md
+    assert "log1p" in md  # the concrete transform, not just "consider re-expressing"
+    assert "yeo-johnson" in md  # alternatives listed too
+
+
+def test_appendix_has_kmeans_scores_by_k(tmp_path):
+    md = _render(tmp_path, _profile())
+    assert "scores_by_k" in md
+    assert _table_rows(md, "#### KMeans — selección de k") == 3  # k=2,3,4
+
+
+def test_appendix_has_normality_statistics(tmp_path):
+    md = _render(tmp_path, _profile())
+    assert "JB stat" in md  # the statistic, not only the p-value
+    assert "Shapiro stat" in md
+    assert _table_rows(md, "#### Tests de normalidad") == 2  # cols A and C
+
+
+# --------------------------------------------------------------------------- #
+# Edge: a profile missing models / correlations degrades, never raises.
+# --------------------------------------------------------------------------- #
+def test_lite_profile_without_models(tmp_path):
+    prof = _profile()
+    prof.pop("models")  # lite: no KMeans/normality
+    md = _render(tmp_path, prof)
+    assert "scores_by_k" not in md  # section skipped
+    assert "Matriz de asociación" in md  # correlations still dumped
+    assert "## Apéndice" in md
+
+
+def test_profile_without_correlations(tmp_path):
+    prof = _profile()
+    prof.pop("correlations")
+    md = _render(tmp_path, prof)  # must not raise
+    assert "Matriz de asociación" not in md
+    assert "Estadísticos numéricos completos" in md  # numeric section still there
+
+
+def test_no_profile_means_no_appendix(tmp_path):
+    out = os.path.join(str(tmp_path), "noprof.md")
+    res = render_md(_dummy_chapters(), out, {"title": "x"})
+    assert res["path"] == out
+    with open(out, encoding="utf-8") as fh:
+        assert "## Apéndice" not in fh.read()
+
+
+def test_appendix_helper_is_defensive():
+    assert _profile_appendix(None) == ""
+    assert _profile_appendix({}) == ""
+    assert _profile_appendix({"columns": []}) == ""
+
+
+# --------------------------------------------------------------------------- #
+# Loss #6: bar/scree figure tables get a non-misleading header.
+# --------------------------------------------------------------------------- #
+def test_histogram_caption_detection():
+    """Histogram/distribution captions are detected; bar and scree captions are not."""
+    assert _is_histogram_caption("Histograma de Age")
+    assert _is_histogram_caption("Distribución de Fare")
+    assert not _is_histogram_caption("Media de Survived por Sex")
+    assert not _is_histogram_caption("Varianza explicada (scree PCA)")
+
+
+def test_bars_table_custom_header():
+    """``_bars_table`` keeps the histogram header by default and honours a custom one."""
+    bars = [(0.0, 1.0, 5.0), (1.0, 2.0, 3.0)]
+    hist = _bars_table(bars)  # default histogram header
+    assert "| Desde | Hasta | Frecuencia |" in hist
+    bar = _bars_table(bars, ("Inicio", "Fin", "Valor"))
+    assert "| Inicio | Fin | Valor |" in bar
+    assert "Frecuencia" not in bar
diff --git a/python/functions/datascience/automatic_eda/render_md_impl.py b/python/functions/datascience/automatic_eda/render_md_impl.py
index fba8ba6f..08488af0 100644
--- a/python/functions/datascience/automatic_eda/render_md_impl.py
+++ b/python/functions/datascience/automatic_eda/render_md_impl.py
@@ -178,9 +178,17 @@ def _md_data_table(block) -> str:
     return "\n".join(lines)
 
 
-def _bars_table(bars: list) -> str:
-    """Render extracted bar/histogram data as a Markdown table (Desde/Hasta/Frec)."""
-    lines = ["| Desde | Hasta | Frecuencia |", "| --- | --- | --- |"]
+def _bars_table(bars: list, header: tuple = ("Desde", "Hasta", "Frecuencia")) -> str:
+    """Render extracted bar/histogram data as a Markdown table.
+
+    ``header`` is the 3-column header to use. Histogram bars are
+    ``(Desde, Hasta, Frecuencia)``; bar/scree charts (means by group, PCA
+    explained variance) are *not* bins, so the caller passes a semantically
+    correct header (e.g. ``(Inicio, Fin, Valor)``) to avoid the misleading
+    "Frecuencia" label — see report 2053, loss #6.
+    """
+    h0, h1, h2 = header
+    lines = [f"| {h0} | {h1} | {h2} |", "| --- | --- | --- |"]
     shown = bars[:_MAX_BAR_ROWS]
     for x0, x1, h in shown:
         lines.append(f"| {_fmt_num(x0)} | {_fmt_num(x1)} | {_fmt_num(h)} |")
@@ -191,6 +199,18 @@ def _bars_table(bars: list) -> str:
     return out
 
 
+def _is_histogram_caption(caption: str) -> bool:
+    """True when a figure caption describes a histogram (genuine numeric bins).
+
+    Histograms are the only figures whose bars are real ``[Desde, Hasta)`` bins
+    with a frequency count. Bar charts (means by group) and the PCA scree plot
+    carry per-category / per-component values, not bins — they must not inherit
+    the ``Desde/Hasta/Frecuencia`` header.
+
+    Detection is a case-insensitive substring match on ``histograma``,
+    ``distribución`` or ``distribucion``; a ``None`` or empty caption is False.
+    """
+    c = (caption or "").lower()
+    return "histograma" in c or "distribución" in c or "distribucion" in c
+
+
 def _extract_bars(fig) -> list:
     """Collect (x_from, x_to, height) of the rectangular bars of a matplotlib fig.
 
@@ -253,7 +273,13 @@ def _md_figure(block, meta: dict, out_path: str, counter: list) -> str:
         if fig is not None:
             bars = _extract_bars(fig)
             if bars:
-                parts.append(_bars_table(bars))
+                # A histogram's bars are genuine numeric bins (Desde/Hasta/
+                # Frecuencia). Bar charts and the PCA scree plot are not bins —
+                # give them a header that does not lie about "Frecuencia".
+                header = (("Desde", "Hasta", "Frecuencia")
+                          if _is_histogram_caption(caption)
+                          else ("Inicio", "Fin", "Valor"))
+                parts.append(_bars_table(bars, header))
             if meta.get("embed_figures"):
                 png = _embed_png(fig, out_path, counter)
                 if png:
@@ -354,6 +380,258 @@ def _serialize_block(block, meta: dict, out_path: str, counter: list) -> str:
     return _md_note(model.Note(text=model._safe_str(block)))
 
 
+# --------------------------------------------------------------------------- #
+# Profile appendix — the data the human-facing chapters drop.
+#
+# The chapter document (shared with the PDF/PPTX renderers) is designed for human
+# reading and intentionally omits raw numbers: the correlation matrix shows only
+# the top extremes, the numeric blocks skip skew/kurtosis/extended percentiles,
+# the model chapter does not list ``scores_by_k`` or the normality test
+# statistics. But the Markdown is meant to be *pasted into an LLM*, so it should
+# carry EVERYTHING the engine computed. This appendix serializes the full
+# ``profile`` (passed via ``meta['profile']``) as Markdown tables, additively:
+# the PDF/PPTX are untouched, the .md simply has more than they do. Each section
+# is emitted only when its source data is present, so a ``lite`` profile (no
+# models) or a profile without correlations degrades cleanly instead of raising.
+# See report 2053 for the six losses this closes.
+# --------------------------------------------------------------------------- #
+def _pair_types(a_type, b_type) -> str:
+    """Short ``num↔cat`` label for an association pair's variable types."""
+    def short(t):
+        t = model._safe_str(t).lower()
+        if t.startswith("num"):
+            return "num"
+        if t.startswith("cat"):
+            return "cat"
+        return t or "?"
+    return f"{short(a_type)}↔{short(b_type)}"
+
+
+def _app_correlations(corr: dict) -> str:
+    """Loss #1 — every association pair (not just the top extremes).
+
+    Dumps all of ``correlations['pairs']`` as a table (pair · types · method ·
+    value · p · p-FDR · significant), ordered by |value| desc so the strongest
+    associations lead while nothing is cut. Includes the ``correlation_ratio``
+    (num↔cat) and ``cramers_v`` (cat↔cat) pairs the human chapter never shows.
+
+    Entries of ``pairs`` that are not dicts are ignored instead of aborting the
+    whole table; a pair whose value is missing, non-numeric or NaN is kept and
+    sorted last.
+
+    Args:
+        corr: the ``correlations`` section of the profile.
+
+    Returns:
+        The Markdown section, or ``""`` when there is no pair to show.
+    """
+    pairs = [p for p in (corr.get("pairs") or []) if isinstance(p, dict)]
+    if not pairs:
+        return ""
+
+    def keyfn(p):
+        try:
+            strength = abs(float(p.get("value")))
+        except (TypeError, ValueError):
+            return 0.0
+        # NaN is the only float unequal to itself; as a sort key it would make
+        # the ordering undefined, so it ranks with the unsortable values.
+        return 0.0 if strength != strength else -strength
+
+    pairs_sorted = sorted(pairs, key=keyfn)
+    lines = ["### Matriz de asociación — todos los pares",
+             "",
+             ("| Par | Tipos | Método | Valor | p-value | p-ajustado (FDR) "
+              "| ¿Sig? |"),
+             "| --- | --- | --- | --- | --- | --- | --- |"]
+    for p in pairs_sorted:
+        par = f"{_cell(p.get('a'))} ↔ {_cell(p.get('b'))}"
+        types = _pair_types(p.get("a_type"), p.get("b_type"))
+        method = _cell(p.get("method"))
+        val = _fmt_num(p.get("value"))
+        pv = _fmt_num(p.get("p_value")) if p.get("p_value") is not None else ""
+        padj = (_fmt_num(p.get("p_value_adjusted"))
+                if p.get("p_value_adjusted") is not None else "")
+        sig = "sí" if p.get("significant") else "no"
+        lines.append(
+            f"| {par} | {types} | {method} | {val} | {pv} | {padj} | {sig} |")
+    mt = corr.get("multiple_testing")
+    if not isinstance(mt, dict):
+        # A missing or oddly-shaped summary only costs the footnote detail.
+        mt = {}
+    n_tests = mt.get("n_tests", corr.get("n_tests"))
+    n_rej = mt.get("n_rejected")
+    note_bits = [f"{len(pairs)} pares en total"]
+    if n_tests is not None and n_rej is not None:
+        note_bits.append(
+            f"{n_rej} de {n_tests} significativos tras corrección "
+            f"{model._safe_str(mt.get('method', 'FDR')).upper()}")
+    lines.append("")
+    lines.append(f"*{'; '.join(note_bits)}.*")
+    return "\n".join(lines)
+
+
+# Numeric statistics, in serialization order: (profile key, column header).
+_NUM_STATS = [
+    ("count", "n"), ("mean", "mean"), ("median", "median"), ("mode", "mode"),
+    ("std", "std"), ("variance", "variance"), ("cv", "cv"),
+    ("skew", "skew"), ("kurtosis", "kurtosis"),
+    ("min", "min"), ("p1", "p1"), ("p5", "p5"), ("p25", "p25"), ("p50", "p50"),
+    ("p75", "p75"), ("p95", "p95"), ("p99", "p99"), ("iqr", "iqr"),
+    ("max", "max"), ("n_outliers", "outliers"),
+    ("distribution_type", "distribución"),
+]
+
+
+def _app_numeric_describe(columns: list) -> str:
+    """Loss #2 — every numeric statistic for every numeric column.
+
+    One row per numeric column with the full describe: mean/median/mode/std/
+    variance/cv, skew & kurtosis (for ALL columns, not only the skewed ones),
+    p1/p5/p25/p50/p75/p95/p99, iqr, min/max, outliers and distribution_type.
+
+    Columns whose ``numeric`` block is absent, empty or not a dict are skipped.
+    Returns ``""`` when no column qualifies.
+    """
+    rows = []
+    for info in (columns or []):
+        num = info.get("numeric") if isinstance(info, dict) else None
+        if not isinstance(num, dict) or not num:
+            continue
+        cells = [_cell(info.get("name"))]
+        for key, _hdr in _NUM_STATS:
+            if key == "count":
+                # The column-level count stands in when the numeric block
+                # does not carry its own.
+                v = num.get("count", info.get("count"))
+            else:
+                v = num.get(key)
+            if key == "distribution_type":
+                cells.append(_cell(v))
+            else:
+                cells.append(_fmt_num(v) if v is not None else "")
+        rows.append(cells)
+    if not rows:
+        return ""
+    header = ["Columna"] + [hdr for _k, hdr in _NUM_STATS]
+    lines = ["### Estadísticos numéricos completos (describe)",
+             "",
+             "| " + " | ".join(header) + " |",
+             "| " + " | ".join(["---"] * len(header)) + " |"]
+    for cells in rows:
+        lines.append("| " + " | ".join(cells) + " |")
+    return "\n".join(lines)
+
+
+def _app_reexpression(columns: list) -> str:
+    """Loss #3 — the concrete recommended re-expression per column.
+
+    Names the transform (log1p/sqrt/yeo-johnson/none) instead of a vague
+    "consider re-expressing", with the ladder power, reason and alternatives.
+    """
+    rows = []
+    for info in (columns or []):
+        rx = info.get("reexpression") if isinstance(info, dict) else None
+        if not rx or not isinstance(rx, dict):
+            continue
+        rec = model._safe_str(rx.get("recommended")).strip()
+        if not rec:
+            continue
+        alts = rx.get("alternatives") or []
+        alt_txt = ", ".join(
+            model._safe_str(a.get("transform")) for a in alts
+            if isinstance(a, dict) and a.get("transform")) or "—"
+        rows.append([
+            _cell(info.get("name")), _cell(rec),
+            _fmt_num(rx.get("ladder_power")) if rx.get("ladder_power") is not None else "",
+            _cell(rx.get("reason")), _cell(alt_txt),
+        ])
+    if not rows:
+        return ""
+    lines = ["### Re-expresión recomendada (escalera de Tukey)",
+             "",
+             "| Columna | Recomendada | Potencia | Razón | Alternativas |",
+             "| --- | --- | --- | --- | --- |"]
+    for r in rows:
+        lines.append("| " + " | ".join(r) + " |")
+    return "\n".join(lines)
+
+
+def _app_kmeans_scores(kmeans: dict) -> str:
+    """Loss #4 — KMeans silhouette + inertia per k (justifies the chosen k).
+
+    Only dict entries of ``scores_by_k`` become rows. Returns ``""`` when
+    ``kmeans`` is not a dict or no usable entry is left, so a bare header is
+    never emitted.
+    """
+    if not isinstance(kmeans, dict):
+        return ""
+    scores = [s for s in (kmeans.get("scores_by_k") or []) if isinstance(s, dict)]
+    if not scores:
+        return ""
+    best_k = kmeans.get("best_k")
+    lines = ["#### KMeans — selección de k (`scores_by_k`)",
+             "",
+             "| k | Silhouette | Inercia | Elegido |",
+             "| --- | --- | --- | --- |"]
+    for s in scores:
+        k = s.get("k")
+        chosen = "✓" if best_k is not None and k == best_k else ""
+        lines.append(
+            f"| {_fmt_num(k)} | {_fmt_num(s.get('silhouette'))} "
+            f"| {_fmt_num(s.get('inertia'))} | {chosen} |")
+    return "\n".join(lines)
+
+
+def _app_normality(normality: dict) -> str:
+    """Loss #5 — each normality test's statistic next to its p-value."""
+    if not isinstance(normality, dict) or not normality:
+        return ""
+    lines = ["#### Tests de normalidad (estadístico + p-value)",
+             "",
+             ("| Columna | n | JB stat | JB p | D'Agostino stat | D'Agostino p "
+              "| Shapiro stat | Shapiro p | ¿Normal? |"),
+             "| --- | --- | --- | --- | --- | --- | --- | --- | --- |"]
+    any_row = False
+    for col, res in normality.items():
+        if not isinstance(res, dict):
+            continue
+        jb = res.get("jarque_bera") or {}
+        da = res.get("dagostino") or {}
+        sh = res.get("shapiro") or {}
+        is_norm = "sí" if res.get("is_normal") else "no"
+        lines.append(
+            f"| {_cell(col)} | {_fmt_num(res.get('n')) if res.get('n') is not None else ''} "
+            f"| {_fmt_num(jb.get('stat'))} | {_fmt_num(jb.get('p'))} "
+            f"| {_fmt_num(da.get('stat'))} | {_fmt_num(da.get('p'))} "
+            f"| {_fmt_num(sh.get('stat'))} | {_fmt_num(sh.get('p'))} | {is_norm} |")
+        any_row = True
+    return "\n".join(lines) if any_row else ""
+
+
+def _profile_appendix(profile: dict) -> str:
+    """Build the full-data appendix from a TableProfile dict (additive).
+
+    Returns a Markdown ``## Apéndice`` section with one sub-table per loss the
+    human chapters drop, or ``""`` when the profile carries none of them. Never
+    raises: a missing/oddly-shaped section is skipped, not fatal. Every section
+    is built in isolation, so a malformed one cannot suppress its siblings.
+    """
+    if not isinstance(profile, dict):
+        return ""
+
+    def build(builder, data) -> str:
+        # Deliberate best-effort: the appendix is additive, so a section that
+        # cannot be rendered is dropped on its own rather than failing the .md.
+        try:
+            return builder(data)
+        except Exception:  # noqa: BLE001
+            return ""
+
+    sections: list = []
+    corr = profile.get("correlations")
+    if isinstance(corr, dict):
+        sections.append(build(_app_correlations, corr))
+    columns = profile.get("columns") or []
+    sections.append(build(_app_numeric_describe, columns))
+    sections.append(build(_app_reexpression, columns))
+    models = profile.get("models")
+    if isinstance(models, dict):
+        model_segs = [seg for seg in (
+            build(_app_kmeans_scores, models.get("kmeans") or {}),
+            build(_app_normality, models.get("normality") or {}),
+        ) if seg]
+        if model_segs:
+            sections.append(
+                "### Modelos — detalle\n\n" + "\n\n".join(model_segs))
+    sections = [seg for seg in sections if seg]
+    if not sections:
+        return ""
+    intro = ("Volcado completo de los datos que el motor computó y que los "
+             "capítulos (pensados para lectura humana / PDF) resumen. "
+             "Pensado para que un LLM reconstruya el análisis entero.")
+    return ("## Apéndice — Datos completos del perfil\n\n"
+            f"*{intro}*\n\n" + "\n\n".join(sections))
+
+
 # --------------------------------------------------------------------------- #
 # Entry point.
 # --------------------------------------------------------------------------- #
@@ -437,6 +715,18 @@ def render_md(chapters: list, out_path: str, meta: dict = None) -> dict:
             segments.append(seg)
         chapters_meta.append({"id": ch.id, "version": ch.version})
 
+    # Full-data appendix: dump everything the profile holds that the human
+    # chapters drop (additive — the .md ends up with more than the PDF/PPTX).
+    # Emitted only when a profile is supplied via meta['profile']; never fatal.
+    try:
+        appendix = _profile_appendix(meta.get("profile"))
+    except Exception as e:  # noqa: BLE001
+        appendix = ""
+        notes.append(f"apéndice de perfil omitido: {e}")
+    if appendix:
+        segments.append("---")
+        segments.append(appendix)
+
     content = "\n\n".join(segments) + "\n"
     note = f"{len(content)} caracteres"
     if notes:
diff --git a/python/functions/pipelines/render_automatic_eda.py b/python/functions/pipelines/render_automatic_eda.py
index 5361b927..942ee456 100644
--- a/python/functions/pipelines/render_automatic_eda.py
+++ b/python/functions/pipelines/render_automatic_eda.py
@@ -261,7 +261,15 @@ def render_automatic_eda(
     md_path = None
     if emit_md:
         md_path = os.path.join(out_dir, base + ".md")
-        rmd = render_automatic_eda_markdown(prof, md_path, meta) or {}
+        # The Markdown is the MOST complete output: besides the chapter
+        # document (shared with PDF/PPTX) it dumps an appendix with ALL the
+        # numeric data of the profile (full association matrix, describe
+        # with skew/kurtosis/percentiles, re-expressions, KMeans
+        # scores_by_k, normality statistics). `prof` travels via
+        # meta['profile']; a separate meta leaves the PDF/PPTX one untouched.
+        md_meta = dict(meta or {})
+        md_meta["profile"] = prof
+        rmd = render_automatic_eda_markdown(prof, md_path, md_meta) or {}
 
     return {
         "status": "ok",