feat(eda): render de models en markdown + PDF DB-level para profile_database (H4,H9)
- H4: render_eda_markdown añade la sección Modelos (PCA/KMeans/normalidad/outliers); render_eda_pdf formatea models/series/caveats como tablas (no str(dict) crudo).
- H9: profile_database gana el flag emit_pdf -> PDF móvil DB-level (resumen de tablas + join graph) vía render_eda_pdf_relational; clave report_pdf_path.
- Aditivos y retrocompatibles (flags default False). 38 tests verdes.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -405,6 +405,110 @@ def render_eda_markdown(profile: dict) -> str:
|
||||
parts.append("## Series temporales")
|
||||
parts.extend(series_blocks)
|
||||
|
||||
# 7d. Modelos baratos (PCA, KMeans, outliers multivariantes, normalidad). El
|
||||
# pipeline corre `run_eda_models` cuando se pide con run_models; el bloque está
|
||||
# completo en el JSON pero antes no tenía formatter en markdown y se omitía. Se
|
||||
# lee todo defensivo con .get y cada submodelo se renderiza solo si está presente.
|
||||
models = profile.get("models")
|
||||
if isinstance(models, dict):
|
||||
model_parts: list[str] = []
|
||||
|
||||
pca = models.get("pca")
|
||||
if isinstance(pca, dict):
|
||||
evr = pca.get("explained_variance_ratio") or []
|
||||
cum = pca.get("cumulative") or []
|
||||
pca_rows = []
|
||||
for i, var in enumerate(evr):
|
||||
acc = cum[i] if i < len(cum) else None
|
||||
pca_rows.append([f"PC{i + 1}", _fmt_pct(var), _fmt_pct(acc)])
|
||||
sub = ["### PCA"]
|
||||
n_feat = pca.get("n_features")
|
||||
n_used = pca.get("n_rows_used")
|
||||
if n_feat is not None or n_used is not None:
|
||||
sub.append(
|
||||
f"{pca.get('n_components')} componentes sobre "
|
||||
f"{n_used if n_used is not None else '?'} filas, "
|
||||
f"{n_feat if n_feat is not None else '?'} features."
|
||||
)
|
||||
if pca_rows:
|
||||
sub.append(_md_table(
|
||||
["componente", "var. explicada", "acumulada"], pca_rows))
|
||||
loadings = pca.get("top_loadings") or []
|
||||
load_rows = []
|
||||
for ld in loadings[:12]:
|
||||
if not isinstance(ld, dict):
|
||||
continue
|
||||
comp = ld.get("component")
|
||||
comp_label = f"PC{comp + 1}" if isinstance(comp, int) else str(comp)
|
||||
load_rows.append([comp_label, ld.get("feature"),
|
||||
_fmt_num(ld.get("loading"), 3)])
|
||||
if load_rows:
|
||||
sub.append("Cargas principales:")
|
||||
sub.append(_md_table(["componente", "feature", "carga"], load_rows))
|
||||
model_parts.append("\n\n".join(sub))
|
||||
|
||||
km = models.get("kmeans")
|
||||
if isinstance(km, dict):
|
||||
sub = ["### KMeans"]
|
||||
best_k = km.get("best_k")
|
||||
sil = km.get("silhouette")
|
||||
sizes = km.get("cluster_sizes") or []
|
||||
head = f"mejor k = {_fmt_num(best_k)}"
|
||||
if sil is not None:
|
||||
head += f" (silhouette {_fmt_num(sil, 3)})"
|
||||
if sizes:
|
||||
head += ". Tamaños de cluster: " + ", ".join(
|
||||
_fmt_num(s) for s in sizes)
|
||||
sub.append(head + ".")
|
||||
score_rows = []
|
||||
for sc in km.get("scores_by_k") or []:
|
||||
if not isinstance(sc, dict):
|
||||
continue
|
||||
score_rows.append([sc.get("k"), _fmt_num(sc.get("silhouette"), 3),
|
||||
_fmt_num(sc.get("inertia"), 2)])
|
||||
if score_rows:
|
||||
sub.append(_md_table(["k", "silhouette", "inertia"], score_rows))
|
||||
model_parts.append("\n\n".join(sub))
|
||||
|
||||
out = models.get("outliers")
|
||||
if isinstance(out, dict):
|
||||
# outlier_pct del modelo multivariante ya viene en escala 0-100.
|
||||
n_out = out.get("n_outliers")
|
||||
pct = out.get("outlier_pct")
|
||||
thr = out.get("threshold")
|
||||
line = f"{_fmt_num(n_out)} filas marcadas como outlier"
|
||||
if pct is not None:
|
||||
line += f" ({_fmt_num(pct, 2)}%)"
|
||||
if thr is not None:
|
||||
line += f"; umbral de score {_fmt_num(thr, 3)}"
|
||||
model_parts.append("### Outliers multivariante (Isolation Forest)\n\n"
|
||||
+ line + ".")
|
||||
|
||||
normality = models.get("normality")
|
||||
if isinstance(normality, dict):
|
||||
norm_rows = []
|
||||
for col_name, res in normality.items():
|
||||
if not isinstance(res, dict):
|
||||
continue
|
||||
jb = res.get("jarque_bera") or {}
|
||||
norm_rows.append([
|
||||
col_name,
|
||||
"sí" if res.get("is_normal") else "no",
|
||||
_fmt_num(jb.get("p")) if jb.get("p") is not None else "",
|
||||
])
|
||||
if norm_rows:
|
||||
model_parts.append(
|
||||
"### Normalidad\n\n"
|
||||
+ _md_table(["columna", "normal", "Jarque-Bera p"], norm_rows))
|
||||
|
||||
note = models.get("note")
|
||||
if note:
|
||||
model_parts.append(f"> {note}")
|
||||
|
||||
if model_parts:
|
||||
parts.append("## Modelos")
|
||||
parts.extend(model_parts)
|
||||
|
||||
# 8. LLM analysis (tolerate None for now).
|
||||
llm = profile.get("llm")
|
||||
if llm:
|
||||
|
||||
Reference in New Issue
Block a user