feat(eda): render de models en markdown + PDF DB-level para profile_database (H4,H9)
- H4: render_eda_markdown anade seccion Modelos (PCA/KMeans/normalidad/outliers); render_eda_pdf formatea models/series/caveats como tablas (no str(dict) crudo) - H9: profile_database gana flag emit_pdf -> PDF movil DB-level (resumen tablas + join graph) via render_eda_pdf_relational; clave report_pdf_path - aditivos y retrocompatibles (flags default False). 38 tests verdes Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -173,3 +173,62 @@ def test_tolerates_empty_profile():
|
||||
def test_tolerates_none_profile():
|
||||
md = render_eda_markdown(None)
|
||||
assert "# EDA — (unnamed)" in md
|
||||
|
||||
|
||||
def _sample_models():
|
||||
"""Bloque `models` como el que produce run_eda_models (PCA/KMeans/...)."""
|
||||
return {
|
||||
"n_numeric_cols": 3,
|
||||
"pca": {
|
||||
"n_components": 2,
|
||||
"n_rows_used": 1000,
|
||||
"n_features": 3,
|
||||
"explained_variance_ratio": [0.62, 0.21],
|
||||
"cumulative": [0.62, 0.83],
|
||||
"top_loadings": [
|
||||
{"component": 0, "feature": "price", "loading": 0.71},
|
||||
{"component": 1, "feature": "qty", "loading": -0.55},
|
||||
],
|
||||
},
|
||||
"kmeans": {
|
||||
"best_k": 3,
|
||||
"silhouette": 0.48,
|
||||
"cluster_sizes": [500, 300, 200],
|
||||
"scores_by_k": [
|
||||
{"k": 2, "silhouette": 0.41, "inertia": 1200.0},
|
||||
{"k": 3, "silhouette": 0.48, "inertia": 900.0},
|
||||
],
|
||||
},
|
||||
"outliers": {
|
||||
"n_outliers": 35,
|
||||
"outlier_pct": 3.5,
|
||||
"threshold": -0.51,
|
||||
},
|
||||
"normality": {
|
||||
"price": {"jarque_bera": {"p": 0.0001}, "is_normal": False},
|
||||
},
|
||||
"note": "",
|
||||
}
|
||||
|
||||
|
||||
def test_models_section_rendered():
|
||||
# H4: el bloque models antes se omitía en markdown; ahora tiene formatter.
|
||||
profile = _sample_profile()
|
||||
profile["models"] = _sample_models()
|
||||
md = render_eda_markdown(profile)
|
||||
assert "## Modelos" in md
|
||||
assert "### PCA" in md
|
||||
assert "### KMeans" in md
|
||||
assert "### Outliers multivariante (Isolation Forest)" in md
|
||||
assert "### Normalidad" in md
|
||||
# Datos reales del PCA renderizados (varianza explicada ×100) y KMeans.
|
||||
assert "62.0" in md # explained_variance_ratio 0.62 -> 62.00%
|
||||
assert "mejor k = 3" in md
|
||||
# outlier_pct del modelo ya viene en escala 0-100: 3.5 -> "3.5%", no "350".
|
||||
assert "3.5%" in md
|
||||
|
||||
|
||||
def test_models_absent_when_none():
|
||||
# Edge: profile sin models (None) no produce sección Modelos ni rompe.
|
||||
md = render_eda_markdown(_sample_profile()) # models=None en el sample
|
||||
assert "## Modelos" not in md
|
||||
|
||||
Reference in New Issue
Block a user