763e06c127
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
112 lines
3.2 KiB
Python
112 lines
3.2 KiB
Python
"""Tests for run_eda_models."""
|
|
|
|
import numpy as np
|
|
|
|
from run_eda_models import run_eda_models
|
|
|
|
|
|
def _numeric(values: list) -> dict:
    """Wrap a list of values as a numeric column entry of the profile.

    Args:
        values: The observations for the column; stored as-is (not copied).

    Returns:
        A dict holding ``values`` under the ``"values"`` key and the string
        ``"numeric"`` under the ``"type"`` key.
    """
    return {"values": values, "type": "numeric"}
|
|
|
|
|
|
def test_three_numeric_columns_runs_all_models():
    """Three numeric columns: every model block is present and the note is empty."""
    # Three columns with latent structure: x and y are correlated, z is noise.
    # The draw order (x, then y's noise, then z) is kept fixed so the seeded
    # generator yields the same data on every run.
    rng = np.random.default_rng(0)
    n = 120
    x = rng.normal(0.0, 1.0, n)
    y = x * 2.0 + rng.normal(0.0, 0.3, n)
    z = rng.normal(5.0, 1.0, n)

    columns = {
        "x": _numeric(x.tolist()),
        "y": _numeric(y.tolist()),
        "z": _numeric(z.tolist()),
    }

    result = run_eda_models(columns)

    assert result["n_numeric_cols"] == 3
    assert result["note"] == ""

    # PCA is present and reports explained variance.
    assert result["pca"] is not None
    assert result["pca"]["n_components"] >= 1
    assert len(result["pca"]["explained_variance_ratio"]) >= 1

    # KMeans is present with a chosen k.
    assert result["kmeans"] is not None
    assert result["kmeans"]["best_k"] >= 2

    # Outliers block is present (it may report 0 outliers, but it must exist).
    assert result["outliers"] is not None
    assert "n_outliers" in result["outliers"]

    # Normality is present, with one entry per numeric column.
    assert result["normality"] is not None
    assert set(result["normality"].keys()) == {"x", "y", "z"}
    for col in ("x", "y", "z"):
        assert result["normality"][col]["n"] == n
|
|
|
|
|
|
def test_single_numeric_column_note_and_normality_only():
    """One numeric column: multivariate blocks are None, normality still runs."""
    rng = np.random.default_rng(7)
    values = rng.normal(10.0, 2.0, 100).tolist()
    columns = {
        "only": _numeric(values),
        # A categorical column alongside; the test expects it not to be
        # counted as numeric.
        "label": {"values": ["a"] * 100, "type": "categorical"},
    }

    result = run_eda_models(columns)

    assert result["n_numeric_cols"] == 1
    assert result["note"] == "insuficientes columnas numericas para modelos multivariantes"

    # Multivariate models are None.
    assert result["pca"] is None
    assert result["kmeans"] is None
    assert result["outliers"] is None

    # Univariate normality does run with a single column.
    assert result["normality"] is not None
    assert "only" in result["normality"]
    assert result["normality"]["only"]["n"] == 100
|
|
|
|
|
|
def test_flags_disable_models():
    """All run_* flags set to False: every model block is None and the note is empty."""
    rng = np.random.default_rng(1)
    n = 60
    columns = {
        "a": _numeric(rng.normal(0, 1, n).tolist()),
        "b": _numeric(rng.normal(0, 1, n).tolist()),
    }

    result = run_eda_models(
        columns,
        run_pca=False,
        run_kmeans=False,
        run_isolation=False,
        run_normality=False,
    )

    # The numeric columns are still counted even though no model runs.
    assert result["n_numeric_cols"] == 2
    assert result["pca"] is None
    assert result["kmeans"] is None
    assert result["outliers"] is None
    assert result["normality"] is None
    # Two numeric columns are expected not to trigger the "insufficient
    # columns" note.
    assert result["note"] == ""
|
|
|
|
|
|
def test_no_numeric_columns_returns_note_and_no_normality():
    """No numeric columns: the note is set and every model block is None."""
    columns = {
        "cat": {"values": ["x", "y", "z"], "type": "categorical"},
    }

    result = run_eda_models(columns)

    assert result["n_numeric_cols"] == 0
    assert result["note"] == "insuficientes columnas numericas para modelos multivariantes"

    # Multivariate models are None, mirroring the checks of the
    # single-numeric-column test, which asserts the same note.
    assert result["pca"] is None
    assert result["kmeans"] is None
    assert result["outliers"] is None

    # run_normality defaults to enabled, but with no numeric columns -> None.
    assert result["normality"] is None
|