"""Tests para run_eda_models.""" import numpy as np from run_eda_models import run_eda_models def _numeric(values: list) -> dict: """Envuelve una lista como columna numerica del perfil.""" return {"values": values, "type": "numeric"} def test_three_numeric_columns_runs_all_models(): # Tres columnas con estructura latente: x e y correlacionadas, z ruido. rng = np.random.default_rng(0) n = 120 x = rng.normal(0.0, 1.0, n) y = x * 2.0 + rng.normal(0.0, 0.3, n) z = rng.normal(5.0, 1.0, n) columns = { "x": _numeric(x.tolist()), "y": _numeric(y.tolist()), "z": _numeric(z.tolist()), } result = run_eda_models(columns) assert result["n_numeric_cols"] == 3 assert result["note"] == "" # PCA presente y con varianza explicada. assert result["pca"] is not None assert result["pca"]["n_components"] >= 1 assert len(result["pca"]["explained_variance_ratio"]) >= 1 # KMeans presente con un k elegido. assert result["kmeans"] is not None assert result["kmeans"]["best_k"] >= 2 # Outliers presente (puede ser 0 outliers, pero el bloque existe). assert result["outliers"] is not None assert "n_outliers" in result["outliers"] # Normality presente, una entrada por columna numerica. assert result["normality"] is not None assert set(result["normality"].keys()) == {"x", "y", "z"} for col in ("x", "y", "z"): assert result["normality"][col]["n"] == n def test_single_numeric_column_note_and_normality_only(): rng = np.random.default_rng(7) values = rng.normal(10.0, 2.0, 100).tolist() columns = { "only": _numeric(values), "label": {"values": ["a"] * 100, "type": "categorical"}, } result = run_eda_models(columns) assert result["n_numeric_cols"] == 1 assert result["note"] == "insuficientes columnas numericas para modelos multivariantes" # Multivariantes en None. assert result["pca"] is None assert result["kmeans"] is None assert result["outliers"] is None # Normality univariante si se ejecuta con una sola columna. assert result["normality"] is not None assert "only" in result["normality"] assert result["normality"]["only"]["n"] == 100 def test_flags_disable_models(): rng = np.random.default_rng(1) n = 60 columns = { "a": _numeric(rng.normal(0, 1, n).tolist()), "b": _numeric(rng.normal(0, 1, n).tolist()), } result = run_eda_models( columns, run_pca=False, run_kmeans=False, run_isolation=False, run_normality=False, ) assert result["n_numeric_cols"] == 2 assert result["pca"] is None assert result["kmeans"] is None assert result["outliers"] is None assert result["normality"] is None assert result["note"] == "" def test_no_numeric_columns_returns_note_and_no_normality(): columns = { "cat": {"values": ["x", "y", "z"], "type": "categorical"}, } result = run_eda_models(columns) assert result["n_numeric_cols"] == 0 assert result["note"] == "insuficientes columnas numericas para modelos multivariantes" assert result["pca"] is None # run_normality True pero no hay columnas numericas -> None. assert result["normality"] is None