"""Tests para association_matrix.""" from datascience import association_matrix def _find_pair(pairs, a, b): """Devuelve el par (a, b) sin importar el orden en que aparezca, o None.""" for p in pairs: if {p["a"], p["b"]} == {a, b}: return p return None def test_two_correlated_numerics_strong_pearson(): columns = { "size": {"values": [1, 2, 3, 4, 5, 6, 7, 8], "type": "numeric"}, "price": { "values": [2.1, 4.0, 5.9, 8.1, 10.0, 12.2, 13.8, 16.1], "type": "numeric", }, } result = association_matrix(columns, strong_threshold=0.5) pair = _find_pair(result["pairs"], "size", "price") assert pair is not None assert pair["method"] == "pearson/spearman" assert abs(pair["value"]) > 0.95 assert "pearson" in pair["extra"] and "spearman" in pair["extra"] # El par fuertemente correlado aparece en strong. assert _find_pair(result["strong"], "size", "price") is not None def test_numeric_explained_by_category_strong_correlation_ratio(): columns = { "region": { "values": ["N", "N", "S", "S", "E", "E", "W", "W"], "type": "categorical", }, "score": { "values": [10.0, 11.0, 50.0, 49.0, 90.0, 91.0, 30.0, 31.0], "type": "numeric", }, } result = association_matrix(columns, strong_threshold=0.5) pair = _find_pair(result["pairs"], "region", "score") assert pair is not None assert pair["method"] == "correlation_ratio" # La categoria explica casi toda la varianza de la numerica. assert pair["value"] > 0.9 assert _find_pair(result["strong"], "region", "score") is not None def test_independent_pair_not_strong(): # x e y construidos para ser practicamente independientes (sin relacion). columns = { "x": {"values": [1, 2, 1, 2, 1, 2, 1, 2], "type": "numeric"}, "y": {"values": [5, 5, 5, 5, 5, 5, 5, 6], "type": "numeric"}, } result = association_matrix(columns, strong_threshold=0.5) pair = _find_pair(result["pairs"], "x", "y") assert pair is not None # Ni la metrica principal ni la MI superan el umbral fuerte. assert abs(pair["value"]) < 0.5 assert pair["extra"]["mi"] < 0.5 assert _find_pair(result["strong"], "x", "y") is None def test_empty_dict_does_not_crash(): result = association_matrix({}) assert result["pairs"] == [] assert result["strong"] == [] assert "methods_legend" in result assert "pearson" in result["methods_legend"] def test_single_column_returns_empty(): columns = {"only": {"values": [1, 2, 3, 4], "type": "numeric"}} result = association_matrix(columns) assert result["pairs"] == [] assert result["strong"] == []