763e06c127
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
83 lines
2.7 KiB
Python
83 lines
2.7 KiB
Python
"""Tests para association_matrix."""
|
|
|
|
from datascience import association_matrix
|
|
|
|
|
|
def _find_pair(pairs, a, b):
    """Return the first entry of *pairs* joining *a* and *b*, or None.

    Each entry is indexed by its "a" and "b" keys; the two names are
    compared as an unordered set, so (a, b) and (b, a) both match.
    """
    return next(
        (entry for entry in pairs if {entry["a"], entry["b"]} == {a, b}),
        None,
    )
|
|
|
|
|
|
def test_two_correlated_numerics_strong_pearson():
    """Two near-linear numeric columns yield a strong pearson/spearman pair."""
    sizes = [1, 2, 3, 4, 5, 6, 7, 8]
    prices = [2.1, 4.0, 5.9, 8.1, 10.0, 12.2, 13.8, 16.1]
    columns = {
        "size": {"values": sizes, "type": "numeric"},
        "price": {"values": prices, "type": "numeric"},
    }

    outcome = association_matrix(columns, strong_threshold=0.5)
    match = _find_pair(outcome["pairs"], "size", "price")

    assert match is not None
    assert match["method"] == "pearson/spearman"
    assert abs(match["value"]) > 0.95
    # Both coefficients must be exposed in the extra details.
    assert "pearson" in match["extra"]
    assert "spearman" in match["extra"]

    # The strongly correlated pair is also listed under "strong".
    strong_match = _find_pair(outcome["strong"], "size", "price")
    assert strong_match is not None
|
|
|
|
|
|
def test_numeric_explained_by_category_strong_correlation_ratio():
    """A categorical/numeric pair is scored with a strong correlation_ratio."""
    regions = ["N", "N", "S", "S", "E", "E", "W", "W"]
    scores = [10.0, 11.0, 50.0, 49.0, 90.0, 91.0, 30.0, 31.0]
    columns = {
        "region": {"values": regions, "type": "categorical"},
        "score": {"values": scores, "type": "numeric"},
    }

    outcome = association_matrix(columns, strong_threshold=0.5)
    match = _find_pair(outcome["pairs"], "region", "score")

    assert match is not None
    assert match["method"] == "correlation_ratio"
    # The category explains almost all of the numeric column's variance.
    assert match["value"] > 0.9

    strong_match = _find_pair(outcome["strong"], "region", "score")
    assert strong_match is not None
|
|
|
|
|
|
def test_independent_pair_not_strong():
    """A weakly related numeric pair is reported but not flagged as strong."""
    # x and y are built to be practically independent (no relationship).
    x_values = [1, 2, 1, 2, 1, 2, 1, 2]
    y_values = [5, 5, 5, 5, 5, 5, 5, 6]
    columns = {
        "x": {"values": x_values, "type": "numeric"},
        "y": {"values": y_values, "type": "numeric"},
    }

    outcome = association_matrix(columns, strong_threshold=0.5)
    match = _find_pair(outcome["pairs"], "x", "y")

    assert match is not None
    # Neither the main metric nor the MI exceeds the strong threshold.
    assert abs(match["value"]) < 0.5
    assert match["extra"]["mi"] < 0.5

    strong_match = _find_pair(outcome["strong"], "x", "y")
    assert strong_match is None
|
|
|
|
|
|
def test_empty_dict_does_not_crash():
    """An empty column mapping gives empty pair lists and keeps the legend."""
    outcome = association_matrix({})

    # Both pair collections must be empty lists.
    for key in ("pairs", "strong"):
        assert outcome[key] == []

    # The legend is still present and mentions pearson.
    assert "methods_legend" in outcome
    assert "pearson" in outcome["methods_legend"]
|
|
|
|
|
|
def test_single_column_returns_empty():
    """A single column cannot form a pair, so both pair lists are empty."""
    lone_column = {"values": [1, 2, 3, 4], "type": "numeric"}
    outcome = association_matrix({"only": lone_column})

    for key in ("pairs", "strong"):
        assert outcome[key] == []
|