"""Tests para association_matrix.""" from datascience import association_matrix def _find_pair(pairs, a, b): """Devuelve el par (a, b) sin importar el orden en que aparezca, o None.""" for p in pairs: if {p["a"], p["b"]} == {a, b}: return p return None def test_two_correlated_numerics_strong_pearson(): columns = { "size": {"values": [1, 2, 3, 4, 5, 6, 7, 8], "type": "numeric"}, "price": { "values": [2.1, 4.0, 5.9, 8.1, 10.0, 12.2, 13.8, 16.1], "type": "numeric", }, } result = association_matrix(columns, strong_threshold=0.5) pair = _find_pair(result["pairs"], "size", "price") assert pair is not None assert pair["method"] == "pearson/spearman" assert abs(pair["value"]) > 0.95 assert "pearson" in pair["extra"] and "spearman" in pair["extra"] # El par fuertemente correlado aparece en strong. assert _find_pair(result["strong"], "size", "price") is not None def test_numeric_explained_by_category_strong_correlation_ratio(): columns = { "region": { "values": ["N", "N", "S", "S", "E", "E", "W", "W"], "type": "categorical", }, "score": { "values": [10.0, 11.0, 50.0, 49.0, 90.0, 91.0, 30.0, 31.0], "type": "numeric", }, } result = association_matrix(columns, strong_threshold=0.5) pair = _find_pair(result["pairs"], "region", "score") assert pair is not None assert pair["method"] == "correlation_ratio" # La categoria explica casi toda la varianza de la numerica. assert pair["value"] > 0.9 assert _find_pair(result["strong"], "region", "score") is not None def test_independent_pair_not_strong(): # x e y construidos para ser practicamente independientes (sin relacion). columns = { "x": {"values": [1, 2, 1, 2, 1, 2, 1, 2], "type": "numeric"}, "y": {"values": [5, 5, 5, 5, 5, 5, 5, 6], "type": "numeric"}, } result = association_matrix(columns, strong_threshold=0.5) pair = _find_pair(result["pairs"], "x", "y") assert pair is not None # Ni la metrica principal ni la MI superan el umbral fuerte. assert abs(pair["value"]) < 0.5 assert pair["extra"]["mi"] < 0.5 assert _find_pair(result["strong"], "x", "y") is None def test_empty_dict_does_not_crash(): result = association_matrix({}) assert result["pairs"] == [] assert result["strong"] == [] assert "methods_legend" in result assert "pearson" in result["methods_legend"] def test_single_column_returns_empty(): columns = {"only": {"values": [1, 2, 3, 4], "type": "numeric"}} result = association_matrix(columns) assert result["pairs"] == [] assert result["strong"] == [] def test_pairs_carry_significance_fields(): # Tras la correccion FDR cada par evaluado lleva p_value, p_value_adjusted y # significant. Un par num-num fuertemente correlado es significativo. columns = { "size": {"values": [1, 2, 3, 4, 5, 6, 7, 8], "type": "numeric"}, "price": { "values": [2.1, 4.0, 5.9, 8.1, 10.0, 12.2, 13.8, 16.1], "type": "numeric", }, } result = association_matrix(columns, strong_threshold=0.5) pair = _find_pair(result["pairs"], "size", "price") assert "p_value" in pair and "p_value_adjusted" in pair and "significant" in pair assert pair["p_value"] is not None and pair["p_value"] < 0.05 assert pair["significant"] is True # p ajustado nunca por debajo del crudo. assert pair["p_value_adjusted"] >= pair["p_value"] - 1e-12 def test_result_reports_multiple_testing_summary(): columns = { "size": {"values": [1, 2, 3, 4, 5, 6, 7, 8], "type": "numeric"}, "price": { "values": [2.1, 4.0, 5.9, 8.1, 10.0, 12.2, 13.8, 16.1], "type": "numeric", }, } result = association_matrix(columns) # n_tests = total de pares evaluados. assert result["n_tests"] == len(result["pairs"]) mt = result["multiple_testing"] assert mt["method"] == "bh" assert mt["alpha"] == 0.05 assert mt["n_rejected"] >= 1 assert mt["n_tests"] >= 1 def test_strong_requires_corrected_significance(): # Par num-num con magnitud alta pero p-valor no diminuto. Con alpha normal es # fuerte; con un alpha mas estricto que su p-valor, deja de ser significativo # y sale de strong AUNQUE la magnitud siga por encima del umbral. Esto prueba # que strong se basa en la significancia corregida, no solo en el umbral. columns = { "a": {"values": [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12], "type": "numeric"}, "b": {"values": [2, 1, 3, 4, 6, 5, 7, 8, 10, 9, 11, 12], "type": "numeric"}, } relaxed = association_matrix(columns, strong_threshold=0.5, alpha=0.05) pair = _find_pair(relaxed["pairs"], "a", "b") assert pair["p_value"] is not None and pair["p_value"] < 0.05 assert abs(pair["value"]) >= 0.5 assert _find_pair(relaxed["strong"], "a", "b") is not None # alpha mas estricto que el p-valor del par -> ya no significativo. strict = association_matrix( columns, strong_threshold=0.5, alpha=pair["p_value"] / 10.0 ) sp = _find_pair(strict["pairs"], "a", "b") assert abs(sp["value"]) >= 0.5 # magnitud intacta assert sp["significant"] is False assert _find_pair(strict["strong"], "a", "b") is None def test_bonferroni_method_is_accepted(): columns = { "size": {"values": [1, 2, 3, 4, 5, 6, 7, 8], "type": "numeric"}, "price": { "values": [2.1, 4.0, 5.9, 8.1, 10.0, 12.2, 13.8, 16.1], "type": "numeric", }, } result = association_matrix(columns, fdr_method="bonferroni") assert result["multiple_testing"]["method"] == "bonferroni" pair = _find_pair(result["pairs"], "size", "price") assert pair["p_value_adjusted"] is not None