feat(eda): series temporales + rigor anti-data-mining + PDF movil + /eda + benchmark issues

Bloque del grupo eda (sesion ausente EDA-benchmark):

- 8 funciones nuevas: adf_kpss_stationarity, acf_pacf, stl_decompose, to_returns, fdr_correction, suggest_reexpression, exploratory_caveats, render_eda_pdf
- integracion: profile_table (run_series, emit_pdf), association_matrix (FDR Benjamini-Hochberg), render_eda_markdown (secciones series/reexpresion/caveats)
- slash commands /eda y /capitulos
- issues 0173-0177: mejoras del /eda derivadas del benchmark sobre 12 datasets reales (outlier_pct x100, periodo estacional, FK inference, render models, tipos id-like)

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
2026-06-29 03:34:01 +02:00
parent 02301aaed3
commit 7ac69ab4fb
33 changed files with 3995 additions and 51 deletions
@@ -80,3 +80,79 @@ def test_single_column_returns_empty():
    result = association_matrix(columns)
    assert result["pairs"] == []
    assert result["strong"] == []
+
+
+def test_pairs_carry_significance_fields():
+    # After the FDR correction every evaluated pair carries p_value, p_value_adjusted
+    # and significant. A strongly correlated num-num pair is significant.
+    columns = {
+        "size": {"values": [1, 2, 3, 4, 5, 6, 7, 8], "type": "numeric"},
+        "price": {
+            "values": [2.1, 4.0, 5.9, 8.1, 10.0, 12.2, 13.8, 16.1],
+            "type": "numeric",
+        },
+    }
+    result = association_matrix(columns, strong_threshold=0.5)
+    pair = _find_pair(result["pairs"], "size", "price")
+    assert "p_value" in pair and "p_value_adjusted" in pair and "significant" in pair
+    assert pair["p_value"] is not None and pair["p_value"] < 0.05
+    assert pair["significant"] is True
+    # The adjusted p-value is never below the raw one.
+    assert pair["p_value_adjusted"] >= pair["p_value"] - 1e-12  # 1e-12 slack, presumably for float rounding
+
+
+def test_result_reports_multiple_testing_summary():  # checks the summary keys of the result
+    columns = {
+        "size": {"values": [1, 2, 3, 4, 5, 6, 7, 8], "type": "numeric"},
+        "price": {
+            "values": [2.1, 4.0, 5.9, 8.1, 10.0, 12.2, 13.8, 16.1],
+            "type": "numeric",
+        },
+    }
+    result = association_matrix(columns)  # no overrides: the asserts below pin the defaults
+    # n_tests = total number of evaluated pairs.
+    assert result["n_tests"] == len(result["pairs"])
+    mt = result["multiple_testing"]
+    assert mt["method"] == "bh"  # expected default correction method
+    assert mt["alpha"] == 0.05  # expected default significance level
+    assert mt["n_rejected"] >= 1  # at least one pair must be flagged for this data
+    assert mt["n_tests"] >= 1
+
+
+def test_strong_requires_corrected_significance():
+    # Num-num pair with high magnitude but a non-tiny p-value. With the usual alpha it
+    # is strong; with an alpha stricter than its p-value it stops being significant
+    # and drops out of strong EVEN THOUGH its magnitude stays above the threshold. This
+    # shows that strong relies on corrected significance, not just on the threshold.
+    columns = {
+        "a": {"values": [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12], "type": "numeric"},
+        "b": {"values": [2, 1, 3, 4, 6, 5, 7, 8, 10, 9, 11, 12], "type": "numeric"},
+    }
+    relaxed = association_matrix(columns, strong_threshold=0.5, alpha=0.05)
+    pair = _find_pair(relaxed["pairs"], "a", "b")
+    assert pair["p_value"] is not None and pair["p_value"] < 0.05
+    assert abs(pair["value"]) >= 0.5
+    assert _find_pair(relaxed["strong"], "a", "b") is not None  # listed as strong under alpha=0.05
+
+    # alpha stricter than the pair's p-value -> no longer significant.
+    strict = association_matrix(
+        columns, strong_threshold=0.5, alpha=pair["p_value"] / 10.0
+    )
+    sp = _find_pair(strict["pairs"], "a", "b")
+    assert abs(sp["value"]) >= 0.5  # magnitude unchanged
+    assert sp["significant"] is False
+    assert _find_pair(strict["strong"], "a", "b") is None  # expected to be absent from strong now
+
+
+def test_bonferroni_method_is_accepted():  # fdr_method="bonferroni" must be a usable option
+    columns = {
+        "size": {"values": [1, 2, 3, 4, 5, 6, 7, 8], "type": "numeric"},
+        "price": {
+            "values": [2.1, 4.0, 5.9, 8.1, 10.0, 12.2, 13.8, 16.1],
+            "type": "numeric",
+        },
+    }
+    result = association_matrix(columns, fdr_method="bonferroni")  # request the correction by name
+    assert result["multiple_testing"]["method"] == "bonferroni"  # summary reports the requested method
+    pair = _find_pair(result["pairs"], "size", "price")
+    assert pair["p_value_adjusted"] is not None  # an adjusted p-value is present for the pair