feat(eda): series temporales + rigor anti-data-mining + PDF movil + /eda + benchmark issues

Bloque del grupo eda (sesion ausente EDA-benchmark):
- 8 funciones nuevas: adf_kpss_stationarity, acf_pacf, stl_decompose, to_returns,
  fdr_correction, suggest_reexpression, exploratory_caveats, render_eda_pdf
- integracion: profile_table (run_series, emit_pdf), association_matrix (FDR Benjamini-Hochberg),
  render_eda_markdown (secciones series/reexpresion/caveats)
- slash commands /eda y /capitulos
- issues 0173-0177: mejoras del /eda derivadas del benchmark sobre 12 datasets reales
  (outlier_pct x100, periodo estacional, FK inference, render models, tipos id-like)

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
This commit is contained in:
Egutierrez
2026-06-29 03:34:01 +02:00
parent 02301aaed3
commit 7ac69ab4fb
33 changed files with 3995 additions and 51 deletions
@@ -80,3 +80,79 @@ def test_single_column_returns_empty():
result = association_matrix(columns)
assert result["pairs"] == []
assert result["strong"] == []
def test_pairs_carry_significance_fields():
    """Check that an evaluated pair exposes the significance fields.

    After the FDR correction every evaluated pair is expected to carry
    ``p_value``, ``p_value_adjusted`` and ``significant``. A strongly
    correlated numeric-numeric pair must come out as significant.
    """
    size_values = [1, 2, 3, 4, 5, 6, 7, 8]
    price_values = [2.1, 4.0, 5.9, 8.1, 10.0, 12.2, 13.8, 16.1]
    table = {
        "size": {"values": size_values, "type": "numeric"},
        "price": {"values": price_values, "type": "numeric"},
    }

    matrix = association_matrix(table, strong_threshold=0.5)
    entry = _find_pair(matrix["pairs"], "size", "price")

    for field in ("p_value", "p_value_adjusted", "significant"):
        assert field in entry

    raw_p = entry["p_value"]
    assert raw_p is not None
    assert raw_p < 0.05
    assert entry["significant"] is True
    # The adjusted p-value must never drop below the raw one; the small
    # tolerance absorbs floating-point noise.
    assert entry["p_value_adjusted"] >= raw_p - 1e-12
def test_result_reports_multiple_testing_summary():
    """Check the multiple-testing summary attached to the result.

    With default arguments the summary is expected to report the ``"bh"``
    method, an alpha of 0.05, and at least one test and one rejection.
    """
    size_values = [1, 2, 3, 4, 5, 6, 7, 8]
    price_values = [2.1, 4.0, 5.9, 8.1, 10.0, 12.2, 13.8, 16.1]
    table = {
        "size": {"values": size_values, "type": "numeric"},
        "price": {"values": price_values, "type": "numeric"},
    }

    outcome = association_matrix(table)

    # n_tests is the total number of evaluated pairs.
    assert outcome["n_tests"] == len(outcome["pairs"])

    summary = outcome["multiple_testing"]
    assert summary["method"] == "bh"
    assert summary["alpha"] == 0.05
    for counter in ("n_rejected", "n_tests"):
        assert summary[counter] >= 1
def test_strong_requires_corrected_significance():
    """Check that ``strong`` depends on corrected significance.

    The numeric-numeric pair has a high magnitude but a p-value that is not
    vanishingly small. With the usual alpha it is strong; with an alpha
    stricter than its own p-value it stops being significant and leaves
    ``strong`` EVEN THOUGH its magnitude is still above the threshold. This
    shows that ``strong`` is driven by the corrected significance and not
    by the threshold alone.
    """
    a_values = list(range(1, 13))
    b_values = [2, 1, 3, 4, 6, 5, 7, 8, 10, 9, 11, 12]
    table = {
        "a": {"values": a_values, "type": "numeric"},
        "b": {"values": b_values, "type": "numeric"},
    }

    lenient = association_matrix(table, strong_threshold=0.5, alpha=0.05)
    lenient_pair = _find_pair(lenient["pairs"], "a", "b")
    assert lenient_pair["p_value"] is not None
    assert lenient_pair["p_value"] < 0.05
    assert abs(lenient_pair["value"]) >= 0.5
    assert _find_pair(lenient["strong"], "a", "b") is not None

    # An alpha stricter than the pair's own p-value: no longer significant.
    tighter_alpha = lenient_pair["p_value"] / 10.0
    tight = association_matrix(table, strong_threshold=0.5, alpha=tighter_alpha)
    tight_pair = _find_pair(tight["pairs"], "a", "b")
    assert abs(tight_pair["value"]) >= 0.5  # magnitude is untouched
    assert tight_pair["significant"] is False
    assert _find_pair(tight["strong"], "a", "b") is None
def test_bonferroni_method_is_accepted():
    """Check that ``fdr_method="bonferroni"`` is accepted and reported.

    The summary must echo the requested method, and the evaluated pair
    must still carry an adjusted p-value.
    """
    size_values = [1, 2, 3, 4, 5, 6, 7, 8]
    price_values = [2.1, 4.0, 5.9, 8.1, 10.0, 12.2, 13.8, 16.1]
    table = {
        "size": {"values": size_values, "type": "numeric"},
        "price": {"values": price_values, "type": "numeric"},
    }

    outcome = association_matrix(table, fdr_method="bonferroni")

    reported_method = outcome["multiple_testing"]["method"]
    assert reported_method == "bonferroni"

    entry = _find_pair(outcome["pairs"], "size", "price")
    assert entry["p_value_adjusted"] is not None