caf8c25d99
- H1: render_eda_markdown ya no aplica doble x100 a outlier_pct (336% -> real) - H5: profile_database filtra base_tables_only (excluye VIEWs; sakila 21->16) - H12: suggest_reexpression salta columnas no-continuas - H13: to_returns/profile_table elige retornos (financiera) vs diferencias (fisica) - H14: test de regresion ATTACH sqlite via information_schema - +8 tests de las funciones eda nuevas (acf_pacf, adf_kpss, ...). 77 tests verdes - L/M (H2,H3,H4,H6,H7,H8,H9,H10,H11) quedan en issues 0174-0177 para revision Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
98 lines
3.4 KiB
Python
98 lines
3.4 KiB
Python
"""Tests para suggest_reexpression."""
|
|
|
|
from suggest_reexpression import suggest_reexpression
|
|
|
|
|
|
def test_aproximadamente_simetrica_recomienda_none():
    """A roughly symmetric column must come back with no transform suggested."""
    # |skew| < 0.5 -> re-expression is not needed.
    stats = {"skew": 0.1, "min": 5.0, "zero_pct": 0.0, "negative_pct": 0.0}
    suggestion = suggest_reexpression(stats)

    assert suggestion["recommended"] == "none"
    assert suggestion["ladder_power"] == 1.0
    assert suggestion["alternatives"] == []
    assert suggestion["note"] == ""
|
|
|
|
|
|
def test_positiva_fuerte_todo_positivo_recomienda_log():
    """Strong positive skew on strictly positive data must recommend ``log``."""
    # Long right tail over strictly positive values -> log.
    stats = {"skew": 2.3, "min": 1.0, "zero_pct": 0.0, "negative_pct": 0.0}
    suggestion = suggest_reexpression(stats)

    assert suggestion["recommended"] == "log"
    assert suggestion["ladder_power"] == 0.0

    # Box-Cox is expected to be listed among the alternative transforms.
    offered = tuple(alt["transform"] for alt in suggestion["alternatives"])
    assert "box-cox" in offered
|
|
|
|
|
|
def test_positiva_moderada_todo_positivo_recomienda_sqrt():
    """Moderate positive skew on positive data must recommend ``sqrt`` (power 0.5)."""
    stats = {"skew": 0.7, "min": 2.0, "zero_pct": 0.0, "negative_pct": 0.0}
    suggestion = suggest_reexpression(stats)

    assert suggestion["recommended"] == "sqrt"
    assert suggestion["ladder_power"] == 0.5
|
|
|
|
|
|
def test_positiva_con_ceros_fuerte_recomienda_log1p():
    """Strong positive skew with zeros present must recommend ``log1p``."""
    # log(0) is undefined -> log1p when zeros are present.
    stats = {"skew": 1.5, "min": 0.0, "zero_pct": 12.0, "negative_pct": 0.0}
    suggestion = suggest_reexpression(stats)

    assert suggestion["recommended"] == "log1p"
    assert suggestion["ladder_power"] == 0.0
|
|
|
|
|
|
def test_positiva_con_negativos_recomienda_yeo_johnson():
    """Positive skew with negative values must recommend ``yeo-johnson``."""
    # log/Box-Cox do not accept negatives -> Yeo-Johnson.
    stats = {"skew": 1.8, "min": -4.0, "zero_pct": 0.0, "negative_pct": 20.0}
    suggestion = suggest_reexpression(stats)

    assert suggestion["recommended"] == "yeo-johnson"
    assert suggestion["ladder_power"] is None  # data-driven
|
|
|
|
|
|
def test_negativa_fuerte_todo_positivo_recomienda_cube():
    """Strong negative skew on positive data must recommend ``cube`` (power 3)."""
    # Left tail -> move up Tukey's ladder.
    stats = {"skew": -1.6, "min": 3.0, "zero_pct": 0.0, "negative_pct": 0.0}
    suggestion = suggest_reexpression(stats)

    assert suggestion["recommended"] == "cube"
    assert suggestion["ladder_power"] == 3.0
|
|
|
|
|
|
def test_negativa_moderada_todo_positivo_recomienda_square():
    """Moderate negative skew on positive data must recommend ``square`` (power 2)."""
    stats = {"skew": -0.8, "min": 3.0, "zero_pct": 0.0, "negative_pct": 0.0}
    suggestion = suggest_reexpression(stats)

    assert suggestion["recommended"] == "square"
    assert suggestion["ladder_power"] == 2.0
|
|
|
|
|
|
def test_dominio_desconocido_recomienda_yeo_johnson_con_nota():
    """With only ``skew`` supplied, expect ``yeo-johnson`` plus an explanatory note."""
    # Only skew, no min/zero_pct/negative_pct -> safe option + note.
    skew_only = {"skew": 1.4}
    suggestion = suggest_reexpression(skew_only)

    assert suggestion["recommended"] == "yeo-johnson"
    assert "dominio desconocido" in suggestion["note"]
|
|
|
|
|
|
def test_acepta_columnprofile_completo_con_numeric_anidado():
    """A full column profile with a nested ``numeric`` block must be accepted."""
    # When a whole ColumnProfile arrives, its numeric sub-block is what gets used.
    numeric_stats = {"skew": 2.0, "min": 1.0, "zero_pct": 0.0, "negative_pct": 0.0}
    column_profile = {
        "name": "precio",
        "inferred_type": "numeric",
        "numeric": numeric_stats,
    }

    suggestion = suggest_reexpression(column_profile)

    assert suggestion["recommended"] == "log"
|
|
|
|
|
|
def test_skew_ausente_devuelve_nota():
    """Stats without a ``skew`` key must yield no recommendation and a note."""
    stats_without_skew = {"min": 1.0, "max": 9.0}
    suggestion = suggest_reexpression(stats_without_skew)

    assert suggestion["recommended"] is None
    assert "skew ausente" in suggestion["note"]
|
|
|
|
|
|
def test_stats_vacio_devuelve_nota():
    """An empty stats dict must yield no recommendation, no alternatives, and a note."""
    empty_stats = {}
    suggestion = suggest_reexpression(empty_stats)

    assert suggestion["recommended"] is None
    assert suggestion["alternatives"] == []
    # Any non-empty note is acceptable here; only its presence is checked.
    assert suggestion["note"]
|
|
|
|
|
|
def test_no_dict_no_lanza():
    """Passing a non-dict (``None``) must not raise; a note is returned instead."""
    suggestion = suggest_reexpression(None)

    assert suggestion["recommended"] is None
    # Only the presence of a note is checked, not its wording.
    assert suggestion["note"]
|
|
|
|
|
|
def test_skew_no_numerico_devuelve_nota():
    """A non-numeric ``skew`` value must yield no recommendation and a null skew."""
    bad_stats = {"skew": "mucho"}
    suggestion = suggest_reexpression(bad_stats)

    assert suggestion["recommended"] is None
    assert suggestion["skew"] is None
|