Files
fn_registry/python/functions/datascience/suggest_reexpression_test.py
T
Egutierrez caf8c25d99 fix(eda): bugs de bajo riesgo del benchmark (H1,H5,H12,H13,H14) + tests faltantes
- H1: render_eda_markdown ya no aplica doble x100 a outlier_pct (336% -> real)
- H5: profile_database filtra base_tables_only (excluye VIEWs; sakila 21->16)
- H12: suggest_reexpression salta columnas no-continuas
- H13: to_returns/profile_table elige retornos (financiera) vs diferencias (fisica)
- H14: test de regresion ATTACH sqlite via information_schema
- +8 tests de las funciones eda nuevas (acf_pacf, adf_kpss, ...). 77 tests verdes
- L/M (H2,H3,H4,H6,H7,H8,H9,H10,H11) quedan en issues 0174-0177 para revision

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
2026-06-29 03:51:11 +02:00

98 lines
3.4 KiB
Python

"""Tests para suggest_reexpression."""
from suggest_reexpression import suggest_reexpression
def test_aproximadamente_simetrica_recomienda_none():
    """A near-symmetric profile is expected to need no re-expression."""
    # |skew| < 0.5 -> re-expressing is unnecessary.
    stats = {"skew": 0.1, "min": 5.0, "zero_pct": 0.0, "negative_pct": 0.0}
    result = suggest_reexpression(stats)
    assert result["recommended"] == "none"
    assert result["ladder_power"] == 1.0
    assert result["alternatives"] == []
    assert result["note"] == ""
def test_positiva_fuerte_todo_positivo_recomienda_log():
    """Strong positive skew on strictly positive data is expected to yield log."""
    # Long right tail over strictly positive data -> log.
    stats = {"skew": 2.3, "min": 1.0, "zero_pct": 0.0, "negative_pct": 0.0}
    result = suggest_reexpression(stats)
    assert result["recommended"] == "log"
    assert result["ladder_power"] == 0.0
    # Box-Cox must be listed among the alternative transforms.
    offered = tuple(alt["transform"] for alt in result["alternatives"])
    assert "box-cox" in offered
def test_positiva_moderada_todo_positivo_recomienda_sqrt():
    """A skew of 0.7 on strictly positive data is expected to yield sqrt."""
    stats = {"skew": 0.7, "min": 2.0, "zero_pct": 0.0, "negative_pct": 0.0}
    result = suggest_reexpression(stats)
    assert result["recommended"] == "sqrt"
    assert result["ladder_power"] == 0.5
def test_positiva_con_ceros_fuerte_recomienda_log1p():
    """Strong positive skew with zeros present is expected to yield log1p."""
    # log(0) is undefined -> log1p when zeros are present.
    stats = {"skew": 1.5, "min": 0.0, "zero_pct": 12.0, "negative_pct": 0.0}
    result = suggest_reexpression(stats)
    assert result["recommended"] == "log1p"
    assert result["ladder_power"] == 0.0
def test_positiva_con_negativos_recomienda_yeo_johnson():
    """Positive skew with negative values is expected to yield Yeo-Johnson."""
    # log/Box-Cox do not accept negatives -> Yeo-Johnson.
    stats = {"skew": 1.8, "min": -4.0, "zero_pct": 0.0, "negative_pct": 20.0}
    result = suggest_reexpression(stats)
    assert result["recommended"] == "yeo-johnson"
    assert result["ladder_power"] is None  # data-driven
def test_negativa_fuerte_todo_positivo_recomienda_cube():
    """Strong negative skew on strictly positive data is expected to yield cube."""
    # Left tail -> climb Tukey's ladder of powers.
    stats = {"skew": -1.6, "min": 3.0, "zero_pct": 0.0, "negative_pct": 0.0}
    result = suggest_reexpression(stats)
    assert result["recommended"] == "cube"
    assert result["ladder_power"] == 3.0
def test_negativa_moderada_todo_positivo_recomienda_square():
    """A skew of -0.8 on strictly positive data is expected to yield square."""
    stats = {"skew": -0.8, "min": 3.0, "zero_pct": 0.0, "negative_pct": 0.0}
    result = suggest_reexpression(stats)
    assert result["recommended"] == "square"
    assert result["ladder_power"] == 2.0
def test_dominio_desconocido_recomienda_yeo_johnson_con_nota():
    """With only a skew value, Yeo-Johnson plus an explanatory note is expected."""
    # Only skew, no min/zero_pct/negative_pct -> safe option + note.
    skew_only = {"skew": 1.4}
    result = suggest_reexpression(skew_only)
    assert result["recommended"] == "yeo-johnson"
    assert "dominio desconocido" in result["note"]
def test_acepta_columnprofile_completo_con_numeric_anidado():
    """A full column profile with a nested ``numeric`` block is accepted."""
    # If a whole ColumnProfile arrives, the function is expected to descend
    # into its numeric sub-block.
    numeric_block = {"skew": 2.0, "min": 1.0, "zero_pct": 0.0, "negative_pct": 0.0}
    column_profile = {
        "name": "precio",
        "inferred_type": "numeric",
        "numeric": numeric_block,
    }
    result = suggest_reexpression(column_profile)
    assert result["recommended"] == "log"
def test_skew_ausente_devuelve_nota():
    """Stats without a skew entry give no recommendation and a note saying so."""
    stats_without_skew = {"min": 1.0, "max": 9.0}
    result = suggest_reexpression(stats_without_skew)
    assert result["recommended"] is None
    assert "skew ausente" in result["note"]
def test_stats_vacio_devuelve_nota():
    """An empty stats dict gives no recommendation, no alternatives, and a note."""
    empty_stats = {}
    result = suggest_reexpression(empty_stats)
    assert result["recommended"] is None
    assert result["alternatives"] == []
    # The note only has to be non-empty; its wording is not pinned here.
    assert result["note"]
def test_no_dict_no_lanza():
    """A non-dict argument (None) must not raise and must produce a note."""
    not_a_dict = None
    result = suggest_reexpression(not_a_dict)
    assert result["recommended"] is None
    # The note only has to be non-empty; its wording is not pinned here.
    assert result["note"]
def test_skew_no_numerico_devuelve_nota():
    """A non-numeric skew gives no recommendation and a skew reported as None."""
    bad_stats = {"skew": "mucho"}
    result = suggest_reexpression(bad_stats)
    assert result["recommended"] is None
    assert result["skew"] is None