"""Tests para suggest_reexpression.""" from suggest_reexpression import suggest_reexpression def test_aproximadamente_simetrica_recomienda_none(): # |skew| < 0.5 -> no hace falta re-expresar. out = suggest_reexpression({"skew": 0.1, "min": 5.0, "zero_pct": 0.0, "negative_pct": 0.0}) assert out["recommended"] == "none" assert out["ladder_power"] == 1.0 assert out["alternatives"] == [] assert out["note"] == "" def test_positiva_fuerte_todo_positivo_recomienda_log(): # Cola derecha larga sobre datos estrictamente positivos -> log. out = suggest_reexpression({"skew": 2.3, "min": 1.0, "zero_pct": 0.0, "negative_pct": 0.0}) assert out["recommended"] == "log" assert out["ladder_power"] == 0.0 transforms = [a["transform"] for a in out["alternatives"]] assert "box-cox" in transforms def test_positiva_moderada_todo_positivo_recomienda_sqrt(): out = suggest_reexpression({"skew": 0.7, "min": 2.0, "zero_pct": 0.0, "negative_pct": 0.0}) assert out["recommended"] == "sqrt" assert out["ladder_power"] == 0.5 def test_positiva_con_ceros_fuerte_recomienda_log1p(): # log(0) indefinido -> log1p en presencia de ceros. out = suggest_reexpression({"skew": 1.5, "min": 0.0, "zero_pct": 12.0, "negative_pct": 0.0}) assert out["recommended"] == "log1p" assert out["ladder_power"] == 0.0 def test_positiva_con_negativos_recomienda_yeo_johnson(): # log/Box-Cox no admiten negativos -> Yeo-Johnson. out = suggest_reexpression({"skew": 1.8, "min": -4.0, "zero_pct": 0.0, "negative_pct": 20.0}) assert out["recommended"] == "yeo-johnson" assert out["ladder_power"] is None # data-driven def test_negativa_fuerte_todo_positivo_recomienda_cube(): # Cola izquierda -> subir por la escalera de Tukey. out = suggest_reexpression({"skew": -1.6, "min": 3.0, "zero_pct": 0.0, "negative_pct": 0.0}) assert out["recommended"] == "cube" assert out["ladder_power"] == 3.0 def test_negativa_moderada_todo_positivo_recomienda_square(): out = suggest_reexpression({"skew": -0.8, "min": 3.0, "zero_pct": 0.0, "negative_pct": 0.0}) assert out["recommended"] == "square" assert out["ladder_power"] == 2.0 def test_dominio_desconocido_recomienda_yeo_johnson_con_nota(): # Solo skew, sin min/zero_pct/negative_pct -> opción segura + nota. out = suggest_reexpression({"skew": 1.4}) assert out["recommended"] == "yeo-johnson" assert "dominio desconocido" in out["note"] def test_acepta_columnprofile_completo_con_numeric_anidado(): # Si llega un ColumnProfile entero, baja a su sub-bloque numeric. profile = { "name": "precio", "inferred_type": "numeric", "numeric": {"skew": 2.0, "min": 1.0, "zero_pct": 0.0, "negative_pct": 0.0}, } out = suggest_reexpression(profile) assert out["recommended"] == "log" def test_skew_ausente_devuelve_nota(): out = suggest_reexpression({"min": 1.0, "max": 9.0}) assert out["recommended"] is None assert "skew ausente" in out["note"] def test_stats_vacio_devuelve_nota(): out = suggest_reexpression({}) assert out["recommended"] is None assert out["alternatives"] == [] assert out["note"] def test_no_dict_no_lanza(): out = suggest_reexpression(None) assert out["recommended"] is None assert out["note"] def test_skew_no_numerico_devuelve_nota(): out = suggest_reexpression({"skew": "mucho"}) assert out["recommended"] is None assert out["skew"] is None