"""Tests para build_boxplot_stats.""" import os import sys sys.path.insert(0, os.path.dirname(__file__)) from build_boxplot_stats import build_boxplot_stats # Keys that a non-empty result dict must always contain. _EXPECTED_KEYS = { "q1", "median", "q3", "iqr", "lower_fence", "upper_fence", "whisker_lo", "whisker_hi", "min", "max", "has_low_outliers", "has_high_outliers", "n_outliers", } def test_boxplot_tukey_basico(): """Golden: bloque numeric con outlier alto claro -> fences IQR de Tukey.""" numeric = { "min": 1.0, "max": 100.0, "p25": 10.0, "median": 25.0, "p75": 40.0, "iqr": 30.0, "n_outliers": 3, } box = build_boxplot_stats(numeric) assert set(box.keys()) == _EXPECTED_KEYS assert box["q1"] == 10.0 assert box["median"] == 25.0 assert box["q3"] == 40.0 # iqr recomputado desde los cuartiles. assert box["iqr"] == 30.0 # lower = 10 - 1.5*30 = -35 ; upper = 40 + 1.5*30 = 85. assert box["lower_fence"] == -35.0 assert box["upper_fence"] == 85.0 # whisker_lo = max(min=1, -35) = 1 ; whisker_hi = min(max=100, 85) = 85. assert box["whisker_lo"] == 1.0 assert box["whisker_hi"] == 85.0 assert box["min"] == 1.0 assert box["max"] == 100.0 # Solo hay outliers altos (100 > 85), no bajos (1 no < -35). assert box["has_low_outliers"] is False assert box["has_high_outliers"] is True # n_outliers se propaga del bloque z-score (informativo). assert box["n_outliers"] == 3 def test_percentiles_faltan_devuelve_vacio(): """Si falta p25/median/p75 -> {} (caller omite el boxplot).""" # Falta p25. assert build_boxplot_stats({"median": 25.0, "p75": 40.0}) == {} # Falta p75. assert build_boxplot_stats({"p25": 10.0, "median": 25.0}) == {} # Falta median y p50. assert build_boxplot_stats({"p25": 10.0, "p75": 40.0}) == {} # numeric None / no dict tambien es vacio, nunca lanza. assert build_boxplot_stats(None) == {} assert build_boxplot_stats({}) == {} def test_median_cae_a_p50(): """median ausente cae a p50.""" numeric = {"min": 0.0, "max": 10.0, "p25": 2.0, "p50": 5.0, "p75": 8.0} box = build_boxplot_stats(numeric) assert box["median"] == 5.0 assert box["q1"] == 2.0 assert box["q3"] == 8.0 def test_whiskers_usan_fence_si_falta_min_max(): """Sin min/max los bigotes caen a las fences y no hay outliers marcados.""" numeric = {"p25": 10.0, "median": 25.0, "p75": 40.0} # sin min ni max box = build_boxplot_stats(numeric) assert box["min"] is None assert box["max"] is None # iqr = 30, fences -35 / 85; los bigotes caen a las fences. assert box["whisker_lo"] == box["lower_fence"] == -35.0 assert box["whisker_hi"] == box["upper_fence"] == 85.0 # Sin extremos reales, no se afirma que haya outliers. assert box["has_low_outliers"] is False assert box["has_high_outliers"] is False # n_outliers ausente -> 0. assert box["n_outliers"] == 0 def test_tipos_salida_float_bool_int(): """Numericos en float, flags bool nativos, n_outliers int.""" numeric = { "min": -50.0, "max": 200.0, "p25": 10.0, "median": 25.0, "p75": 40.0, "n_outliers": 7, } box = build_boxplot_stats(numeric) for key in ("q1", "median", "q3", "iqr", "lower_fence", "upper_fence", "whisker_lo", "whisker_hi", "min", "max"): assert isinstance(box[key], float), f"{key} debe ser float" assert isinstance(box["has_low_outliers"], bool) assert isinstance(box["has_high_outliers"], bool) assert isinstance(box["n_outliers"], int) and not isinstance(box["n_outliers"], bool) # min=-50 < lower_fence=-35 -> outlier bajo ; max=200 > upper_fence=85 -> alto. assert box["has_low_outliers"] is True assert box["has_high_outliers"] is True assert box["n_outliers"] == 7