feat(eda): build_boxplot_stats — estadísticas de boxplot Tukey desde sub-bloque numeric

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
This commit is contained in:
2026-06-30 14:54:49 +02:00
parent cb7a7fc1fd
commit fcf5a4c6a3
3 changed files with 260 additions and 0 deletions
@@ -0,0 +1,108 @@
"""Tests for build_boxplot_stats."""
import os
import sys
# Put this file's own directory at the front of sys.path so that the
# build_boxplot_stats import below can be resolved from there.
sys.path.insert(0, os.path.dirname(__file__))
from build_boxplot_stats import build_boxplot_stats
# Keys that a non-empty result dict must always contain.
_EXPECTED_KEYS = {
"q1", "median", "q3", "iqr", "lower_fence", "upper_fence",
"whisker_lo", "whisker_hi", "min", "max",
"has_low_outliers", "has_high_outliers", "n_outliers",
}
def test_boxplot_tukey_basico():
    """Golden case: a numeric block with a clear high outlier yields Tukey IQR fences."""
    stats = build_boxplot_stats(
        {
            "min": 1.0,
            "max": 100.0,
            "p25": 10.0,
            "median": 25.0,
            "p75": 40.0,
            "iqr": 30.0,
            "n_outliers": 3,
        }
    )

    # The result exposes exactly the expected keys, no more and no fewer.
    assert set(stats) == _EXPECTED_KEYS

    expected_values = {
        "q1": 10.0,
        "median": 25.0,
        "q3": 40.0,
        # iqr = q3 - q1 = 40 - 10.
        "iqr": 30.0,
        # lower = 10 - 1.5 * 30 ; upper = 40 + 1.5 * 30.
        "lower_fence": -35.0,
        "upper_fence": 85.0,
        # whisker_lo = max(min=1, -35) ; whisker_hi = min(max=100, 85).
        "whisker_lo": 1.0,
        "whisker_hi": 85.0,
        "min": 1.0,
        "max": 100.0,
    }
    observed_values = {key: stats[key] for key in expected_values}
    assert observed_values == expected_values

    # Only the high side has outliers: 100 > 85, while 1 is not below -35.
    assert stats["has_low_outliers"] is False
    assert stats["has_high_outliers"] is True

    # The outlier count given in the input block is expected back unchanged.
    assert stats["n_outliers"] == 3
def test_percentiles_faltan_devuelve_vacio():
    """A missing p25, median or p75 gives {} so the caller can skip the boxplot."""
    incomplete_inputs = (
        {"median": 25.0, "p75": 40.0},  # p25 missing
        {"p25": 10.0, "median": 25.0},  # p75 missing
        {"p25": 10.0, "p75": 40.0},  # neither median nor p50 present
        # None and an empty dict must also give {} instead of raising.
        None,
        {},
    )
    for payload in incomplete_inputs:
        assert build_boxplot_stats(payload) == {}
def test_median_cae_a_p50():
    """When "median" is absent the value under "p50" is used instead."""
    stats = build_boxplot_stats(
        {"min": 0.0, "max": 10.0, "p25": 2.0, "p50": 5.0, "p75": 8.0}
    )
    median_q1_q3 = (stats["median"], stats["q1"], stats["q3"])
    assert median_q1_q3 == (5.0, 2.0, 8.0)
def test_whiskers_usan_fence_si_falta_min_max():
    """Without min/max the whiskers sit on the fences and no outliers are flagged."""
    # Input deliberately carries neither "min" nor "max".
    stats = build_boxplot_stats({"p25": 10.0, "median": 25.0, "p75": 40.0})

    for extreme in ("min", "max"):
        assert stats[extreme] is None

    # iqr = 30 gives fences at -35 and 85; each whisker must equal its fence.
    assert stats["whisker_lo"] == stats["lower_fence"]
    assert stats["lower_fence"] == -35.0
    assert stats["whisker_hi"] == stats["upper_fence"]
    assert stats["upper_fence"] == 85.0

    # With no real extremes available, no outlier flag may be set.
    for flag in ("has_low_outliers", "has_high_outliers"):
        assert stats[flag] is False

    # A missing n_outliers comes back as 0.
    assert stats["n_outliers"] == 0
def test_tipos_salida_float_bool_int():
    """Numeric fields are floats, flags are native bools, n_outliers is an int."""
    float_keys = (
        "q1", "median", "q3", "iqr", "lower_fence", "upper_fence",
        "whisker_lo", "whisker_hi", "min", "max",
    )
    flag_keys = ("has_low_outliers", "has_high_outliers")

    box = build_boxplot_stats(
        {
            "min": -50.0,
            "max": 200.0,
            "p25": 10.0,
            "median": 25.0,
            "p75": 40.0,
            "n_outliers": 7,
        }
    )

    for key in float_keys:
        assert isinstance(box[key], float), f"{key} debe ser float"
    for flag in flag_keys:
        assert isinstance(box[flag], bool)

    # bool is a subclass of int, so it has to be excluded explicitly.
    outlier_count = box["n_outliers"]
    assert isinstance(outlier_count, int)
    assert not isinstance(outlier_count, bool)

    # min=-50 is below lower_fence=-35 and max=200 is above upper_fence=85,
    # so both sides must be flagged.
    for flag in flag_keys:
        assert box[flag] is True
    assert box["n_outliers"] == 7