fcf5a4c6a3
Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
109 lines
3.8 KiB
Python
109 lines
3.8 KiB
Python
"""Tests para build_boxplot_stats."""
|
|
|
|
import os
|
|
import sys
|
|
|
|
sys.path.insert(0, os.path.dirname(__file__))
|
|
|
|
from build_boxplot_stats import build_boxplot_stats
|
|
|
|
# Keys that a non-empty result dict must always contain.
|
|
_EXPECTED_KEYS = {
|
|
"q1", "median", "q3", "iqr", "lower_fence", "upper_fence",
|
|
"whisker_lo", "whisker_hi", "min", "max",
|
|
"has_low_outliers", "has_high_outliers", "n_outliers",
|
|
}
|
|
|
|
|
|
def test_boxplot_tukey_basico():
    """Golden case: clear high outlier -> Tukey IQR fences."""
    numeric = {
        "min": 1.0, "max": 100.0,
        "p25": 10.0, "median": 25.0, "p75": 40.0,
        "iqr": 30.0, "n_outliers": 3,
    }
    box = build_boxplot_stats(numeric)

    # The result must expose exactly the expected keys, no more, no fewer.
    assert set(box.keys()) == _EXPECTED_KEYS

    assert box["q1"] == 10.0
    assert box["median"] == 25.0
    assert box["q3"] == 40.0
    # Expected iqr = q3 - q1 = 30. The original note says it is recomputed
    # from the quartiles; the input iqr is also 30, so this case alone cannot
    # tell recomputation apart from pass-through.
    assert box["iqr"] == 30.0
    # lower = 10 - 1.5*30 = -35 ; upper = 40 + 1.5*30 = 85.
    assert box["lower_fence"] == -35.0
    assert box["upper_fence"] == 85.0
    # whisker_lo = max(min=1, -35) = 1 ; whisker_hi = min(max=100, 85) = 85.
    assert box["whisker_lo"] == 1.0
    assert box["whisker_hi"] == 85.0
    assert box["min"] == 1.0
    assert box["max"] == 100.0
    # Only high outliers exist (100 > 85); none are low (1 is not < -35).
    assert box["has_low_outliers"] is False
    assert box["has_high_outliers"] is True
    # n_outliers is carried over from the input block (per the original note
    # it comes from the z-score analysis and is informational only).
    assert box["n_outliers"] == 3
|
|
|
|
|
|
def test_percentiles_faltan_devuelve_vacio():
    """Return {} when p25, median/p50 or p75 is missing.

    Per the original note, the caller omits the boxplot on an empty result.
    """
    # p25 missing.
    assert build_boxplot_stats({"median": 25.0, "p75": 40.0}) == {}
    # p75 missing.
    assert build_boxplot_stats({"p25": 10.0, "median": 25.0}) == {}
    # Both median and p50 missing.
    assert build_boxplot_stats({"p25": 10.0, "p75": 40.0}) == {}
    # None and an empty dict also yield {} instead of raising.
    assert build_boxplot_stats(None) == {}
    assert build_boxplot_stats({}) == {}
|
|
|
|
|
|
def test_median_cae_a_p50():
    """When "median" is absent, the median falls back to "p50"."""
    numeric = {"min": 0.0, "max": 10.0, "p25": 2.0, "p50": 5.0, "p75": 8.0}
    box = build_boxplot_stats(numeric)
    assert box["median"] == 5.0
    assert box["q1"] == 2.0
    assert box["q3"] == 8.0
|
|
|
|
|
|
def test_whiskers_usan_fence_si_falta_min_max():
    """Without min/max the whiskers fall back to the fences, no outliers."""
    numeric = {"p25": 10.0, "median": 25.0, "p75": 40.0}  # no min, no max
    box = build_boxplot_stats(numeric)

    assert box["min"] is None
    assert box["max"] is None
    # iqr = 30, fences -35 / 85; the whiskers fall back to the fences.
    assert box["whisker_lo"] == box["lower_fence"] == -35.0
    assert box["whisker_hi"] == box["upper_fence"] == 85.0
    # With no real extremes, no outliers may be reported.
    assert box["has_low_outliers"] is False
    assert box["has_high_outliers"] is False
    # Missing n_outliers -> 0.
    assert box["n_outliers"] == 0
|
|
|
|
|
|
def test_tipos_salida_float_bool_int():
    """Numeric fields are float, flags are native bool, n_outliers is int."""
    numeric = {
        "min": -50.0, "max": 200.0,
        "p25": 10.0, "median": 25.0, "p75": 40.0,
        "n_outliers": 7,
    }
    box = build_boxplot_stats(numeric)

    for key in ("q1", "median", "q3", "iqr", "lower_fence", "upper_fence",
                "whisker_lo", "whisker_hi", "min", "max"):
        assert isinstance(box[key], float), f"{key} debe ser float"

    assert isinstance(box["has_low_outliers"], bool)
    assert isinstance(box["has_high_outliers"], bool)
    # bool is a subclass of int, so rule it out explicitly.
    assert isinstance(box["n_outliers"], int) and not isinstance(box["n_outliers"], bool)

    # min=-50 < lower_fence=-35 -> low outlier ; max=200 > upper_fence=85 -> high.
    assert box["has_low_outliers"] is True
    assert box["has_high_outliers"] is True
    assert box["n_outliers"] == 7
|