# fn_registry/python/functions/datascience/build_boxplot_stats_test.py

"""Tests for build_boxplot_stats."""

import os
import sys

sys.path.insert(0, os.path.dirname(__file__))

from build_boxplot_stats import build_boxplot_stats

# Keys that a non-empty result dict must always contain.
_EXPECTED_KEYS = {
    "q1", "median", "q3", "iqr", "lower_fence", "upper_fence",
    "whisker_lo", "whisker_hi", "min", "max",
    "has_low_outliers", "has_high_outliers", "n_outliers",
}


def test_boxplot_tukey_basico():
    """Golden case: numeric block with a clear high outlier -> Tukey IQR fences."""
    stats = build_boxplot_stats(
        dict(
            min=1.0,
            max=100.0,
            p25=10.0,
            median=25.0,
            p75=40.0,
            iqr=30.0,
            n_outliers=3,
        )
    )

    # The result must expose exactly the documented key set.
    assert set(stats) == _EXPECTED_KEYS

    expected_numbers = {
        # Quartiles; 30 is both the supplied iqr and q3 - q1.
        "q1": 10.0,
        "median": 25.0,
        "q3": 40.0,
        "iqr": 30.0,
        # lower = 10 - 1.5*30 = -35 ; upper = 40 + 1.5*30 = 85.
        "lower_fence": -35.0,
        "upper_fence": 85.0,
        # whisker_lo = max(min=1, -35) = 1 ; whisker_hi = min(max=100, 85) = 85.
        "whisker_lo": 1.0,
        "whisker_hi": 85.0,
        "min": 1.0,
        "max": 100.0,
    }
    observed_numbers = {name: stats[name] for name in expected_numbers}
    assert observed_numbers == expected_numbers

    # Only a high outlier exists (100 > 85); the minimum 1 is not below -35.
    assert stats["has_low_outliers"] is False
    assert stats["has_high_outliers"] is True
    # n_outliers is carried over from the input block.
    assert stats["n_outliers"] == 3


def test_percentiles_faltan_devuelve_vacio():
    """A missing p25/median/p75 yields {} (the caller then omits the boxplot)."""
    unusable_inputs = (
        {"median": 25.0, "p75": 40.0},  # p25 missing
        {"p25": 10.0, "median": 25.0},  # p75 missing
        {"p25": 10.0, "p75": 40.0},  # neither median nor p50
        None,  # no numeric block at all: empty result, no exception
        {},  # empty numeric block
    )
    for numeric in unusable_inputs:
        assert build_boxplot_stats(numeric) == {}


def test_median_cae_a_p50():
    """When `median` is absent, the median is taken from `p50`."""
    result = build_boxplot_stats(
        {"min": 0.0, "max": 10.0, "p25": 2.0, "p50": 5.0, "p75": 8.0}
    )
    central = (result["median"], result["q1"], result["q3"])
    assert central == (5.0, 2.0, 8.0)


def test_whiskers_usan_fence_si_falta_min_max():
    """Without min/max the whiskers fall back to the fences and no outlier is flagged."""
    # Input deliberately carries neither "min" nor "max".
    out = build_boxplot_stats({"p25": 10.0, "median": 25.0, "p75": 40.0})

    assert out["min"] is None
    assert out["max"] is None

    # iqr = 30, so the fences are -35 / 85 and the whiskers sit on them.
    low_fence = out["lower_fence"]
    assert out["whisker_lo"] == low_fence
    assert low_fence == -35.0
    high_fence = out["upper_fence"]
    assert out["whisker_hi"] == high_fence
    assert high_fence == 85.0

    # With no real extremes available, no outliers are claimed.
    assert out["has_low_outliers"] is False
    assert out["has_high_outliers"] is False
    # A missing n_outliers is reported as 0.
    assert out["n_outliers"] == 0


def test_tipos_salida_float_bool_int():
    """Numeric fields are floats, flags are native bools, n_outliers is an int."""
    summary = build_boxplot_stats(
        {
            "min": -50.0,
            "max": 200.0,
            "p25": 10.0,
            "median": 25.0,
            "p75": 40.0,
            "n_outliers": 7,
        }
    )

    float_fields = (
        "q1", "median", "q3", "iqr",
        "lower_fence", "upper_fence",
        "whisker_lo", "whisker_hi",
        "min", "max",
    )
    for key in float_fields:
        assert isinstance(summary[key], float), f"{key} debe ser float"

    # min=-50 < lower_fence=-35 -> low outlier ; max=200 > upper_fence=85 -> high.
    for flag_name in ("has_low_outliers", "has_high_outliers"):
        flag = summary[flag_name]
        assert isinstance(flag, bool)
        assert flag is True

    # bool is a subclass of int, so it has to be ruled out explicitly.
    count = summary["n_outliers"]
    assert isinstance(count, int)
    assert not isinstance(count, bool)
    assert count == 7