Files
fn_registry/python/functions/datascience/describe_numeric_test.py
T
egutierrez 763e06c127 feat(browser): auto-commit con 178 cambios
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-06-20 18:22:23 +02:00

86 lines
3.0 KiB
Python

"""Tests para describe_numeric."""
import os
import sys

# Put this file's own directory first on sys.path before importing
# describe_numeric (presumably a sibling module; verify the layout) so the
# import does not depend on the current working directory.
sys.path.insert(0, os.path.dirname(__file__))
from describe_numeric import describe_numeric
# Keys that every result dict must always contain (the eda numeric_sub contract).
_EXPECTED_KEYS = {
"min", "max", "mean", "median", "mode", "std", "variance", "cv",
"p1", "p5", "p25", "p50", "p75", "p95", "p99", "iqr",
"skew", "kurtosis", "n_outliers", "outlier_pct",
"zero_pct", "negative_pct", "distribution_type", "histogram",
}
def test_lista_con_outlier_y_none():
    """A list with one clear outlier and a None that must be dropped."""
    # A tight cluster of 40 values (mode == 2) plus a None to drop and one
    # extreme value. The cluster is large enough to keep std small, so the
    # extreme value's z-score exceeds the 3.0 threshold used by
    # detect_outliers.
    cluster = [1, 2, 2, 3, 4] * 8
    values = cluster + [None, 1000]
    result = describe_numeric(values)

    # Contract: all keys present.
    assert set(result.keys()) == _EXPECTED_KEYS

    # The None is dropped; the extremes come from the 41 numeric values.
    assert result["min"] == 1.0
    assert result["max"] == 1000.0

    # The median sits inside the cluster while the mean is pulled up by 1000.
    assert result["median"] < result["mean"]
    assert 0.0 < result["median"] <= 5.0

    # mode = most frequent value (2 appears twice per block of five).
    assert result["mode"] == 2.0

    # At least one z-score outlier detected (the 1000).
    assert result["n_outliers"] >= 1
    assert result["outlier_pct"] > 0.0

    # Histogram is non-empty and its counts cover every numeric value.
    assert len(result["histogram"]) > 0
    total = sum(bucket["count"] for bucket in result["histogram"])
    assert total == 41
    for bucket in result["histogram"]:
        assert "lo" in bucket and "hi" in bucket and "count" in bucket

    # No zeros and no negatives in this sample.
    assert result["zero_pct"] == 0.0
    assert result["negative_pct"] == 0.0
def test_lista_vacia_todo_none():
    """Input with no usable numbers yields None for every scalar key."""
    # None, a string and NaN: nothing here counts as a numeric value.
    no_numbers = [None, "abc", float("nan")]
    stats = describe_numeric(no_numbers)

    # The key contract holds even when there is nothing to summarise.
    assert set(stats.keys()) == _EXPECTED_KEYS

    # Every key except the histogram must be None; the histogram is empty.
    scalar_keys = _EXPECTED_KEYS - {"histogram"}
    for key in scalar_keys:
        assert stats[key] is None, f"{key} debe ser None"
    assert stats["histogram"] == []
def test_cv_none_cuando_mean_cero():
    """cv is None when the mean is 0."""
    # Values mirrored around zero, so they sum to zero and mean == 0.
    symmetric = [-2, -1, 0, 1, 2]
    stats = describe_numeric(symmetric)

    assert stats["mean"] == 0.0
    assert stats["cv"] is None

    # One zero and two negatives out of five values.
    assert stats["zero_pct"] == 20.0
    assert stats["negative_pct"] == 40.0
def test_iqr_y_percentiles():
    """iqr equals p75 - p25 and the percentiles are in non-decreasing order."""
    import math  # local import keeps this block self-contained

    result = describe_numeric(list(range(1, 101)))  # 1..100

    # Compare floats with a tolerance: iqr is derived from two other floats,
    # so exact equality would be sensitive to rounding in the implementation.
    assert math.isclose(
        result["iqr"],
        result["p75"] - result["p25"],
        rel_tol=1e-9,
        abs_tol=1e-9,
    )

    # Every percentile in the contract must be ordered and bounded by the
    # extremes, not only the subset p1/p25/p50/p75/p99.
    ordered_keys = ("min", "p1", "p5", "p25", "p50", "p75", "p95", "p99", "max")
    ordered_values = [result[key] for key in ordered_keys]
    assert ordered_values == sorted(ordered_values)

    assert result["min"] == 1.0
    assert result["max"] == 100.0