feat(browser): auto-commit con 178 cambios
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -0,0 +1,90 @@
|
||||
"""Tests for summarize_categorical."""
|
||||
|
||||
import sys
|
||||
import os
|
||||
|
||||
# Put this file's directory first on the import path; presumably so the
# summarize_categorical module imported below is found next to this test file.
sys.path.insert(0, os.path.dirname(__file__))
|
||||
|
||||
from summarize_categorical import summarize_categorical
|
||||
|
||||
|
||||
def test_summarize_categorical_repeated():
    """Check top ordering, mode, n_distinct, entropy, imbalance and lengths on repeated input."""
    summary = summarize_categorical(["a", "a", "b", "c", "a", None, ""])

    # None is dropped, leaving 6 non-null values: a, a, b, c, a, "".
    top_values = [entry["value"] for entry in summary["top"]]
    assert top_values == ["a", "b", "c", ""]

    leader = summary["top"][0]
    assert leader["count"] == 3

    # The top entries must be ordered by count, highest first.
    top_counts = [entry["count"] for entry in summary["top"]]
    descending = list(top_counts)
    descending.sort(reverse=True)
    assert top_counts == descending
    assert abs(leader["pct"] - 0.5) < 1e-12  # 3 of 6 non-null values

    assert summary["mode"] == "a"
    assert abs(summary["mode_pct"] - 0.5) < 1e-12
    assert summary["n_distinct"] == 4
    assert summary["entropy"] > 0
    assert summary["imbalance"] == 3.0  # max_count(3) / min_count(1)
    assert summary["len_min"] == 0  # the "" value
    assert summary["len_max"] == 1
|
||||
|
||||
|
||||
def test_summarize_categorical_empty():
    """Check that an empty list yields top == [] and None for every other key."""
    summary = summarize_categorical([])
    assert summary["top"] == []

    # Every key other than "top" is expected to be None for empty input.
    keys_expected_none = (
        "mode",
        "mode_pct",
        "n_distinct",
        "entropy",
        "imbalance",
        "len_mean",
        "len_min",
        "len_max",
    )
    for name in keys_expected_none:
        assert summary[name] is None
|
||||
|
||||
|
||||
def test_summarize_categorical_all_none():
    """Check that a list containing only None is handled like an empty list."""
    only_nones = [None, None, None]
    summary = summarize_categorical(only_nones)

    assert summary["top"] == []
    assert summary["n_distinct"] is None
    assert summary["entropy"] is None
|
||||
|
||||
|
||||
def test_summarize_categorical_single_value():
    """Check a single distinct value: entropy 0.0 and imbalance 1.0."""
    summary = summarize_categorical(["x"] * 3)

    assert summary["n_distinct"] == 1
    assert summary["entropy"] == 0.0
    assert summary["imbalance"] == 1.0
    assert summary["mode"] == "x"
    assert summary["mode_pct"] == 1.0
    assert summary["len_mean"] == 1.0
|
||||
|
||||
|
||||
def test_summarize_categorical_top_k():
    """Check that top_k caps the number of top entries without changing n_distinct."""
    # Five distinct values: a, b, c, d, e.
    sample = ["a", "a", "b", "b", "c", "d", "e"]
    summary = summarize_categorical(sample, top_k=2)

    top_entries = summary["top"]
    assert len(top_entries) == 2
    assert summary["n_distinct"] == 5
|
||||
|
||||
|
||||
def test_summarize_categorical_keys():
    """Check that the result has exactly the keys of the categorical_sub contract."""
    expected_keys = {
        "top",
        "mode",
        "mode_pct",
        "n_distinct",
        "entropy",
        "imbalance",
        "len_mean",
        "len_min",
        "len_max",
    }
    summary = summarize_categorical(["a", "b"])
    assert set(summary.keys()) == expected_keys
|
||||
Reference in New Issue
Block a user