feat(browser): auto-commit con 178 cambios

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
2026-06-20 18:22:23 +02:00
parent 7d100e7f3e
commit 763e06c127
178 changed files with 19917 additions and 317 deletions
@@ -0,0 +1,90 @@
"""Tests para summarize_categorical."""
import sys
import os
sys.path.insert(0, os.path.dirname(__file__))
from summarize_categorical import summarize_categorical
def test_summarize_categorical_repeated():
    """Summarize a list with repeated values, a None and an empty string.

    Verifies that ``top`` is ordered by descending count and that the mode,
    distinct count, entropy, imbalance and length bounds match the input.
    """
    summary = summarize_categorical(["a", "a", "b", "c", "a", None, ""])
    # The test expects None to be dropped, leaving 6 non-null values:
    # a, a, b, c, a, "".
    expected_share = 3 / 6

    top_values = [entry["value"] for entry in summary["top"]]
    assert top_values == ["a", "b", "c", ""]

    leader = summary["top"][0]
    assert leader["count"] == 3

    # Entries in top must be sorted by count, highest first.
    top_counts = [entry["count"] for entry in summary["top"]]
    assert sorted(top_counts, reverse=True) == top_counts

    assert abs(leader["pct"] - expected_share) < 1e-12
    assert summary["mode"] == "a"
    assert abs(summary["mode_pct"] - expected_share) < 1e-12
    assert summary["n_distinct"] == 4
    assert summary["entropy"] > 0

    # Imbalance is expected to be max_count (3) over min_count (1).
    expected_imbalance = 3 / 1
    assert summary["imbalance"] == expected_imbalance

    # The empty string contributes length 0; every other value has length 1.
    assert summary["len_min"] == 0
    assert summary["len_max"] == 1
def test_summarize_categorical_empty():
    """An empty input yields an empty ``top`` and None for every other key."""
    summary = summarize_categorical([])
    none_valued_keys = (
        "mode",
        "mode_pct",
        "n_distinct",
        "entropy",
        "imbalance",
        "len_mean",
        "len_min",
        "len_max",
    )

    assert summary["top"] == []
    # Checked one key at a time so a failure points at the offending key.
    for name in none_valued_keys:
        assert summary[name] is None
def test_summarize_categorical_all_none():
    """A list holding only None values is expected to behave like an empty one."""
    summary = summarize_categorical([None] * 3)

    assert summary["top"] == []
    for name in ("n_distinct", "entropy"):
        assert summary[name] is None
def test_summarize_categorical_single_value():
    """A single distinct value gives entropy 0.0 and imbalance 1.0."""
    summary = summarize_categorical(["x"] * 3)

    # Expected scalar fields, listed in the order they are checked.
    expected = {
        "n_distinct": 1,
        "entropy": 0.0,
        "imbalance": 1.0,
        "mode": "x",
        "mode_pct": 1.0,
        "len_mean": 1.0,
    }
    for name, wanted in expected.items():
        assert summary[name] == wanted
def test_summarize_categorical_top_k():
    """``top_k`` caps the number of ``top`` entries but not ``n_distinct``."""
    # Five distinct single-character values: a, a, b, b, c, d, e.
    sample = list("aabbcde")
    summary = summarize_categorical(sample, top_k=2)

    assert len(summary["top"]) == 2
    assert summary["n_distinct"] == 5
def test_summarize_categorical_keys():
    """The returned dict exposes exactly the keys of the categorical_sub contract."""
    contract_keys = {
        "top",
        "mode",
        "mode_pct",
        "n_distinct",
        "entropy",
        "imbalance",
        "len_mean",
        "len_min",
        "len_max",
    }
    summary = summarize_categorical(["a", "b"])

    actual_keys = set(summary.keys())
    assert actual_keys == contract_keys