feat(browser): auto-commit con 178 cambios
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -0,0 +1,64 @@
|
||||
"""Tests para kmeans_segments."""
|
||||
|
||||
import numpy as np
|
||||
|
||||
from kmeans_segments import kmeans_segments
|
||||
|
||||
|
||||
def _three_blobs(seed: int = 0, per_blob: int = 40):
    """Build three well-separated 2D Gaussian blobs as row-aligned columns.

    Args:
        seed: Seed for the NumPy random generator, so output is reproducible.
        per_blob: Number of points sampled around each of the three centers.

    Returns:
        A dict with keys ``"x"`` and ``"y"``, each a list of Python floats of
        length ``3 * per_blob``. Index ``i`` of both lists describes the same
        point, and points are grouped blob by blob in center order.
    """
    generator = np.random.default_rng(seed)
    blob_centers = ((0.0, 0.0), (12.0, 12.0), (0.0, 12.0))
    # Sample blob by blob, in center order, so the random stream is consumed
    # in a fixed sequence and a given seed always yields the same points.
    samples = [
        generator.normal(loc=center, scale=0.4, size=(per_blob, 2))
        for center in blob_centers
    ]
    stacked = np.concatenate(samples, axis=0)
    # tolist() converts NumPy scalars into plain Python floats.
    return {"x": stacked[:, 0].tolist(), "y": stacked[:, 1].tolist()}
|
||||
|
||||
|
||||
def test_three_separated_blobs_finds_k3():
    """Check that three well-separated blobs are segmented with best_k == 3."""
    data = _three_blobs(seed=0, per_blob=40)
    outcome = kmeans_segments(data, k_min=2, k_max=8)

    assert outcome["best_k"] == 3
    assert outcome["silhouette"] > 0.5
    assert outcome["n_features"] == 2
    assert outcome["n_rows_used"] == 120
    assert sum(outcome["cluster_sizes"]) == 120
    assert len(outcome["centers"]) == 3
    # scores_by_k must cover the whole probed range, k = 2..8 inclusive.
    probed_ks = [entry["k"] for entry in outcome["scores_by_k"]]
    assert probed_ks == list(range(2, 9))
|
||||
|
||||
|
||||
def test_insufficient_rows_returns_note():
    """Check that too few rows gives best_k == 0 and the insufficient-data note."""
    # Only 3 valid rows while k_min * 2 = 4 -> insufficient.
    few_rows = {
        "x": [1.0, 2.0, 3.0],
        "y": [1.0, 2.0, 3.0],
    }
    outcome = kmeans_segments(few_rows, k_min=2, k_max=8)

    assert outcome["best_k"] == 0
    assert outcome["note"] == "datos insuficientes"
|
||||
|
||||
|
||||
def test_insufficient_numeric_columns_returns_note():
    """Check that a single numeric column gives best_k == 0 and the note."""
    # One numeric column plus one text column -> fewer than 2 numeric columns.
    numeric_values = [1.0, 2.0, 3.0, 4.0, 5.0, 6.0]
    text_values = ["a", "b", "c", "d", "e", "f"]
    mixed = {"x": numeric_values, "label": text_values}
    outcome = kmeans_segments(mixed, k_min=2, k_max=8)

    assert outcome["best_k"] == 0
    assert outcome["note"] == "datos insuficientes"
|
||||
|
||||
|
||||
def test_rows_with_none_are_dropped():
    """Check that a row containing None is excluded from the rows used."""
    data = _three_blobs(seed=1, per_blob=40)
    # Inject None into a single row; it must be discarded, leaving 119 rows.
    data["x"][0] = None
    outcome = kmeans_segments(data, k_min=2, k_max=8)

    assert outcome["best_k"] == 3
    assert outcome["n_rows_used"] == 119
|
||||
Reference in New Issue
Block a user