"""Tests para kmeans_segments.""" import numpy as np from kmeans_segments import kmeans_segments def _three_blobs(seed: int = 0, per_blob: int = 40): """Genera 3 blobs gaussianos bien separados en 2D, alineados por fila.""" rng = np.random.default_rng(seed) centers = [(0.0, 0.0), (12.0, 12.0), (0.0, 12.0)] xs: list[float] = [] ys: list[float] = [] for cx, cy in centers: pts = rng.normal(loc=(cx, cy), scale=0.4, size=(per_blob, 2)) xs.extend(float(p[0]) for p in pts) ys.extend(float(p[1]) for p in pts) return {"x": xs, "y": ys} def test_three_separated_blobs_finds_k3(): columns = _three_blobs(seed=0, per_blob=40) result = kmeans_segments(columns, k_min=2, k_max=8) assert result["best_k"] == 3 assert result["silhouette"] > 0.5 assert result["n_features"] == 2 assert result["n_rows_used"] == 120 assert sum(result["cluster_sizes"]) == 120 assert len(result["centers"]) == 3 # scores_by_k cubre todo el rango probado. ks = [s["k"] for s in result["scores_by_k"]] assert ks == list(range(2, 9)) def test_insufficient_rows_returns_note(): # Solo 3 filas válidas, k_min*2 = 4 -> insuficiente. columns = {"x": [1.0, 2.0, 3.0], "y": [1.0, 2.0, 3.0]} result = kmeans_segments(columns, k_min=2, k_max=8) assert result["best_k"] == 0 assert result["note"] == "datos insuficientes" def test_insufficient_numeric_columns_returns_note(): # Una sola columna numérica; la otra es texto -> menos de 2 numéricas. columns = { "x": [1.0, 2.0, 3.0, 4.0, 5.0, 6.0], "label": ["a", "b", "c", "d", "e", "f"], } result = kmeans_segments(columns, k_min=2, k_max=8) assert result["best_k"] == 0 assert result["note"] == "datos insuficientes" def test_rows_with_none_are_dropped(): columns = _three_blobs(seed=1, per_blob=40) # Inyectar None en una fila; debe descartarse, dejando 119. columns["x"][0] = None result = kmeans_segments(columns, k_min=2, k_max=8) assert result["best_k"] == 3 assert result["n_rows_used"] == 119