"""Tests for missingness_row_patterns."""
import os
import sys

# Make the module under test importable when it lives next to this file.
sys.path.insert(0, os.path.dirname(__file__))

from missingness_row_patterns import missingness_row_patterns

_EXPECTED_KEYS = {"n_rows", "n_patterns", "complete_rows", "patterns"}


def test_patron_dominante_completas_singleton():
    """Golden case: {A, B} missing together in 4 rows, 5 complete rows, 1 singleton {C}."""
    # 10 rows: A and B are missing together in rows 0-3, rows 4-8 are
    # complete, and row 9 is missing only C.
    mask = {
        "A": [1, 1, 1, 1, 0, 0, 0, 0, 0, 0],
        "B": [1, 1, 1, 1, 0, 0, 0, 0, 0, 0],
        "C": [0, 0, 0, 0, 0, 0, 0, 0, 0, 1],
    }

    result = missingness_row_patterns(mask)

    assert set(result.keys()) == _EXPECTED_KEYS
    assert result["n_rows"] == 10
    # Three distinct patterns: (A, B), () and (C,).
    assert result["n_patterns"] == 3
    # Rows 4-8 have nothing missing.
    assert result["complete_rows"] == 5

    # Expected order: row count descending, ties broken by fewer columns
    # first. () covers 5 rows, (A, B) covers 4, (C,) covers 1.
    expected = [
        ([], 5, 50.0),
        (["A", "B"], 4, 40.0),
        (["C"], 1, 10.0),
    ]
    patterns = result["patterns"]
    assert len(patterns) == 3
    for got, (cols, count, pct) in zip(patterns, expected):
        assert got["missing_cols"] == cols
        assert got["n_rows"] == count
        assert got["pct"] == pct

    # Output types.
    assert isinstance(result["n_rows"], int)
    assert isinstance(patterns[0]["pct"], float)


def test_mask_vacio():
    """An empty mask yields n_rows 0 and no patterns, without raising."""
    result = missingness_row_patterns({})
    assert result == {
        "n_rows": 0,
        "n_patterns": 0,
        "complete_rows": 0,
        "patterns": [],
    }

    # A non-dict input such as None also degrades to the empty result.
    assert missingness_row_patterns(None)["n_rows"] == 0

    # Columns that are present but hold empty lists mean zero rows.
    empty_columns = {"A": [], "B": []}
    assert missingness_row_patterns(empty_columns)["patterns"] == []


def test_top_n_trunca_pero_cuenta_todos():
    """top_n limits `patterns`, while n_patterns still counts ALL distinct patterns."""
    # Row by row the patterns are: () (A,) (A,) (B,) (A, B, C)
    mask = {
        "A": [0, 1, 1, 0, 1],
        "B": [0, 0, 0, 1, 1],
        "C": [0, 0, 0, 0, 1],
    }

    result = missingness_row_patterns(mask, top_n=2)

    assert result["n_rows"] == 5
    assert result["n_patterns"] == 4  # (), (A,), (B,), (A, B, C)
    assert result["complete_rows"] == 1

    # Only 2 patterns come back even though 4 exist.
    top = result["patterns"]
    assert len(top) == 2

    # (A,) leads with 2 rows; among the 1-row patterns the tie-break picks
    # () because it has 0 columns.
    expected = [(["A"], 2), ([], 1)]
    for got, (cols, count) in zip(top, expected):
        assert got["missing_cols"] == cols
        assert got["n_rows"] == count