"""Tests for extract_null_mask.

No real DuckDB is involved: each test injects a FAKE query_fn (a closure)
that returns predefined rows (standing in for the SELECT of 0/1 bits) and,
optionally, records the SQL it receives so the generated query
(CASE WHEN ... IS NULL + LIMIT) can be inspected. This keeps the tests
self-contained and independent of any backend.
"""

import os
import sys

# Make the module under test importable from this file's own directory.
sys.path.insert(0, os.path.dirname(__file__))

from extract_null_mask import extract_null_mask


def _fake_query(rows, captured=None, status="ok", error=None):
    """Build a FAKE query_fn.

    Args:
        rows: Rows handed back under the "rows" key on success.
        captured: Optional list; every SQL string received is appended to it
            so the caller can inspect the query afterwards.
        status: Any value other than "ok" makes the fake report a failure.
        error: Message reported on failure; falls back to "boom" when falsy.

    Returns:
        A one-argument callable taking the SQL text and returning a result
        dict with a "status" key plus either "rows" or "error".
    """

    def _run(sql):
        if captured is not None:
            captured.append(sql)
        if status == "ok":
            return {"status": "ok", "rows": rows}
        # The reported status is always "error", whatever non-ok value was given.
        return {"status": "error", "error": error or "boom"}

    return _run


def test_golden_mask_alineada():
    """Golden path: per-column 0/1 mask aligned by row, correct n, status ok."""
    # Each row mimics SELECT (CASE WHEN col IS NULL THEN 1 ELSE 0 END) AS col.
    fake_rows = [
        {"email": 0, "telefono": 1, "edad": 0},
        {"email": 0, "telefono": 0, "edad": 1},
        {"email": 1, "telefono": 1, "edad": 0},
    ]
    query_fn = _fake_query(fake_rows)

    result = extract_null_mask(query_fn, "clientes", ["email", "telefono", "edad"])

    assert result["status"] == "ok"
    assert result["table"] == "clientes"
    assert result["columns"] == ["email", "telefono", "edad"]
    assert result["n"] == 3

    expected_bits = {
        "email": [0, 0, 1],
        "telefono": [1, 0, 1],
        "edad": [0, 1, 0],
    }
    for column, bits in expected_bits.items():
        assert result["mask"][column] == bits

    # Every mask list has the same length as the reported row count.
    for bits in result["mask"].values():
        assert len(bits) == result["n"]


def test_celda_none_cuenta_como_falta():
    """A None cell is defensively counted as 1 (missing)."""
    fake_rows = [
        {"email": 0, "telefono": None},
        {"email": None, "telefono": 1},
        {"email": 1, "telefono": 0},
    ]
    query_fn = _fake_query(fake_rows)

    result = extract_null_mask(query_fn, "clientes", ["email", "telefono"])

    assert result["status"] == "ok"
    mask = result["mask"]
    assert mask["email"] == [0, 1, 1]
    assert mask["telefono"] == [1, 1, 0]
    assert result["n"] == 3


def test_columns_vacia_status_error():
    """Empty columns -> error status with empty columns/mask and n == 0."""
    result = extract_null_mask(_fake_query([]), "clientes", [])

    assert result["status"] == "error"
    assert "columns" in result["error"]
    assert result["table"] == "clientes"
    assert result["columns"] == []
    assert result["mask"] == {}
    assert result["n"] == 0


def test_query_fn_status_error_propaga():
    """A query_fn reporting status != ok is propagated as an error, mask {}."""
    failing_query = _fake_query([], status="error", error="db locked")

    result = extract_null_mask(failing_query, "clientes", ["email"])

    assert result["status"] == "error"
    assert "db locked" in result["error"]
    assert result["mask"] == {}
    assert result["n"] == 0


def test_query_fn_none_da_error_sin_reventar():
    """query_fn None -> degraded error result, no exception raised."""
    result = extract_null_mask(None, "clientes", ["email"])

    assert result["status"] == "error"
    assert result["columns"] == []
    assert result["mask"] == {}
    assert result["n"] == 0


def test_sql_contiene_case_y_limit():
    """The query has a CASE WHEN IS NULL per quoted column plus LIMIT on the table."""
    seen_sql = []
    query_fn = _fake_query([{"email": 0}], seen_sql)

    extract_null_mask(query_fn, "clientes_tbl", ["email"], max_rows=123)

    assert len(seen_sql) == 1
    sql = seen_sql[0]
    expected_fragments = (
        'CASE WHEN "email" IS NULL THEN 1 ELSE 0 END',
        'AS "email"',
        'FROM "clientes_tbl"',
        "LIMIT 123",
    )
    for fragment in expected_fragments:
        assert fragment in sql