"""Tests para profile_table — pipeline EDA one-shot del grupo `eda`. Crea una DuckDB temporal con tres columnas representativas: - id_str: enteros guardados como VARCHAR ('10','20',...) -> debe promocionarse a inferred_type "numeric" y recibir un bloque col["numeric"]. - precio: numerica nativa (DOUBLE). - categoria: categorica textual. Luego corre profile_table(write_report=False) y verifica el contrato. """ import os import tempfile import duckdb from pipelines.profile_table import profile_table def _make_db() -> str: """Crea una DuckDB temporal con la tabla de prueba y devuelve su path.""" tmp_dir = tempfile.mkdtemp(prefix="profile_table_test_") db_path = os.path.join(tmp_dir, "t.duckdb") con = duckdb.connect(db_path) con.execute( "CREATE TABLE items (" " id_str VARCHAR," # enteros guardados como texto " precio DOUBLE," # numerica nativa " categoria VARCHAR" # categorica ")" ) rows = [ ("10", 9.5, "alfa"), ("20", 12.0, "beta"), ("30", 7.25, "alfa"), ("40", 15.75, "gamma"), ("50", 3.0, "beta"), ("60", 22.4, "alfa"), ] con.executemany("INSERT INTO items VALUES (?, ?, ?)", rows) con.close() return db_path def _col(profile: dict, name: str) -> dict: return next(c for c in profile["columns"] if c["name"] == name) def test_varchar_integer_promotes_to_numeric(): db_path = _make_db() r = profile_table(db_path, "items", sample=5000, write_report=False) # status ok y sin tocar disco. assert r["status"] == "ok", r assert r["report_md_path"] is None assert r["report_json_path"] is None prof = r["profile"] # La columna VARCHAR-entera se promociono a numeric con bloque numeric. id_col = _col(prof, "id_str") assert id_col["inferred_type"] == "numeric", id_col["inferred_type"] assert id_col["numeric"] is not None assert id_col["numeric"]["min"] == 10.0 assert id_col["numeric"]["max"] == 60.0 # La numerica nativa sigue siendo numeric con su bloque. precio_col = _col(prof, "precio") assert precio_col["inferred_type"] == "numeric" assert precio_col["numeric"] is not None # La categorica recibe su bloque categorical. cat_col = _col(prof, "categoria") assert cat_col["inferred_type"] in ("categorical", "text") assert cat_col["categorical"] is not None assert cat_col["categorical"]["mode"] == "alfa" # key_candidates es una lista; quality_score existe (tabla y columnas). assert isinstance(prof["key_candidates"], list) assert prof["quality_score"] is not None assert id_col["quality_score"] is not None # type_breakdown recalculado refleja la promocion (>=2 numeric). assert prof["type_breakdown"]["numeric"] >= 2