Files
fn_registry/python/functions/pipelines/profile_table_test.py
T
egutierrez 763e06c127 feat(browser): auto-commit con 178 cambios
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-06-20 18:22:23 +02:00

84 lines
2.8 KiB
Python

"""Tests para profile_table — pipeline EDA one-shot del grupo `eda`.
Crea una DuckDB temporal con tres columnas representativas:
- id_str: enteros guardados como VARCHAR ('10','20',...) -> debe promocionarse
a inferred_type "numeric" y recibir un bloque col["numeric"].
- precio: numerica nativa (DOUBLE).
- categoria: categorica textual.
Luego corre profile_table(write_report=False) y verifica el contrato.
"""
import os
import shutil
import tempfile

import duckdb

from pipelines.profile_table import profile_table
def _make_db() -> str:
    """Create a temporary DuckDB file holding the test table and return its path.

    The ``items`` table is created with three columns and six rows:

    - ``id_str``: integers stored as VARCHAR ('10', '20', ...).
    - ``precio``: native numeric column (DOUBLE).
    - ``categoria``: textual categorical column.

    Returns:
        Path of the ``t.duckdb`` file, located inside a fresh directory
        created with ``tempfile.mkdtemp``. This function does not remove
        that directory.
    """
    tmp_dir = tempfile.mkdtemp(prefix="profile_table_test_")
    db_path = os.path.join(tmp_dir, "t.duckdb")
    # Use the connection as a context manager so it is closed even when the
    # DDL or the inserts raise; otherwise a failure would leave the database
    # file handle open.
    with duckdb.connect(db_path) as con:
        con.execute(
            "CREATE TABLE items ("
            " id_str VARCHAR,"  # integers stored as text
            " precio DOUBLE,"  # native numeric
            " categoria VARCHAR"  # categorical
            ")"
        )
        rows = [
            ("10", 9.5, "alfa"),
            ("20", 12.0, "beta"),
            ("30", 7.25, "alfa"),
            ("40", 15.75, "gamma"),
            ("50", 3.0, "beta"),
            ("60", 22.4, "alfa"),
        ]
        con.executemany("INSERT INTO items VALUES (?, ?, ?)", rows)
    return db_path
def _col(profile: dict, name: str) -> dict:
    """Return the first entry of ``profile["columns"]`` whose "name" equals *name*.

    Raises:
        StopIteration: if no entry has that name.
    """
    for entry in profile["columns"]:
        if entry["name"] == name:
            return entry
    # Same exception the exhausted-iterator lookup produces on a miss.
    raise StopIteration
def test_varchar_integer_promotes_to_numeric():
    """Profile the fixture table and check the returned contract.

    Builds the temporary database with ``_make_db``, runs ``profile_table``
    with ``write_report=False`` and asserts on the result: status, absent
    report paths, per-column inferred types and blocks, and the table-level
    summary fields. The temporary directory is removed afterwards, whether
    the assertions pass or fail.
    """
    db_path = _make_db()
    try:
        r = profile_table(db_path, "items", sample=5000, write_report=False)

        # Status is ok and no report paths are returned.
        assert r["status"] == "ok", r
        assert r["report_md_path"] is None
        assert r["report_json_path"] is None

        prof = r["profile"]

        # The integer-valued VARCHAR column is promoted to numeric and
        # carries a numeric block.
        id_col = _col(prof, "id_str")
        assert id_col["inferred_type"] == "numeric", id_col["inferred_type"]
        assert id_col["numeric"] is not None
        assert id_col["numeric"]["min"] == 10.0
        assert id_col["numeric"]["max"] == 60.0

        # The native numeric column stays numeric with its block.
        precio_col = _col(prof, "precio")
        assert precio_col["inferred_type"] == "numeric"
        assert precio_col["numeric"] is not None

        # The categorical column gets its categorical block; "alfa" is the
        # most frequent value in the fixture rows.
        cat_col = _col(prof, "categoria")
        assert cat_col["inferred_type"] in ("categorical", "text")
        assert cat_col["categorical"] is not None
        assert cat_col["categorical"]["mode"] == "alfa"

        # key_candidates is a list; quality_score exists (table and columns).
        assert isinstance(prof["key_candidates"], list)
        assert prof["quality_score"] is not None
        assert id_col["quality_score"] is not None

        # The recomputed type_breakdown reflects the promotion (>= 2 numeric).
        assert prof["type_breakdown"]["numeric"] >= 2
    finally:
        # The database lives in its own directory (the parent of db_path);
        # remove it so repeated runs do not accumulate temp directories.
        # ignore_errors keeps a cleanup problem from masking a test failure.
        shutil.rmtree(os.path.dirname(db_path), ignore_errors=True)