fix(eda): bugs de bajo riesgo del benchmark (H1,H5,H12,H13,H14) + tests faltantes
- H1: render_eda_markdown ya no aplica doble x100 a outlier_pct (336% -> real)
- H5: profile_database filtra base_tables_only (excluye VIEWs; sakila 21->16)
- H12: suggest_reexpression salta columnas no continuas
- H13: to_returns/profile_table elige retornos (financiera) vs diferencias (física)
- H14: test de regresión ATTACH sqlite vía information_schema
- +8 tests de las funciones eda nuevas (acf_pacf, adf_kpss, ...). 77 tests verdes
- L/M (H2,H3,H4,H6,H7,H8,H9,H10,H11) quedan en issues 0174-0177 para revisión

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -78,6 +78,77 @@ def test_profile_database_two_related_tables():
|
||||
assert res["report_json_path"] is None
|
||||
|
||||
|
||||
def test_profile_database_excluye_views(tmp_path):
    """Regression H5: ``profile_database`` must ignore VIEWs.

    A VIEW is not a real table. ``profile_database`` must profile only the
    BASE TABLEs and must not count VIEWs: they inflate ``n_tables`` and
    multiply false FK candidates, since their columns are copies of columns
    of the base tables.
    """
    db_path = os.path.join(str(tmp_path), "withviews.duckdb")
    _build_related_db(db_path)

    con = duckdb.connect(db_path)
    try:
        con.execute("CREATE VIEW customers_v AS SELECT id, name FROM customers")
        con.execute("CREATE VIEW orders_v AS SELECT order_id, total FROM orders")
    finally:
        # Close even when a CREATE VIEW fails so the connection is not leaked.
        con.close()

    res = profile_database(db_path, write_report=False)

    assert res["status"] == "ok", res
    prof = res["db_profile"]
    # Only the 2 base tables; the 2 views are left out.
    assert prof["n_tables"] == 2
    profiled = {tp["table"] for tp in prof["table_profiles"]}
    assert profiled == {"customers", "orders"}
    assert "customers_v" not in profiled
    assert "orders_v" not in profiled
|
||||
|
||||
|
||||
def test_profile_database_attach_sqlite_no_usa_sqlite_master(tmp_path):
    """Regression H14: profile a SQLite database materialized via ATTACH.

    The SQLite file is attached in DuckDB, its base tables are discovered
    through ``information_schema`` (not ``sqlite_master``) and copied into a
    DuckDB file, which is then profiled with ``profile_database``. Guards
    against the original 'sqlite_master does not exist' bug.
    """
    import sqlite3
    from contextlib import closing

    sqlite_path = os.path.join(str(tmp_path), "shop.sqlite")
    # closing() guarantees the SQLite handle is released even if a statement
    # raises; sqlite3's own context manager only manages the transaction.
    with closing(sqlite3.connect(sqlite_path)) as sconn:
        sconn.execute("CREATE TABLE customers (id INTEGER PRIMARY KEY, name TEXT)")
        sconn.execute("INSERT INTO customers VALUES (1,'Ana'),(2,'Luis'),(3,'Marta')")
        sconn.execute(
            "CREATE TABLE orders (order_id INTEGER, customer_id INTEGER, total REAL)"
        )
        sconn.execute(
            "INSERT INTO orders VALUES (10,1,99.5),(11,2,12.0),(12,3,7.25),(13,1,5.0)"
        )
        sconn.execute("CREATE VIEW big_orders AS SELECT * FROM orders WHERE total > 10")
        sconn.commit()

    ddb_path = os.path.join(str(tmp_path), "shop_mat.duckdb")
    con = duckdb.connect(ddb_path)
    try:
        con.execute("INSTALL sqlite")
        con.execute("LOAD sqlite")
        # Double any single quote so a temp path containing "'" cannot break
        # out of the SQL string literal.
        attach_target = sqlite_path.replace("'", "''")
        con.execute(f"ATTACH '{attach_target}' AS src (TYPE sqlite)")
        rows = con.execute(
            "SELECT table_name FROM information_schema.tables "
            "WHERE table_catalog='src' AND table_type='BASE TABLE' "
            "AND table_name NOT LIKE 'sqlite_%'"
        ).fetchall()
        for (name,) in rows:
            con.execute(f'CREATE TABLE "{name}" AS SELECT * FROM src."{name}"')
        con.execute("DETACH src")
    finally:
        # Close even when a statement above fails so the connection is not leaked.
        con.close()

    res = profile_database(ddb_path, write_report=False)
    assert res["status"] == "ok", res
    prof = res["db_profile"]
    # Only the 2 materialized base tables (the VIEW was not materialized).
    profiled = {tp["table"] for tp in prof["table_profiles"]}
    assert profiled == {"customers", "orders"}
    # FK orders.customer_id -> customers.id must be detectable.
    assert any(
        fk.get("from_table") == "orders" and fk.get("to_table") == "customers"
        for fk in prof["fk_candidates"]
    ), prof["fk_candidates"]
|
||||
|
||||
|
||||
def test_profile_database_writes_report(tmp_path):
|
||||
db_path = os.path.join(str(tmp_path), "shop2.duckdb")
|
||||
_build_related_db(db_path)
|
||||
|
||||
Reference in New Issue
Block a user