feat(browser): auto-commit con 178 cambios
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -0,0 +1,96 @@
|
||||
"""Tests para profile_database — perfilado de una base DuckDB + relaciones."""
|
||||
|
||||
import os
|
||||
import sys
|
||||
import tempfile
|
||||
|
||||
import duckdb
|
||||
|
||||
sys.path.insert(0, os.path.join(os.path.dirname(__file__), "..", ".."))
|
||||
|
||||
from pipelines.profile_database import profile_database
|
||||
|
||||
|
||||
def _build_related_db(path: str) -> None:
    """Create a DuckDB database at *path* with two related tables.

    The tables are ``customers`` and ``orders``. ``customers.id`` acts as
    the key and every ``orders.customer_id`` value is one of those ids, so
    ``orders.customer_id -> customers.id`` is a foreign key that can be
    detected by containment.
    """
    # Statements are executed in order: each table is created, then filled.
    statements = (
        "CREATE TABLE customers (id INTEGER, name VARCHAR, city VARCHAR)",
        (
            "INSERT INTO customers VALUES "
            "(1,'Ana','Madrid'),(2,'Luis','Sevilla'),"
            "(3,'Marta','Bilbao'),(4,'Jon','Vigo')"
        ),
        "CREATE TABLE orders (order_id INTEGER, customer_id INTEGER, total DOUBLE)",
        (
            "INSERT INTO orders VALUES "
            "(10,1,99.5),(11,1,12.0),(12,2,45.0),"
            "(13,3,7.25),(14,4,200.0),(15,2,33.3)"
        ),
    )

    db = duckdb.connect(path)
    try:
        for sql in statements:
            db.execute(sql)
    finally:
        # Close the connection even if one of the statements fails.
        db.close()
|
||||
|
||||
|
||||
def test_profile_database_two_related_tables():
    """Profile the two-table fixture with ``write_report=False`` and check the result."""
    fk_fields = ("from_table", "from_col", "to_table", "to_col")
    expected_fk = ("orders", "customer_id", "customers", "id")

    with tempfile.TemporaryDirectory() as workdir:
        database = os.path.join(workdir, "shop.duckdb")
        _build_related_db(database)

        res = profile_database(database, write_report=False)

        # The run reports success and two profiled tables.
        assert res["status"] == "ok", res
        prof = res["db_profile"]
        assert prof["n_tables"] == 2

        # A full table profile is returned for each of the two tables.
        table_profiles = prof["table_profiles"]
        assert len(table_profiles) == 2
        names = {entry["table"] for entry in table_profiles}
        assert names == {"customers", "orders"}

        # The orders.customer_id -> customers.id relation is detected.
        fks = prof["fk_candidates"]
        assert any(
            tuple(fk.get(field) for field in fk_fields) == expected_fk
            for fk in fks
        ), fks

        # The join graph carries a mermaid diagram.
        graph = prof["join_graph"]
        assert "mermaid" in graph
        diagram = graph["mermaid"]
        assert isinstance(diagram, str)
        assert diagram.startswith("graph LR")

        # No report paths are returned when write_report=False.
        for key in ("report_md_path", "report_json_path"):
            assert res[key] is None
|
||||
|
||||
|
||||
def test_profile_database_writes_report(tmp_path):
    """Profile the two-table fixture with ``write_report=True`` and check the reports.

    Args:
        tmp_path: pytest-provided temporary directory; the database and the
            ``reports`` directory are both placed under it.
    """
    db_path = os.path.join(str(tmp_path), "shop2.duckdb")
    _build_related_db(db_path)
    report_dir = os.path.join(str(tmp_path), "reports")

    res = profile_database(db_path, report_dir=report_dir, write_report=True)

    assert res["status"] == "ok", res

    # Both report paths must be returned and point at existing files.
    assert res["report_md_path"] is not None
    assert res["report_json_path"] is not None
    assert os.path.exists(res["report_md_path"])
    assert os.path.exists(res["report_json_path"])

    # Read through a context manager so the handle is closed deterministically
    # (a bare open(...).read() leaks it and triggers ResourceWarning).
    with open(res["report_md_path"], encoding="utf-8") as report_file:
        md = report_file.read()

    # The markdown report contains the expected headings and a mermaid block.
    assert "# EDA base —" in md
    assert "## Relaciones inter-tabla" in md
    assert "```mermaid" in md
|
||||
Reference in New Issue
Block a user