"""Tests for profile_database — profiling a DuckDB database plus its relationships."""

import os
import sys
import tempfile

import duckdb

# Put the directory two levels above this file on sys.path so that the
# `pipelines` package import below resolves when the tests are run directly.
sys.path.insert(0, os.path.join(os.path.dirname(__file__), "..", ".."))

from pipelines.profile_database import profile_database


def _build_related_db(path: str) -> None:
    """Create a DuckDB file with two related tables: customers <- orders.

    ``customers.id`` acts as the key; ``orders.customer_id`` only holds ids
    that exist in ``customers``, so ``orders.customer_id -> customers.id`` is
    a foreign key that can be detected by containment.

    Args:
        path: Filesystem path of the DuckDB database file to create.
    """
    conn = duckdb.connect(path)
    try:
        conn.execute(
            "CREATE TABLE customers (id INTEGER, name VARCHAR, city VARCHAR)"
        )
        conn.execute(
            "INSERT INTO customers VALUES "
            "(1,'Ana','Madrid'),(2,'Luis','Sevilla'),"
            "(3,'Marta','Bilbao'),(4,'Jon','Vigo')"
        )
        conn.execute(
            "CREATE TABLE orders (order_id INTEGER, customer_id INTEGER, total DOUBLE)"
        )
        conn.execute(
            "INSERT INTO orders VALUES "
            "(10,1,99.5),(11,1,12.0),(12,2,45.0),"
            "(13,3,7.25),(14,4,200.0),(15,2,33.3)"
        )
    finally:
        # Always release the connection, even if a statement above fails.
        conn.close()


def test_profile_database_two_related_tables():
    """Profile a two-table database without writing a report.

    Checks the status, the per-table profiles, the detected
    orders.customer_id -> customers.id relationship, the mermaid join graph,
    and that no report paths are returned when ``write_report=False``.
    """
    with tempfile.TemporaryDirectory() as d:
        db_path = os.path.join(d, "shop.duckdb")
        _build_related_db(db_path)

        res = profile_database(db_path, write_report=False)

        # Status is ok and two tables were profiled.
        assert res["status"] == "ok", res
        prof = res["db_profile"]
        assert prof["n_tables"] == 2

        # Full table profiles are returned for both tables.
        assert len(prof["table_profiles"]) == 2
        profiled_tables = {tp["table"] for tp in prof["table_profiles"]}
        assert profiled_tables == {"customers", "orders"}

        # The orders.customer_id -> customers.id relationship is detected.
        fks = prof["fk_candidates"]
        assert any(
            fk.get("from_table") == "orders"
            and fk.get("from_col") == "customer_id"
            and fk.get("to_table") == "customers"
            and fk.get("to_col") == "id"
            for fk in fks
        ), fks

        # The join graph carries a mermaid diagram.
        graph = prof["join_graph"]
        assert "mermaid" in graph
        assert isinstance(graph["mermaid"], str)
        assert graph["mermaid"].startswith("graph LR")

        # No report paths are returned when write_report=False.
        assert res["report_md_path"] is None
        assert res["report_json_path"] is None


def test_profile_database_writes_report(tmp_path):
    """Profile with ``write_report=True`` and check the report files.

    Verifies that both the markdown and JSON report paths are returned and
    exist on disk, and that the markdown contains the expected headings and
    a mermaid code fence.
    """
    db_path = os.path.join(str(tmp_path), "shop2.duckdb")
    _build_related_db(db_path)
    report_dir = os.path.join(str(tmp_path), "reports")

    res = profile_database(db_path, report_dir=report_dir, write_report=True)

    assert res["status"] == "ok", res
    assert res["report_md_path"] is not None
    assert res["report_json_path"] is not None
    assert os.path.exists(res["report_md_path"])
    assert os.path.exists(res["report_json_path"])

    # Read through a context manager so the file handle is closed
    # deterministically instead of being left to the garbage collector.
    with open(res["report_md_path"], encoding="utf-8") as report_file:
        md = report_file.read()

    assert "# EDA base —" in md
    assert "## Relaciones inter-tabla" in md
    assert "```mermaid" in md