"""Test del pipeline render_automatic_eda — EDA completo a PDF + PPTX. Self-contained: crea un DuckDB temporal pequeño con categóricas + fecha + lat/lon + varias numéricas, corre el pipeline (sin LLM) y verifica que emite PDF y PPTX con páginas/slides, manifest, y que los capítulos dependientes de ctx quedan POBLADOS (sin la nota de degradación). """ import os import sys _HERE = os.path.dirname(os.path.abspath(__file__)) _FUNCTIONS = os.path.abspath(os.path.join(_HERE, "..", "..")) # python/functions if _FUNCTIONS not in sys.path: sys.path.insert(0, _FUNCTIONS) import duckdb # noqa: E402 from pipelines.render_automatic_eda import render_automatic_eda # noqa: E402 def _make_db(path): con = duckdb.connect(path) con.execute( "CREATE TABLE sales (d DATE, region VARCHAR, channel VARCHAR, " "amount DOUBLE, units INTEGER, lat DOUBLE, lon DOUBLE)" ) from datetime import date, timedelta regions = ["norte", "sur", "este"] channels = ["web", "tienda"] centers = {"norte": (43.0, -3.0), "sur": (37.0, -5.0), "este": (39.5, -0.4)} rows = [] d0 = date(2024, 1, 1) for i in range(180): r = regions[i % 3] ch = channels[i % 2] clat, clon = centers[r] rows.append(( d0 + timedelta(days=i), r, ch, round(100 + (i % 7) * 13.5 + (5 if ch == "web" else 0), 2), 10 + (i % 5), round(clat + (i % 3) * 0.1, 4), round(clon + (i % 4) * 0.1, 4), )) con.executemany("INSERT INTO sales VALUES (?,?,?,?,?,?,?)", rows) con.close() def test_pipeline_emits_pdf_and_pptx_with_chapters(tmp_path): db = str(tmp_path / "sales.duckdb") _make_db(db) out = str(tmp_path / "out") r = render_automatic_eda(db, "sales", run_models=True, run_series=True, run_llm=False, out_dir=out, basename="test_sales") assert r["status"] == "ok", r.get("error") # Both formats produced. assert r["pdf_path"] and os.path.exists(r["pdf_path"]) assert r["pptx_path"] and os.path.exists(r["pptx_path"]) assert (r["n_pages"] or 0) > 0 assert (r["n_slides"] or 0) > 0 # Per-chapter manifest written next to the output. assert r["manifest_path"] and os.path.exists(r["manifest_path"]) def test_pipeline_chapters_populated_not_degraded(tmp_path): """The 4 ctx-dependent chapters build with real data (no degradation note).""" import json db = str(tmp_path / "sales.duckdb") _make_db(db) out = str(tmp_path / "out") r = render_automatic_eda(db, "sales", run_models=True, run_series=True, run_llm=False, out_dir=out, basename="t2") assert r["status"] == "ok" # The manifest lists the ctx-dependent chapters as actually rendered. with open(r["manifest_path"], encoding="utf-8") as fh: man = json.load(fh) chapters = man.get("chapters") or {} for cid in ("modelos", "timeseries", "geospatial", "agregacion"): assert cid in chapters, f"capítulo {cid} ausente del manifest: {list(chapters)}" def test_pipeline_bad_db_degrades_without_raising(tmp_path): r = render_automatic_eda(str(tmp_path / "nope.duckdb"), "ghost", out_dir=str(tmp_path / "o")) assert r["status"] == "error" assert "error" in r