"""Tests para render_eda_pdf. Importa el módulo directo (sys.path), igual que el resto de tests del grupo eda, para no depender del registro en __init__.py (lo añade el orquestador al integrar). """ import os import sys sys.path.insert(0, os.path.dirname(__file__)) from render_eda_pdf import ( render_eda_pdf, render_eda_pdf_relational, _models_pages, _series_pages, _caveats_pages, ) class _StubPdf: """Captura pdf.savefig sin escribir nada — para testear builders aislados.""" def __init__(self): self.figs = 0 def savefig(self, fig): self.figs += 1 def _synthetic_profile() -> dict: """TableProfile sintético mínimo: 2 numéricas + 1 categórica + overview.""" return { "table": "ventas", "source": "data/ventas.csv", "profiled_at": "2026-06-28 10:00 UTC", "n_rows": 1000, "n_cols": 3, "null_cell_pct": 0.02, "duplicate_rows": 5, "duplicate_pct": 0.005, "quality_score": 92.5, "type_breakdown": {"numeric": 2, "categorical": 1}, "key_candidates": ["id"], "columns": [ { "name": "precio", "inferred_type": "numeric", "semantic_type": "currency", "null_pct": 0.0, "distinct_count": 850, "unique_pct": 0.85, "quality_score": 95.0, "flags": [], "numeric": { "min": 1.0, "max": 100.0, "median": 40.0, "mean": 42.5, "std": 12.3, "p25": 30.0, "p75": 55.0, "outlier_pct": 1.2, "distribution_type": "right-skewed", "histogram": [ {"lo": 0.0, "hi": 25.0, "count": 100}, {"lo": 25.0, "hi": 50.0, "count": 500}, {"lo": 50.0, "hi": 75.0, "count": 300}, {"lo": 75.0, "hi": 100.0, "count": 50}, ], }, }, { "name": "unidades", "inferred_type": "numeric", "semantic_type": "integer", "null_pct": 0.01, "distinct_count": 40, "unique_pct": 0.04, "quality_score": 88.0, "flags": ["has_nulls"], "numeric": { "min": 1.0, "max": 12.0, "median": 4.0, "mean": 4.8, "std": 2.1, "outlier_pct": 0.0, "distribution_type": "normal", "histogram": [ {"lo": 1.0, "hi": 4.0, "count": 400}, {"lo": 4.0, "hi": 8.0, "count": 450}, {"lo": 8.0, "hi": 12.0, "count": 150}, ], }, }, { "name": "categoria", "inferred_type": "categorical", 
"semantic_type": "", "null_pct": 0.0, "distinct_count": 3, "unique_pct": 0.003, "quality_score": 99.0, "flags": [], "categorical": { "entropy": 1.05, "top": [ {"value": "neumaticos", "count": 500, "pct": 0.5}, {"value": "aceite", "count": 300, "pct": 0.3}, {"value": "filtros", "count": 200, "pct": 0.2}, ], }, }, ], "correlations": { "pairs": [ {"a": "precio", "b": "unidades", "value": -0.42, "method": "pearson"}, ], }, } def test_golden_genera_pdf_multipagina(tmp_path): """Caso real: profile completo -> PDF existe, pesa >0 y tiene varias páginas.""" out = str(tmp_path / "eda_ventas.pdf") res = render_eda_pdf(_synthetic_profile(), out, title="EDA — ventas") assert isinstance(res, dict) assert set(res.keys()) == {"pdf_path", "n_pages", "note"} assert res["pdf_path"] == out assert os.path.exists(out) assert os.path.getsize(out) > 0 # Cover + overview + numéricas + categóricas + calidad + correlaciones >= 5. assert res["n_pages"] >= 5 # Cabecera de archivo PDF. with open(out, "rb") as fh: assert fh.read(4) == b"%PDF" def test_edge_profile_vacio_no_revienta(tmp_path): """Edge: dict vacío -> 1 página garantizada, sin excepción.""" out = str(tmp_path / "vacio.pdf") res = render_eda_pdf({}, out) assert os.path.exists(out) assert os.path.getsize(out) > 0 assert res["n_pages"] >= 1 assert res["pdf_path"] == out def test_edge_profile_none_no_revienta(tmp_path): """Edge: None -> tratado como vacío, 1 página, sin excepción.""" out = str(tmp_path / "none.pdf") res = render_eda_pdf(None, out) assert os.path.exists(out) assert res["n_pages"] >= 1 def test_edge_solo_numericas(tmp_path): """Edge: profile sólo con columnas numéricas (sin categóricas ni corr).""" prof = { "table": "t", "n_rows": 10, "n_cols": 1, "columns": [ { "name": "x", "inferred_type": "numeric", "quality_score": 80.0, "numeric": { "median": 2.0, "mean": 2.0, "histogram": [{"lo": 0.0, "hi": 4.0, "count": 10}], }, }, ], } out = str(tmp_path / "num.pdf") res = render_eda_pdf(prof, out) assert os.path.exists(out) 
assert res["n_pages"] >= 2 # cover + numéricas al menos. def test_forward_compat_seccion_desconocida(tmp_path): """Error/forward-compat: un bloque nuevo del profile se vuelca, no rompe.""" prof = { "table": "t", "n_rows": 5, "columns": [], # Bloques que este renderer no conoce (otros agentes los añaden): "models": {"kmeans": {"k": 3, "silhouette": 0.55}}, "caveats": ["muestra pequeña", "fechas como texto"], } out = str(tmp_path / "fwd.pdf") res = render_eda_pdf(prof, out) assert os.path.exists(out) assert res["n_pages"] >= 1 # No se perdió ninguna sección por error. assert "omitida" not in res["note"] # --------------------------------------------------------------------------- # # H4: builders dedicados para models / series / caveats (antes caían al volcado # genérico como str(dict) truncado). Se testean aislados con un stub de pdf. # --------------------------------------------------------------------------- # def _sample_models() -> dict: return { "n_numeric_cols": 3, "pca": { "n_components": 2, "n_rows_used": 1000, "n_features": 3, "explained_variance_ratio": [0.62, 0.21], "cumulative": [0.62, 0.83], "top_loadings": [ {"component": 0, "feature": "precio", "loading": 0.71}, {"component": 1, "feature": "unidades", "loading": -0.55}, ], }, "kmeans": { "best_k": 3, "silhouette": 0.48, "cluster_sizes": [500, 300, 200], "scores_by_k": [{"k": 3, "silhouette": 0.48, "inertia": 900.0}], }, "outliers": {"n_outliers": 35, "outlier_pct": 3.5, "threshold": -0.51}, "normality": {"precio": {"jarque_bera": {"p": 0.0001}, "is_normal": False}}, "note": "", } def _sample_series() -> dict: return { "precio": { "stationarity": {"verdict": "non_stationary"}, "acf_pacf": {"is_autocorrelated": True}, "stl": {"trend_strength": 0.95, "seasonal_strength": 0.10, "period": 7}, "levels_suggested": True, "levels_kind": "returns", }, } def _sample_caveats() -> dict: return { "n": 1, "caveats": [ {"id": "exploratory_nature", "topic": "naturaleza exploratoria", "message": "El EDA genera 
hipótesis, no conclusiones."}, ], } def test_models_builder_produces_pages(): pdf = _StubPdf() assert _models_pages(pdf, _sample_models()) >= 1 assert pdf.figs >= 1 def test_series_builder_produces_pages(): pdf = _StubPdf() assert _series_pages(pdf, _sample_series()) >= 1 assert pdf.figs >= 1 def test_caveats_builder_produces_pages(): pdf = _StubPdf() assert _caveats_pages(pdf, _sample_caveats()) >= 1 assert pdf.figs >= 1 def test_builders_tolerate_none_and_empty(): pdf = _StubPdf() # None / vacío -> 0 páginas, sin excepción. assert _models_pages(pdf, None) == 0 assert _series_pages(pdf, {}) == 0 assert _caveats_pages(pdf, None) == 0 assert pdf.figs == 0 def test_models_series_caveats_no_caen_al_generico(tmp_path): # Con builder dedicado, models/series/caveats NO se vuelcan en "Otras # secciones" (genérico). El profile completo se renderiza sin error. prof = _synthetic_profile() prof["models"] = _sample_models() prof["series"] = _sample_series() prof["caveats"] = _sample_caveats() out = str(tmp_path / "full.pdf") res = render_eda_pdf(prof, out) assert os.path.exists(out) assert os.path.getsize(out) > 0 assert "omitida" not in res["note"] # Cover+overview+num+cat+calidad+corr + models + series + caveats. assert res["n_pages"] >= 8 # --------------------------------------------------------------------------- # # H9: render_eda_pdf_relational — PDF DB-level (resumen de tablas + join graph). 
# --------------------------------------------------------------------------- #


def _synthetic_db_profile() -> dict:
    """Return a minimal DB-level profile: two tables, one FK candidate, a join graph."""
    customers = {
        "table": "customers",
        "n_rows": 4,
        "n_cols": 3,
        "quality_score": 98.0,
        "key_candidates": ["id"],
    }
    orders = {
        "table": "orders",
        "n_rows": 6,
        "n_cols": 3,
        "quality_score": 95.0,
        "key_candidates": ["order_id"],
    }
    orders_to_customers = {
        "from_table": "orders",
        "from_col": "customer_id",
        "to_table": "customers",
        "to_col": "id",
        "inclusion": 1.0,
        "cardinality": "N:1",
    }
    return {
        "db_path": "data/shop.duckdb",
        "profiled_at": "2026-06-29 01:00 UTC",
        "n_tables": 2,
        "tables": [customers, orders],
        "fk_candidates": [orders_to_customers],
        "join_graph": {"mermaid": "graph LR\n orders --> customers"},
    }


def test_relational_golden_genera_pdf(tmp_path):
    """Golden case: a DB profile yields a non-empty, multi-page PDF file."""
    pdf_file = tmp_path / "eda_db.pdf"
    target = str(pdf_file)
    result = render_eda_pdf_relational(_synthetic_db_profile(), target, title="EDA base")
    assert isinstance(result, dict)
    assert set(result) == {"pdf_path", "n_pages", "note"}
    assert result["pdf_path"] == target
    assert pdf_file.exists()
    assert pdf_file.stat().st_size > 0
    # cover + tables + relations >= 3.
    assert result["n_pages"] >= 3
    with pdf_file.open("rb") as handle:
        assert handle.read(4) == b"%PDF"


def test_relational_edge_vacio_no_revienta(tmp_path):
    """Edge: empty dict -> the file is written with at least one page."""
    pdf_file = tmp_path / "db_vacio.pdf"
    result = render_eda_pdf_relational({}, str(pdf_file))
    assert pdf_file.exists()
    assert result["n_pages"] >= 1


def test_relational_edge_none_no_revienta(tmp_path):
    """Edge: None -> the file is written with at least one page."""
    pdf_file = tmp_path / "db_none.pdf"
    result = render_eda_pdf_relational(None, str(pdf_file))
    assert pdf_file.exists()
    assert result["n_pages"] >= 1