"""Tests for render_eda_pdf.

The module is imported directly (through sys.path), like the rest of the
tests in the eda group, so nothing here depends on its registration in
__init__.py (the orchestrator adds that at integration time).
"""
import os
import sys

sys.path.insert(0, os.path.dirname(__file__))

from render_eda_pdf import render_eda_pdf


def _synthetic_profile() -> dict:
    """Return a minimal synthetic TableProfile.

    The profile carries the overview fields, two numeric columns, one
    categorical column and a single correlation pair.
    """

    def _bins(*triples):
        # Expand (lo, hi, count) triples into histogram bin dicts.
        return [{"lo": lo, "hi": hi, "count": count} for lo, hi, count in triples]

    price_column = {
        "name": "precio",
        "inferred_type": "numeric",
        "semantic_type": "currency",
        "null_pct": 0.0,
        "distinct_count": 850,
        "unique_pct": 0.85,
        "quality_score": 95.0,
        "flags": [],
        "numeric": {
            "min": 1.0,
            "max": 100.0,
            "median": 40.0,
            "mean": 42.5,
            "std": 12.3,
            "p25": 30.0,
            "p75": 55.0,
            "outlier_pct": 1.2,
            "distribution_type": "right-skewed",
            "histogram": _bins(
                (0.0, 25.0, 100),
                (25.0, 50.0, 500),
                (50.0, 75.0, 300),
                (75.0, 100.0, 50),
            ),
        },
    }

    # This column deliberately has no p25/p75 entries, unlike "precio".
    units_column = {
        "name": "unidades",
        "inferred_type": "numeric",
        "semantic_type": "integer",
        "null_pct": 0.01,
        "distinct_count": 40,
        "unique_pct": 0.04,
        "quality_score": 88.0,
        "flags": ["has_nulls"],
        "numeric": {
            "min": 1.0,
            "max": 12.0,
            "median": 4.0,
            "mean": 4.8,
            "std": 2.1,
            "outlier_pct": 0.0,
            "distribution_type": "normal",
            "histogram": _bins(
                (1.0, 4.0, 400),
                (4.0, 8.0, 450),
                (8.0, 12.0, 150),
            ),
        },
    }

    top_values = (
        ("neumaticos", 500, 0.5),
        ("aceite", 300, 0.3),
        ("filtros", 200, 0.2),
    )
    category_column = {
        "name": "categoria",
        "inferred_type": "categorical",
        "semantic_type": "",
        "null_pct": 0.0,
        "distinct_count": 3,
        "unique_pct": 0.003,
        "quality_score": 99.0,
        "flags": [],
        "categorical": {
            "entropy": 1.05,
            "top": [
                {"value": label, "count": count, "pct": share}
                for label, count, share in top_values
            ],
        },
    }

    correlation_pairs = [
        {"a": "precio", "b": "unidades", "value": -0.42, "method": "pearson"},
    ]

    profile = {
        "table": "ventas",
        "source": "data/ventas.csv",
        "profiled_at": "2026-06-28 10:00 UTC",
        "n_rows": 1000,
        "n_cols": 3,
        "null_cell_pct": 0.02,
        "duplicate_rows": 5,
        "duplicate_pct": 0.005,
        "quality_score": 92.5,
        "type_breakdown": {"numeric": 2, "categorical": 1},
        "key_candidates": ["id"],
        "columns": [price_column, units_column, category_column],
        "correlations": {"pairs": correlation_pairs},
    }
    return profile


def test_golden_genera_pdf_multipagina(tmp_path):
    """Golden case: a full profile yields an existing, non-empty, multi-page PDF."""
    pdf_file = str(tmp_path / "eda_ventas.pdf")
    result = render_eda_pdf(_synthetic_profile(), pdf_file, title="EDA — ventas")

    assert isinstance(result, dict)
    assert set(result) == {"pdf_path", "n_pages", "note"}
    assert result["pdf_path"] == pdf_file
    assert os.path.exists(pdf_file)
    assert os.path.getsize(pdf_file) > 0
    # Cover + overview + numeric + categorical + quality + correlations:
    # at least 5 pages are expected.
    assert result["n_pages"] >= 5
    # The file must start with the PDF header.
    with open(pdf_file, "rb") as stream:
        magic = stream.read(4)
        assert magic == b"%PDF"


def test_edge_profile_vacio_no_revienta(tmp_path):
    """Edge case: an empty dict still produces at least one page, no exception."""
    pdf_file = str(tmp_path / "vacio.pdf")
    result = render_eda_pdf({}, pdf_file)

    assert os.path.exists(pdf_file)
    assert os.path.getsize(pdf_file) > 0
    assert result["n_pages"] >= 1
    assert result["pdf_path"] == pdf_file


def test_edge_profile_none_no_revienta(tmp_path):
    """Edge case: None as profile still produces at least one page, no exception."""
    pdf_file = str(tmp_path / "none.pdf")
    result = render_eda_pdf(None, pdf_file)

    assert os.path.exists(pdf_file)
    assert result["n_pages"] >= 1


def test_edge_solo_numericas(tmp_path):
    """Edge case: profile with numeric columns only (no categorical, no correlations)."""
    numeric_stats = {
        "median": 2.0,
        "mean": 2.0,
        "histogram": [{"lo": 0.0, "hi": 4.0, "count": 10}],
    }
    only_column = {
        "name": "x",
        "inferred_type": "numeric",
        "quality_score": 80.0,
        "numeric": numeric_stats,
    }
    profile = {
        "table": "t",
        "n_rows": 10,
        "n_cols": 1,
        "columns": [only_column],
    }

    pdf_file = str(tmp_path / "num.pdf")
    result = render_eda_pdf(profile, pdf_file)

    assert os.path.exists(pdf_file)
    # Cover page plus the numeric section, at the very least.
    assert result["n_pages"] >= 2
def test_forward_compat_seccion_desconocida(tmp_path):
    """Forward-compat: profile blocks unknown to the renderer must not break it."""
    profile = {"table": "t", "n_rows": 5, "columns": []}
    # Blocks this renderer does not know about (other agents add them).
    profile["models"] = {"kmeans": {"k": 3, "silhouette": 0.55}}
    profile["caveats"] = ["muestra pequeña", "fechas como texto"]

    pdf_file = str(tmp_path / "fwd.pdf")
    result = render_eda_pdf(profile, pdf_file)

    assert os.path.exists(pdf_file)
    assert result["n_pages"] >= 1
    # The note must not report any section as skipped because of an error.
    assert "omitida" not in result["note"]