"""Tests para build_eda_notebook. No ejecuta el notebook generado: solo valida que el .ipynb se escribe como JSON nbformat v4 valido y que las celdas opcionales (modelos / LLM) aparecen segun los flags. La validacion del contenido se hace sobre el dict deserializado. """ import json import os import sys sys.path.insert(0, os.path.join(os.path.dirname(__file__), "..", "..")) from functions.datascience.build_eda_notebook import build_eda_notebook def _load(path: str) -> dict: with open(path, "r", encoding="utf-8") as f: return json.load(f) def test_genera_notebook_ok(tmp_path): out = str(tmp_path / "eda.ipynb") r = build_eda_notebook("/tmp/x.duckdb", "ventas", out) assert r["status"] == "ok" assert r["notebook_path"] == out assert os.path.exists(out) assert r["n_cells"] >= 1 def test_notebook_es_json_nbformat_valido(tmp_path): out = str(tmp_path / "eda.ipynb") r = build_eda_notebook("/tmp/x.duckdb", "ventas", out) assert r["status"] == "ok" nb = _load(out) assert nb["nbformat"] == 4 assert isinstance(nb.get("cells"), list) assert len(nb["cells"]) > 0 # Cada celda tiene cell_type valido. for cell in nb["cells"]: assert cell["cell_type"] in ("code", "markdown") # n_cells coincide con las celdas del archivo. assert r["n_cells"] == len(nb["cells"]) # El titulo referencia la tabla. assert any( c["cell_type"] == "markdown" and "ventas" in "".join(c["source"]) for c in nb["cells"] ) def test_run_models_anade_celda_de_modelos(tmp_path): out = str(tmp_path / "eda.ipynb") base = build_eda_notebook("/tmp/x.duckdb", "ventas", out, run_models=False) out2 = str(tmp_path / "eda_models.ipynb") r = build_eda_notebook("/tmp/x.duckdb", "ventas", out2, run_models=True) assert r["status"] == "ok" nb = _load(out2) sources = "".join("".join(c["source"]) for c in nb["cells"]) assert "models" in sources assert "explained_variance_ratio" in sources assert "best_k" in sources assert "n_outliers" in sources # run_models=True aƱade celdas respecto al base. assert r["n_cells"] > base["n_cells"] # profile_table dentro del notebook usa run_models=True. assert "run_models=True" in sources def test_run_llm_anade_celda_de_insights(tmp_path): out = str(tmp_path / "eda_llm.ipynb") r = build_eda_notebook("/tmp/x.duckdb", "ventas", out, run_llm=True) assert r["status"] == "ok" nb = _load(out) sources = "".join("".join(c["source"]) for c in nb["cells"]) assert "eda_llm_insights" in sources def test_sin_flags_no_anade_celdas_opcionales(tmp_path): out = str(tmp_path / "eda_plain.ipynb") r = build_eda_notebook("/tmp/x.duckdb", "ventas", out) assert r["status"] == "ok" nb = _load(out) sources = "".join("".join(c["source"]) for c in nb["cells"]) assert "eda_llm_insights" not in sources assert "explained_variance_ratio" not in sources def test_crea_directorio_padre(tmp_path): out = str(tmp_path / "nested" / "deep" / "eda.ipynb") r = build_eda_notebook("/tmp/x.duckdb", "ventas", out) assert r["status"] == "ok" assert os.path.exists(out)