763e06c127
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
94 lines
3.1 KiB
Python
94 lines
3.1 KiB
Python
"""Tests para build_eda_notebook.
|
|
|
|
No ejecuta el notebook generado: solo valida que el .ipynb se escribe como JSON
|
|
nbformat v4 valido y que las celdas opcionales (modelos / LLM) aparecen segun
|
|
los flags. La validacion del contenido se hace sobre el dict deserializado.
|
|
"""
|
|
|
|
import json
|
|
import os
|
|
import sys
|
|
|
|
# Put the directory two levels above this file's folder at the front of
# sys.path. Presumably this is what lets the `functions.` package import
# further down resolve when the tests run from a source checkout -- confirm.
sys.path.insert(0, os.path.join(os.path.dirname(__file__), "..", ".."))
|
|
|
|
from functions.datascience.build_eda_notebook import build_eda_notebook
|
|
|
|
|
|
def _load(path: str) -> dict:
|
|
with open(path, "r", encoding="utf-8") as f:
|
|
return json.load(f)
|
|
|
|
|
|
def test_genera_notebook_ok(tmp_path):
    """A default build reports success and leaves the notebook file on disk."""
    notebook_file = tmp_path / "eda.ipynb"
    target = str(notebook_file)

    result = build_eda_notebook("/tmp/x.duckdb", "ventas", target)

    # The result dict reports success and echoes back the requested path.
    assert result["status"] == "ok"
    assert result["notebook_path"] == target
    # The file was actually written and holds at least one cell.
    assert os.path.exists(target)
    assert result["n_cells"] >= 1
|
|
|
|
|
|
def test_notebook_es_json_nbformat_valido(tmp_path):
    """The written file parses as JSON with the expected nbformat v4 shape.

    Checks the ``nbformat`` field, that ``cells`` is a non-empty list of
    code/markdown cells, that the reported ``n_cells`` matches the file, and
    that some markdown cell mentions the table name.
    """
    target = str(tmp_path / "eda.ipynb")
    result = build_eda_notebook("/tmp/x.duckdb", "ventas", target)
    assert result["status"] == "ok"

    notebook = _load(target)
    assert notebook["nbformat"] == 4

    cells = notebook.get("cells")
    assert isinstance(cells, list)
    assert len(cells) > 0

    # Every cell carries one of the two cell types this test accepts.
    assert all(cell["cell_type"] in ("code", "markdown") for cell in cells)

    # The reported cell count matches what is in the file.
    assert result["n_cells"] == len(cells)

    # Some markdown cell references the table; kept lazy so evaluation stops
    # at the first match.
    markdown_texts = (
        "".join(cell["source"])
        for cell in cells
        if cell["cell_type"] == "markdown"
    )
    assert any("ventas" in text for text in markdown_texts)
|
|
|
|
|
|
def test_run_models_anade_celda_de_modelos(tmp_path):
    """``run_models=True`` adds model-related content to the notebook.

    Builds a baseline notebook with ``run_models=False`` and a second one
    with ``run_models=True``, then checks that the second one mentions the
    model outputs and has more cells than the baseline.
    """
    out = str(tmp_path / "eda.ipynb")
    base = build_eda_notebook("/tmp/x.duckdb", "ventas", out, run_models=False)
    # Check the baseline build too, so a failure there is reported directly
    # rather than through the later n_cells comparison.
    assert base["status"] == "ok"

    out2 = str(tmp_path / "eda_models.ipynb")
    r = build_eda_notebook("/tmp/x.duckdb", "ventas", out2, run_models=True)
    assert r["status"] == "ok"

    nb = _load(out2)
    # All cell sources are concatenated into one string for substring checks.
    sources = "".join("".join(c["source"]) for c in nb["cells"])
    assert "models" in sources
    assert "explained_variance_ratio" in sources
    assert "best_k" in sources
    assert "n_outliers" in sources
    # run_models=True adds cells compared with the baseline.
    assert r["n_cells"] > base["n_cells"]
    # The generated code contains the literal flag (per the original note,
    # in the notebook's profile_table call).
    assert "run_models=True" in sources
|
|
|
|
|
|
def test_run_llm_anade_celda_de_insights(tmp_path):
    """``run_llm=True`` makes the notebook reference ``eda_llm_insights``."""
    target = str(tmp_path / "eda_llm.ipynb")
    result = build_eda_notebook("/tmp/x.duckdb", "ventas", target, run_llm=True)
    assert result["status"] == "ok"

    # Flatten each cell's source, then join all cells into one string.
    per_cell_text = ["".join(cell["source"]) for cell in _load(target)["cells"]]
    combined = "".join(per_cell_text)
    assert "eda_llm_insights" in combined
|
|
|
|
|
|
def test_sin_flags_no_anade_celdas_opcionales(tmp_path):
    """With no optional flags, the LLM and model markers are absent."""
    target = str(tmp_path / "eda_plain.ipynb")
    result = build_eda_notebook("/tmp/x.duckdb", "ventas", target)
    assert result["status"] == "ok"

    cells = _load(target)["cells"]
    combined = "".join("".join(cell["source"]) for cell in cells)

    # Neither optional feature's marker may appear in the notebook sources.
    for marker in ("eda_llm_insights", "explained_variance_ratio"):
        assert marker not in combined
|
|
|
|
|
|
def test_crea_directorio_padre(tmp_path):
    """The build succeeds when the output path sits in missing subdirectories."""
    nested_file = tmp_path.joinpath("nested", "deep", "eda.ipynb")
    target = str(nested_file)

    result = build_eda_notebook("/tmp/x.duckdb", "ventas", target)

    assert result["status"] == "ok"
    # The file exists, so its parent directories do too.
    assert os.path.exists(target)
|