feat(browser): auto-commit con 178 cambios

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-06-20 18:22:23 +02:00
parent 7d100e7f3e
commit 763e06c127
178 changed files with 19917 additions and 317 deletions
@@ -0,0 +1,93 @@
+"""Tests para build_eda_notebook.
+
+No ejecuta el notebook generado: solo valida que el .ipynb se escribe como JSON
+nbformat v4 valido y que las celdas opcionales (modelos / LLM) aparecen segun
+los flags. La validacion del contenido se hace sobre el dict deserializado.
+"""
+
+import json
+import os
+import sys
+
+sys.path.insert(0, os.path.join(os.path.dirname(__file__), "..", ".."))
+
+from functions.datascience.build_eda_notebook import build_eda_notebook
+
+
+def _load(path: str) -> dict:
+    with open(path, "r", encoding="utf-8") as f:
+        return json.load(f)
+
+
+def test_genera_notebook_ok(tmp_path):
+    out = str(tmp_path / "eda.ipynb")
+    r = build_eda_notebook("/tmp/x.duckdb", "ventas", out)
+    assert r["status"] == "ok"
+    assert r["notebook_path"] == out
+    assert os.path.exists(out)
+    assert r["n_cells"] >= 1
+
+
+def test_notebook_es_json_nbformat_valido(tmp_path):
+    out = str(tmp_path / "eda.ipynb")
+    r = build_eda_notebook("/tmp/x.duckdb", "ventas", out)
+    assert r["status"] == "ok"
+    nb = _load(out)
+    assert nb["nbformat"] == 4
+    assert isinstance(nb.get("cells"), list)
+    assert len(nb["cells"]) > 0
+    # Cada celda tiene cell_type valido.
+    for cell in nb["cells"]:
+        assert cell["cell_type"] in ("code", "markdown")
+    # n_cells coincide con las celdas del archivo.
+    assert r["n_cells"] == len(nb["cells"])
+    # El titulo referencia la tabla.
+    assert any(
+        c["cell_type"] == "markdown" and "ventas" in "".join(c["source"])
+        for c in nb["cells"]
+    )
+
+
+def test_run_models_anade_celda_de_modelos(tmp_path):
+    out = str(tmp_path / "eda.ipynb")
+    base = build_eda_notebook("/tmp/x.duckdb", "ventas", out, run_models=False)
+
+    out2 = str(tmp_path / "eda_models.ipynb")
+    r = build_eda_notebook("/tmp/x.duckdb", "ventas", out2, run_models=True)
+    assert r["status"] == "ok"
+    nb = _load(out2)
+    sources = "".join("".join(c["source"]) for c in nb["cells"])
+    assert "models" in sources
+    assert "explained_variance_ratio" in sources
+    assert "best_k" in sources
+    assert "n_outliers" in sources
+    # run_models=True añade celdas respecto al base.
+    assert r["n_cells"] > base["n_cells"]
+    # profile_table dentro del notebook usa run_models=True.
+    assert "run_models=True" in sources
+
+
+def test_run_llm_anade_celda_de_insights(tmp_path):
+    out = str(tmp_path / "eda_llm.ipynb")
+    r = build_eda_notebook("/tmp/x.duckdb", "ventas", out, run_llm=True)
+    assert r["status"] == "ok"
+    nb = _load(out)
+    sources = "".join("".join(c["source"]) for c in nb["cells"])
+    assert "eda_llm_insights" in sources
+
+
+def test_sin_flags_no_anade_celdas_opcionales(tmp_path):
+    out = str(tmp_path / "eda_plain.ipynb")
+    r = build_eda_notebook("/tmp/x.duckdb", "ventas", out)
+    assert r["status"] == "ok"
+    nb = _load(out)
+    sources = "".join("".join(c["source"]) for c in nb["cells"])
+    assert "eda_llm_insights" not in sources
+    assert "explained_variance_ratio" not in sources
+
+
+def test_crea_directorio_padre(tmp_path):
+    out = str(tmp_path / "nested" / "deep" / "eda.ipynb")
+    r = build_eda_notebook("/tmp/x.duckdb", "ventas", out)
+    assert r["status"] == "ok"
+    assert os.path.exists(out)