feat(browser): auto-commit con 178 cambios

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-06-20 18:22:23 +02:00
parent 7d100e7f3e
commit 763e06c127
178 changed files with 19917 additions and 317 deletions
@@ -0,0 +1,203 @@
+"""Tests para eda_llm_insights.
+
+NO acceden a red ni a credenciales: _build_prompt y _parse_llm_json son puras y
+testeables aisladas; la unica via que llamaria al LLM (eda_llm_insights) se
+prueba monkeypatcheando ask_llm con una respuesta simulada.
+"""
+
+import json
+
+from datascience.eda_llm_insights import (
+    _build_prompt,
+    _parse_llm_json,
+    eda_llm_insights,
+)
+
+# Sample profile; per the original note it mirrors the shape produced by
+# profile_table. Shared read-only fixture for every test in this module.
+_PROFILE = {
+    "table": "ventas",
+    "n_rows": 1000,
+    "columns": [
+        # Numeric column: only the "numeric" stats block is filled in.
+        {
+            "name": "importe",
+            "inferred_type": "numeric",
+            "semantic_type": "currency",
+            "null_pct": 0.0,
+            "distinct_count": 950,
+            "numeric": {"min": 1.0, "max": 999.0, "mean": 50.5, "p50": 42.0},
+            "categorical": None,
+        },
+        # Categorical column: only the "categorical" block is filled in.
+        {
+            "name": "categoria",
+            "inferred_type": "categorical",
+            "semantic_type": "",
+            "null_pct": 0.05,
+            "distinct_count": 3,
+            "numeric": None,
+            "categorical": {
+                "top": [
+                    {"value": "neumaticos", "count": 600, "pct": 0.6},
+                    {"value": "frenos", "count": 300, "pct": 0.3},
+                    {"value": "aceite", "count": 100, "pct": 0.1},
+                ],
+                "mode": "neumaticos",
+            },
+        },
+    ],
+    # A single strong correlation entry between the two columns above.
+    "correlations": {
+        "strong": [
+            {"a": "importe", "b": "categoria", "method": "correlation_ratio", "value": 0.72},
+        ],
+    },
+}
+
+
+def test_build_prompt_includes_table_and_columns():
+    prompt = _build_prompt(_PROFILE)
+    assert isinstance(prompt, str)
+    assert "ventas" in prompt
+    assert "importe" in prompt
+    assert "categoria" in prompt
+    # n_rows presente.
+    assert "1000" in prompt
+
+
+def test_build_prompt_includes_numeric_stats_and_top_values():
+    prompt = _build_prompt(_PROFILE)
+    # Stats numericas de importe.
+    assert "stats[" in prompt
+    assert "mean=50.5" in prompt
+    # Top valores de categorica.
+    assert "neumaticos" in prompt
+    # Correlaciones fuertes.
+    assert "correlation_ratio" in prompt
+
+
+def test_build_prompt_handles_empty_profile():
+    prompt = _build_prompt({})
+    assert isinstance(prompt, str)
+    assert "Columnas: 0" in prompt
+
+
+def test_parse_llm_json_plain():
+    payload = {"summary": "una tabla", "dictionary": [], "pii": []}
+    text = json.dumps(payload)
+    parsed = _parse_llm_json(text)
+    assert parsed["summary"] == "una tabla"
+
+
+def test_parse_llm_json_with_fences():
+    payload = {"summary": "con fences", "analyses": ["a1"]}
+    text = "```json\n" + json.dumps(payload) + "\n```"
+    parsed = _parse_llm_json(text)
+    assert parsed["summary"] == "con fences"
+    assert parsed["analyses"] == ["a1"]
+
+
+def test_parse_llm_json_with_surrounding_text():
+    payload = {"summary": "rodeado"}
+    text = "Aqui tienes el resultado:\n" + json.dumps(payload) + "\nEspero que sirva."
+    parsed = _parse_llm_json(text)
+    assert parsed["summary"] == "rodeado"
+
+
+def test_parse_llm_json_nested_braces_in_strings():
+    # Un valor string con llaves no debe romper el matching.
+    text = '{"summary": "usa {placeholders}", "cleaning": ["fix {x}"]}'
+    parsed = _parse_llm_json(text)
+    assert parsed["summary"] == "usa {placeholders}"
+    assert parsed["cleaning"] == ["fix {x}"]
+
+
+def test_parse_llm_json_raises_without_object():
+    try:
+        _parse_llm_json("no hay json aqui")
+        assert False, "esperaba ValueError"
+    except ValueError:
+        pass
+
+
+def test_eda_llm_insights_ok_with_monkeypatched_llm(monkeypatch):
+    """Simula la respuesta del LLM y verifica el shape de salida (sin red)."""
+    fake = {
+        "summary": "Tabla de ventas",
+        "row_meaning": "Una fila = una venta",
+        "dictionary": [
+            {
+                "column": "importe",
+                "description": "monto",
+                "business_meaning": "ingreso",
+                "unit": "EUR",
+            }
+        ],
+        "pii": [],
+        "cleaning": ["normalizar categoria"],
+        "analyses": ["ventas por categoria"],
+    }
+
+    import datascience.eda_llm_insights as mod
+
+    monkeypatch.setattr(
+        mod, "ask_llm", lambda prompt, model="x", system="", echo=True: json.dumps(fake)
+    )
+
+    out = eda_llm_insights(_PROFILE)
+    assert out["status"] == "ok"
+    llm = out["llm"]
+    assert set(llm.keys()) == {
+        "summary",
+        "row_meaning",
+        "dictionary",
+        "pii",
+        "cleaning",
+        "analyses",
+    }
+    assert llm["summary"] == "Tabla de ventas"
+    assert llm["dictionary"][0]["unit"] == "EUR"
+
+
+def test_eda_llm_insights_fills_missing_keys(monkeypatch):
+    """Si el LLM omite claves, se rellenan con defaults vacios."""
+    import datascience.eda_llm_insights as mod
+
+    monkeypatch.setattr(
+        mod,
+        "ask_llm",
+        lambda prompt, model="x", system="", echo=True: '{"summary": "solo summary"}',
+    )
+
+    out = eda_llm_insights(_PROFILE)
+    assert out["status"] == "ok"
+    llm = out["llm"]
+    assert llm["summary"] == "solo summary"
+    assert llm["dictionary"] == []
+    assert llm["pii"] == []
+    assert llm["cleaning"] == []
+    assert llm["analyses"] == []
+    assert llm["row_meaning"] == ""
+
+
+def test_eda_llm_insights_error_on_empty_profile():
+    out = eda_llm_insights({})
+    assert out["status"] == "error"
+    assert "profile" in out["error"]
+
+
+def test_eda_llm_insights_error_on_empty_llm_response(monkeypatch):
+    import datascience.eda_llm_insights as mod
+
+    monkeypatch.setattr(
+        mod, "ask_llm", lambda prompt, model="x", system="", echo=True: ""
+    )
+    out = eda_llm_insights(_PROFILE)
+    assert out["status"] == "error"
+
+
+def test_eda_llm_insights_error_on_unparseable_llm_response(monkeypatch):
+    import datascience.eda_llm_insights as mod
+
+    monkeypatch.setattr(
+        mod, "ask_llm", lambda prompt, model="x", system="", echo=True: "sin json"
+    )
+    out = eda_llm_insights(_PROFILE)
+    assert out["status"] == "error"