feat(browser): auto-commit con 178 cambios
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -0,0 +1,203 @@
|
||||
"""Tests para eda_llm_insights.
|
||||
|
||||
NO acceden a red ni a credenciales: _build_prompt y _parse_llm_json son puras y
|
||||
testeables aisladas; la unica via que llamaria al LLM (eda_llm_insights) se
|
||||
prueba monkeypatcheando ask_llm con una respuesta simulada.
|
||||
"""
|
||||
|
||||
import json
|
||||
|
||||
from datascience.eda_llm_insights import (
|
||||
_build_prompt,
|
||||
_parse_llm_json,
|
||||
eda_llm_insights,
|
||||
)
|
||||
|
||||
# Sample profile shaped like the output of profile_table: one numeric column,
# one categorical column and a single strong correlation between them.
_PROFILE = {
    "table": "ventas",
    "n_rows": 1000,
    "columns": [
        {
            "name": "importe",
            "inferred_type": "numeric",
            "semantic_type": "currency",
            "null_pct": 0.0,
            "distinct_count": 950,
            "numeric": {
                "min": 1.0,
                "max": 999.0,
                "mean": 50.5,
                "p50": 42.0,
            },
            "categorical": None,
        },
        {
            "name": "categoria",
            "inferred_type": "categorical",
            "semantic_type": "",
            "null_pct": 0.05,
            "distinct_count": 3,
            "numeric": None,
            "categorical": {
                "top": [
                    {"value": "neumaticos", "count": 600, "pct": 0.6},
                    {"value": "frenos", "count": 300, "pct": 0.3},
                    {"value": "aceite", "count": 100, "pct": 0.1},
                ],
                "mode": "neumaticos",
            },
        },
    ],
    "correlations": {
        "strong": [
            {
                "a": "importe",
                "b": "categoria",
                "method": "correlation_ratio",
                "value": 0.72,
            },
        ],
    },
}
|
||||
|
||||
|
||||
def test_build_prompt_includes_table_and_columns():
    """The prompt is a string mentioning the table, both columns and n_rows."""
    rendered = _build_prompt(_PROFILE)
    assert isinstance(rendered, str)
    # Table name, the two column names, then n_rows rendered as text.
    for expected in ("ventas", "importe", "categoria", "1000"):
        assert expected in rendered
|
||||
|
||||
|
||||
def test_build_prompt_includes_numeric_stats_and_top_values():
    """The prompt carries numeric stats, categorical top values and correlations."""
    rendered = _build_prompt(_PROFILE)
    expected_fragments = (
        "stats[",  # numeric stats of "importe"
        "mean=50.5",
        "neumaticos",  # top value of the categorical column
        "correlation_ratio",  # method of the strong correlation
    )
    for fragment in expected_fragments:
        assert fragment in rendered
|
||||
|
||||
|
||||
def test_build_prompt_handles_empty_profile():
    """An empty profile still yields a prompt string that reports zero columns."""
    empty_profile = {}
    rendered = _build_prompt(empty_profile)
    assert isinstance(rendered, str)
    assert "Columnas: 0" in rendered
|
||||
|
||||
|
||||
def test_parse_llm_json_plain():
    """A bare JSON object string is parsed and its fields are readable."""
    raw = json.dumps({"summary": "una tabla", "dictionary": [], "pii": []})
    result = _parse_llm_json(raw)
    assert result["summary"] == "una tabla"
|
||||
|
||||
|
||||
def test_parse_llm_json_with_fences():
    """JSON wrapped in a Markdown json code fence is still parsed."""
    body = json.dumps({"summary": "con fences", "analyses": ["a1"]})
    fenced = f"```json\n{body}\n```"
    result = _parse_llm_json(fenced)
    assert result["summary"] == "con fences"
    assert result["analyses"] == ["a1"]
|
||||
|
||||
|
||||
def test_parse_llm_json_with_surrounding_text():
    """A JSON object preceded and followed by free text is still parsed."""
    body = json.dumps({"summary": "rodeado"})
    wrapped = "\n".join(("Aqui tienes el resultado:", body, "Espero que sirva."))
    result = _parse_llm_json(wrapped)
    assert result["summary"] == "rodeado"
|
||||
|
||||
|
||||
def test_parse_llm_json_nested_braces_in_strings():
    """A string value containing braces must not break the brace matching."""
    raw = '{"summary": "usa {placeholders}", "cleaning": ["fix {x}"]}'
    result = _parse_llm_json(raw)
    assert result["summary"] == "usa {placeholders}"
    assert result["cleaning"] == ["fix {x}"]
|
||||
|
||||
|
||||
def test_parse_llm_json_raises_without_object():
    """Text containing no JSON object must make _parse_llm_json raise ValueError."""
    try:
        _parse_llm_json("no hay json aqui")
    except ValueError:
        # Expected outcome: nothing more to check.
        return
    # Explicit raise instead of `assert False`: assert statements are removed
    # under `python -O`, which would let this test pass with no ValueError.
    raise AssertionError("esperaba ValueError")
|
||||
|
||||
|
||||
def test_eda_llm_insights_ok_with_monkeypatched_llm(monkeypatch):
    """Simulate the LLM reply and check the shape of the output (no network)."""
    dictionary_entry = {
        "column": "importe",
        "description": "monto",
        "business_meaning": "ingreso",
        "unit": "EUR",
    }
    canned = {
        "summary": "Tabla de ventas",
        "row_meaning": "Una fila = una venta",
        "dictionary": [dictionary_entry],
        "pii": [],
        "cleaning": ["normalizar categoria"],
        "analyses": ["ventas por categoria"],
    }

    import datascience.eda_llm_insights as mod

    def fake_ask_llm(prompt, model="x", system="", echo=True):
        # Stand-in for ask_llm: always answers with the canned JSON payload.
        return json.dumps(canned)

    monkeypatch.setattr(mod, "ask_llm", fake_ask_llm)

    result = eda_llm_insights(_PROFILE)
    assert result["status"] == "ok"

    insights = result["llm"]
    expected_keys = {
        "summary",
        "row_meaning",
        "dictionary",
        "pii",
        "cleaning",
        "analyses",
    }
    assert set(insights.keys()) == expected_keys
    assert insights["summary"] == "Tabla de ventas"
    assert insights["dictionary"][0]["unit"] == "EUR"
|
||||
|
||||
|
||||
def test_eda_llm_insights_fills_missing_keys(monkeypatch):
    """Keys the LLM leaves out are filled in with empty defaults."""
    import datascience.eda_llm_insights as mod

    def fake_ask_llm(prompt, model="x", system="", echo=True):
        # Stand-in for ask_llm: a reply that only carries "summary".
        return '{"summary": "solo summary"}'

    monkeypatch.setattr(mod, "ask_llm", fake_ask_llm)

    result = eda_llm_insights(_PROFILE)
    assert result["status"] == "ok"

    insights = result["llm"]
    assert insights["summary"] == "solo summary"
    # List-valued keys default to an empty list, row_meaning to an empty string.
    for list_key in ("dictionary", "pii", "cleaning", "analyses"):
        assert insights[list_key] == []
    assert insights["row_meaning"] == ""
|
||||
|
||||
|
||||
def test_eda_llm_insights_error_on_empty_profile():
    """An empty profile produces an error result whose message mentions "profile"."""
    result = eda_llm_insights({})
    status, message = result["status"], result["error"]
    assert status == "error"
    assert "profile" in message
|
||||
|
||||
|
||||
def test_eda_llm_insights_error_on_empty_llm_response(monkeypatch):
    """An empty string from the LLM produces an error status."""
    import datascience.eda_llm_insights as mod

    def fake_ask_llm(prompt, model="x", system="", echo=True):
        # Stand-in for ask_llm: replies with nothing at all.
        return ""

    monkeypatch.setattr(mod, "ask_llm", fake_ask_llm)

    result = eda_llm_insights(_PROFILE)
    assert result["status"] == "error"
|
||||
|
||||
|
||||
def test_eda_llm_insights_error_on_unparseable_llm_response(monkeypatch):
    """An LLM reply containing no JSON produces an error status."""
    import datascience.eda_llm_insights as mod

    def fake_ask_llm(prompt, model="x", system="", echo=True):
        # Stand-in for ask_llm: replies with plain text and no JSON object.
        return "sin json"

    monkeypatch.setattr(mod, "ask_llm", fake_ask_llm)

    result = eda_llm_insights(_PROFILE)
    assert result["status"] == "error"
|
||||
Reference in New Issue
Block a user