Files
fn_registry/python/functions/datascience/eda_llm_insights_test.py
T
egutierrez 763e06c127 feat(browser): auto-commit con 178 cambios
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-06-20 18:22:23 +02:00

204 lines
5.9 KiB
Python

"""Tests para eda_llm_insights.
NO acceden a red ni a credenciales: _build_prompt y _parse_llm_json son puras y
testeables aisladas; la unica via que llamaria al LLM (eda_llm_insights) se
prueba monkeypatcheando ask_llm con una respuesta simulada.
"""
import json
from datascience.eda_llm_insights import (
_build_prompt,
_parse_llm_json,
eda_llm_insights,
)
# Sample profile shaped like the output of profile_table: one numeric column,
# one categorical column and a single strong correlation between them.
_PROFILE = {
    "table": "ventas",
    "n_rows": 1000,
    "columns": [
        # Numeric column: carries "numeric" stats and no "categorical" section.
        {
            "name": "importe",
            "inferred_type": "numeric",
            "semantic_type": "currency",
            "null_pct": 0.0,
            "distinct_count": 950,
            "numeric": {"min": 1.0, "max": 999.0, "mean": 50.5, "p50": 42.0},
            "categorical": None,
        },
        # Categorical column: carries top values and mode, no "numeric" section.
        {
            "name": "categoria",
            "inferred_type": "categorical",
            "semantic_type": "",
            "null_pct": 0.05,
            "distinct_count": 3,
            "numeric": None,
            "categorical": {
                "top": [
                    {"value": "neumaticos", "count": 600, "pct": 0.6},
                    {"value": "frenos", "count": 300, "pct": 0.3},
                    {"value": "aceite", "count": 100, "pct": 0.1},
                ],
                "mode": "neumaticos",
            },
        },
    ],
    "correlations": {
        "strong": [
            {
                "a": "importe",
                "b": "categoria",
                "method": "correlation_ratio",
                "value": 0.72,
            },
        ],
    },
}
def test_build_prompt_includes_table_and_columns():
    """The prompt is a string mentioning the table, both columns and n_rows."""
    rendered = _build_prompt(_PROFILE)
    assert isinstance(rendered, str)
    # Table name, the two column names, then the row count (n_rows).
    for fragment in ("ventas", "importe", "categoria", "1000"):
        assert fragment in rendered
def test_build_prompt_includes_numeric_stats_and_top_values():
    """The prompt carries numeric stats, categorical top values and correlations."""
    rendered = _build_prompt(_PROFILE)
    expected_fragments = (
        "stats[",  # numeric stats of "importe"
        "mean=50.5",
        "neumaticos",  # top value of the categorical column
        "correlation_ratio",  # strong correlations
    )
    for fragment in expected_fragments:
        assert fragment in rendered
def test_build_prompt_handles_empty_profile():
    """An empty profile still yields a string, reporting zero columns."""
    rendered = _build_prompt({})
    assert isinstance(rendered, str)
    assert "Columnas: 0" in rendered
def test_parse_llm_json_plain():
    """A bare JSON document is parsed and its fields are readable."""
    raw = json.dumps({"summary": "una tabla", "dictionary": [], "pii": []})
    result = _parse_llm_json(raw)
    assert result["summary"] == "una tabla"
def test_parse_llm_json_with_fences():
    """JSON wrapped in a Markdown ```json fence is still parsed."""
    expected = {"summary": "con fences", "analyses": ["a1"]}
    fenced = "".join(("```json\n", json.dumps(expected), "\n```"))
    result = _parse_llm_json(fenced)
    assert result["summary"] == "con fences"
    assert result["analyses"] == ["a1"]
def test_parse_llm_json_with_surrounding_text():
    """Prose before and after the JSON object does not prevent parsing."""
    body = json.dumps({"summary": "rodeado"})
    wrapped = "".join(("Aqui tienes el resultado:\n", body, "\nEspero que sirva."))
    result = _parse_llm_json(wrapped)
    assert result["summary"] == "rodeado"
def test_parse_llm_json_nested_braces_in_strings():
    """Braces inside string values are preserved in the parsed result."""
    # A string value containing braces must not break the matching.
    raw = '{"summary": "usa {placeholders}", "cleaning": ["fix {x}"]}'
    result = _parse_llm_json(raw)
    assert result["summary"] == "usa {placeholders}"
    assert result["cleaning"] == ["fix {x}"]
def test_parse_llm_json_raises_without_object():
    """Text with no JSON object must make ``_parse_llm_json`` raise ValueError.

    Raises:
        AssertionError: if the call returns normally instead of raising.
    """
    try:
        # Keep only the call under test inside the try body.
        _parse_llm_json("no hay json aqui")
    except ValueError:
        pass
    else:
        # Explicit raise instead of ``assert False``: assert statements are
        # removed under ``python -O``, which would let this test pass silently.
        raise AssertionError("esperaba ValueError")
def test_eda_llm_insights_ok_with_monkeypatched_llm(monkeypatch):
    """Feed a canned LLM reply and verify the shape of the output (no network)."""
    import datascience.eda_llm_insights as target_module

    canned_reply = {
        "summary": "Tabla de ventas",
        "row_meaning": "Una fila = una venta",
        "dictionary": [
            {
                "column": "importe",
                "description": "monto",
                "business_meaning": "ingreso",
                "unit": "EUR",
            }
        ],
        "pii": [],
        "cleaning": ["normalizar categoria"],
        "analyses": ["ventas por categoria"],
    }

    def fake_ask_llm(prompt, model="x", system="", echo=True):
        # Stand-in for ask_llm: ignores its arguments, returns the canned JSON.
        return json.dumps(canned_reply)

    monkeypatch.setattr(target_module, "ask_llm", fake_ask_llm)

    result = eda_llm_insights(_PROFILE)
    assert result["status"] == "ok"

    insights = result["llm"]
    expected_keys = {
        "summary",
        "row_meaning",
        "dictionary",
        "pii",
        "cleaning",
        "analyses",
    }
    assert set(insights.keys()) == expected_keys
    assert insights["summary"] == "Tabla de ventas"
    assert insights["dictionary"][0]["unit"] == "EUR"
def test_eda_llm_insights_fills_missing_keys(monkeypatch):
    """Keys the LLM omits are filled in with empty defaults."""
    import datascience.eda_llm_insights as target_module

    def fake_ask_llm(prompt, model="x", system="", echo=True):
        # Reply that only provides "summary".
        return '{"summary": "solo summary"}'

    monkeypatch.setattr(target_module, "ask_llm", fake_ask_llm)

    result = eda_llm_insights(_PROFILE)
    assert result["status"] == "ok"

    insights = result["llm"]
    assert insights["summary"] == "solo summary"
    # List-valued keys default to an empty list...
    for list_key in ("dictionary", "pii", "cleaning", "analyses"):
        assert insights[list_key] == []
    # ...and the text-valued one to an empty string.
    assert insights["row_meaning"] == ""
def test_eda_llm_insights_error_on_empty_profile():
    """An empty profile yields an error result whose message mentions 'profile'."""
    result = eda_llm_insights({})
    assert result["status"] == "error"
    assert "profile" in result["error"]
def test_eda_llm_insights_error_on_empty_llm_response(monkeypatch):
    """An empty string from the LLM produces an error status."""
    import datascience.eda_llm_insights as target_module

    def fake_ask_llm(prompt, model="x", system="", echo=True):
        # Simulates the LLM returning nothing at all.
        return ""

    monkeypatch.setattr(target_module, "ask_llm", fake_ask_llm)

    result = eda_llm_insights(_PROFILE)
    assert result["status"] == "error"
def test_eda_llm_insights_error_on_unparseable_llm_response(monkeypatch):
    """An LLM reply containing no JSON produces an error status."""
    import datascience.eda_llm_insights as target_module

    def fake_ask_llm(prompt, model="x", system="", echo=True):
        # Simulates a reply with plain text and no JSON object.
        return "sin json"

    monkeypatch.setattr(target_module, "ask_llm", fake_ask_llm)

    result = eda_llm_insights(_PROFILE)
    assert result["status"] == "error"