96da9e3015
Cuatro funciones nuevas del grupo eda que nutren el capítulo AGREGACION: - select_groupby_keys (pure): elige categóricas agrupables + numéricas medida desde el TableProfile. - groupby_stats_duckdb (impure): GROUP BY push-down en DuckDB (count/mean/median/std/min/max por grupo). - pivot_table_duckdb (impure): pivot A×B push-down, limitado a top filas/cols para no cortar. - suggest_aggregations_llm (impure): el LLM elige las agregaciones interesantes con fallback determinista. Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
199 lines
7.1 KiB
Python
199 lines
7.1 KiB
Python
"""Tests para suggest_aggregations_llm.
|
|
|
|
NO acceden a red ni a credenciales: las funciones internas (_build_prompt,
|
|
_extract_json, _validate_*, _fallback_*) son puras y testeables aisladas; la unica
|
|
via que llamaria al LLM (suggest_aggregations_llm) se prueba reemplazando el simbolo
|
|
`ask_llm` del modulo bajo prueba con una funcion simulada. Los candidatos van
|
|
literales en el test: NO se importa select_groupby_keys.
|
|
|
|
Cubre golden (LLM ok con columnas validas), edge (max_aggs respetado, sin candidatos)
|
|
y error (LLM caido -> fallback, JSON invalido -> fallback, columna inventada -> se
|
|
descarta). Todos sin tocar la red.
|
|
"""
|
|
|
|
import json
|
|
|
|
import datascience.suggest_aggregations_llm as M
|
|
from datascience.suggest_aggregations_llm import (
|
|
_extract_json,
|
|
_validate_aggregations,
|
|
suggest_aggregations_llm,
|
|
)
|
|
|
|
# Sample candidates written out literally, in the shape select_groupby_keys
# produces (that function is deliberately not imported by this test module).
_CANDIDATES = {
    "group_keys": [
        {"col": "categoria", "cardinality": 8, "score": 0.91},
        {"col": "region", "cardinality": 5, "score": 0.74},
        {"col": "canal", "cardinality": 3, "score": 0.60},
    ],
    "measures": ["importe", "unidades"],
    "pivots": [
        {"index": "categoria", "columns": "region", "value": "importe"},
    ],
}

# Minimal profile stand-in; passed as the first argument of every
# suggest_aggregations_llm call in this module.
_PROFILE = {"table": "ventas"}
|
|
|
|
|
|
def _fake_returner(text):
    """Build a stand-in for ``ask_llm`` that always answers with ``text``.

    The returned callable accepts a prompt plus the ``model``, ``system`` and
    ``echo`` keywords (and any further keyword arguments), ignores all of
    them, and returns ``text`` unchanged.
    """

    def _canned_ask_llm(prompt, model="x", system="", echo=True, **kwargs):
        # Every argument is intentionally unused: the reply is fixed up front.
        return text

    return _canned_ask_llm
|
|
|
|
|
|
# --- _extract_json (parser puro, sin red) ---
|
|
|
|
|
|
def test_extract_json_object():
    """A plain serialized JSON object is parsed back to the same dict."""
    expected = {"aggregations": [{"group_by": "categoria", "measures": ["importe"], "why": "x"}]}
    serialized = json.dumps(expected)
    assert _extract_json(serialized) == expected
|
|
|
|
|
|
def test_extract_json_wrapped_in_fences_and_junk():
    """JSON inside a fenced block, surrounded by chatter, is still recovered."""
    payload = {"aggregations": [], "pivots": []}
    fenced = "```json\n" + json.dumps(payload) + "\n```"
    reply = "Claro, aqui tienes:\n" + fenced + "\nFin."
    assert _extract_json(reply) == payload
|
|
|
|
|
|
def test_extract_json_non_json_returns_none():
    """Text without JSON, the empty string and None all yield None."""
    for unparseable in ("no hay json aqui", "", None):
        assert _extract_json(unparseable) is None
|
|
|
|
|
|
# --- _validate_aggregations (puro) ---
|
|
|
|
|
|
def test_validate_aggregations_drops_invalid_columns():
    """Entries with unknown group keys or no valid measures are filtered out."""
    allowed_groups = {"categoria", "region"}
    allowed_measures = {"importe", "unidades"}
    proposals = [
        # Valid group key; one of its two measures is unknown.
        {"group_by": "categoria", "measures": ["importe", "inventada"], "why": "ok"},
        # Unknown group key.
        {"group_by": "no_existe", "measures": ["importe"], "why": "mala"},
        # Valid group key, but none of its measures are known.
        {"group_by": "region", "measures": ["solo_inventada"], "why": "sin medidas"},
    ]

    validated = _validate_aggregations(proposals, allowed_groups, allowed_measures, max_aggs=4)

    # Only the first entry survives, with its measures trimmed to the valid ones.
    survivor = {"group_by": "categoria", "measures": ["importe"], "why": "ok"}
    assert validated == [survivor]
|
|
|
|
|
|
# --- suggest_aggregations_llm: camino LLM (golden) ---
|
|
|
|
|
|
def test_llm_path_uses_selection(monkeypatch):
    """Golden path: a fully valid LLM reply is used and reported as source 'llm'."""
    chosen_aggs = [
        {"group_by": "categoria", "measures": ["importe"], "why": "ventas por familia"},
        {"group_by": "region", "measures": ["importe", "unidades"], "why": "reparto geografico"},
    ]
    chosen_pivots = [
        {"index": "categoria", "columns": "region", "value": "importe", "why": "cruce clave"},
    ]
    reply = json.dumps({"aggregations": chosen_aggs, "pivots": chosen_pivots})
    # Replace the symbol on the module under test so no real LLM call happens.
    monkeypatch.setattr(M, "ask_llm", _fake_returner(reply))

    result = suggest_aggregations_llm(_PROFILE, _CANDIDATES)

    assert result["status"] == "ok"
    assert result["source"] == "llm"
    assert result["aggregations"] == chosen_aggs
    first_pivot = result["pivots"][0]
    assert first_pivot["index"] == "categoria"
    assert first_pivot["why"] == "cruce clave"
|
|
|
|
|
|
def test_llm_path_respects_max_aggs(monkeypatch):
    """Three valid LLM aggregations are capped to two when max_aggs=2."""
    specs = [
        ("categoria", "importe", "a"),
        ("region", "importe", "b"),
        ("canal", "unidades", "c"),
    ]
    reply = json.dumps(
        {
            "aggregations": [
                {"group_by": key, "measures": [measure], "why": reason}
                for key, measure, reason in specs
            ],
            "pivots": [],
        }
    )
    monkeypatch.setattr(M, "ask_llm", _fake_returner(reply))

    result = suggest_aggregations_llm(_PROFILE, _CANDIDATES, max_aggs=2)

    assert result["source"] == "llm"
    assert len(result["aggregations"]) == 2
|
|
|
|
|
|
def test_llm_invented_column_is_discarded(monkeypatch):
    """Aggregations and pivots that name unknown columns are dropped."""
    # The simulated LLM mixes one valid aggregation with one whose group key
    # does not exist, and proposes a pivot whose index does not exist either.
    valid_agg = {"group_by": "categoria", "measures": ["importe"], "why": "valida"}
    ghost_agg = {"group_by": "columna_fantasma", "measures": ["importe"], "why": "inventada"}
    ghost_pivot = {"index": "fantasma", "columns": "region", "value": "importe", "why": "mala"}
    reply = json.dumps({"aggregations": [valid_agg, ghost_agg], "pivots": [ghost_pivot]})
    monkeypatch.setattr(M, "ask_llm", _fake_returner(reply))

    result = suggest_aggregations_llm(_PROFILE, _CANDIDATES)

    assert result["source"] == "llm"
    # The invented aggregation is discarded; only the valid one remains.
    kept_groups = [agg["group_by"] for agg in result["aggregations"]]
    assert kept_groups == ["categoria"]
    # No returned pivot may use an unknown index. Original note: the bad pivot
    # is discarded and the candidates' pivots are used instead. This check is
    # vacuously true if no pivots come back at all.
    known_keys = {"categoria", "region", "canal"}
    assert not any(pivot["index"] not in known_keys for pivot in result["pivots"])
|
|
|
|
|
|
# --- suggest_aggregations_llm: fallback determinista (error paths) ---
|
|
|
|
|
|
def test_fallback_on_empty_llm_response(monkeypatch):
    """An empty LLM reply yields the deterministic fallback built from candidates."""
    empty_reply = _fake_returner("")
    monkeypatch.setattr(M, "ask_llm", empty_reply)

    result = suggest_aggregations_llm(_PROFILE, _CANDIDATES, max_aggs=4)

    assert result["status"] == "ok"
    assert result["source"] == "fallback"
    # Fallback aggregations come from the candidates: the first one groups by
    # a candidate key and carries every candidate measure.
    first_agg = result["aggregations"][0]
    assert first_agg["group_by"] in {"categoria", "region", "canal"}
    assert first_agg["measures"] == ["importe", "unidades"]
    assert first_agg["why"] == "selección cuantitativa (sin LLM)"
    # Pivots are taken from the candidates.
    assert result["pivots"][0]["index"] == "categoria"
|
|
|
|
|
|
def test_fallback_on_unparseable_response(monkeypatch):
    """A reply that is not valid JSON triggers the fallback with at least one aggregation."""
    broken_reply = "esto no es JSON {roto"
    monkeypatch.setattr(M, "ask_llm", _fake_returner(broken_reply))

    result = suggest_aggregations_llm(_PROFILE, _CANDIDATES)

    assert result["source"] == "fallback"
    assert len(result["aggregations"]) >= 1
|
|
|
|
|
|
def test_fallback_respects_max_aggs(monkeypatch):
    """The fallback path honours max_aggs: three group keys, only two aggregations."""
    monkeypatch.setattr(M, "ask_llm", _fake_returner(""))

    result = suggest_aggregations_llm(_PROFILE, _CANDIDATES, max_aggs=2)

    assert result["source"] == "fallback"
    produced = result["aggregations"]
    assert len(produced) == 2
|
|
|
|
|
|
def test_fallback_when_llm_raises(monkeypatch):
    """An exception raised by ask_llm is absorbed and the fallback is returned."""

    def _always_fails(*args, **kwargs):
        # Simulates the LLM being unreachable.
        raise RuntimeError("sin red")

    monkeypatch.setattr(M, "ask_llm", _always_fails)

    result = suggest_aggregations_llm(_PROFILE, _CANDIDATES)

    assert result["source"] == "fallback"
    # Non-empty, and reaching this line means nothing propagated.
    assert result["aggregations"]
|
|
|
|
|
|
def test_no_candidates_returns_empty_fallback(monkeypatch):
    """With no group keys, measures or pivots the result is an empty fallback.

    The original note says the LLM is not even called here because there is
    nothing to ask about. That short-circuit lives in the module under test
    and is not visible from this file, so ``ask_llm`` is stubbed anyway: the
    test stays offline even if that behaviour changes.
    """
    # Hermeticity guard: an empty reply leads to the fallback in any case.
    monkeypatch.setattr(M, "ask_llm", _fake_returner(""))

    empty_candidates = {"group_keys": [], "measures": [], "pivots": []}
    out = suggest_aggregations_llm(_PROFILE, empty_candidates)

    assert out["status"] == "ok"
    assert out["source"] == "fallback"
    assert out["aggregations"] == []
|
|
|
|
|
|
def test_non_dict_candidates_does_not_raise(monkeypatch):
    """Passing ``None`` as candidates returns an 'ok' result with no aggregations.

    ``ask_llm`` is stubbed so the test cannot reach the network, whether or
    not the module under test skips the LLM call for unusable candidates
    (that logic is not visible from this file).
    """
    # Hermeticity guard: never let this test fall through to a real LLM call.
    monkeypatch.setattr(M, "ask_llm", _fake_returner(""))

    out = suggest_aggregations_llm(_PROFILE, None)

    assert out["status"] == "ok"
    assert out["aggregations"] == []
|