"""Tests for eda_llm_insights.

These tests touch neither the network nor credentials: _build_prompt and
_parse_llm_json are pure and can be exercised in isolation, and the only
path that would call the LLM (eda_llm_insights) is tested by
monkeypatching ask_llm with a simulated response.
"""

import importlib
import json

from datascience.eda_llm_insights import (
    _build_prompt,
    _parse_llm_json,
    eda_llm_insights,
)

# Numeric column of the sample profile.
_IMPORTE_COLUMN = {
    "name": "importe",
    "inferred_type": "numeric",
    "semantic_type": "currency",
    "null_pct": 0.0,
    "distinct_count": 950,
    "numeric": {"min": 1.0, "max": 999.0, "mean": 50.5, "p50": 42.0},
    "categorical": None,
}

# Categorical column of the sample profile.
_CATEGORIA_COLUMN = {
    "name": "categoria",
    "inferred_type": "categorical",
    "semantic_type": "",
    "null_pct": 0.05,
    "distinct_count": 3,
    "numeric": None,
    "categorical": {
        "top": [
            {"value": "neumaticos", "count": 600, "pct": 0.6},
            {"value": "frenos", "count": 300, "pct": 0.3},
            {"value": "aceite", "count": 100, "pct": 0.1},
        ],
        "mode": "neumaticos",
    },
}

# Sample profile with the shape that profile_table produces.
_PROFILE = {
    "table": "ventas",
    "n_rows": 1000,
    "columns": [_IMPORTE_COLUMN, _CATEGORIA_COLUMN],
    "correlations": {
        "strong": [
            {
                "a": "importe",
                "b": "categoria",
                "method": "correlation_ratio",
                "value": 0.72,
            },
        ],
    },
}


def _stub_ask_llm(monkeypatch, response):
    """Replace ``ask_llm`` in the module under test with a canned reply.

    The stub accepts the same arguments the tests have always stubbed
    (``prompt``, ``model``, ``system``, ``echo``) and returns ``response``.
    """
    target = importlib.import_module("datascience.eda_llm_insights")
    monkeypatch.setattr(
        target,
        "ask_llm",
        lambda prompt, model="x", system="", echo=True: response,
    )


def test_build_prompt_includes_table_and_columns():
    """The prompt mentions the table, both column names and the row count."""
    prompt = _build_prompt(_PROFILE)
    assert isinstance(prompt, str)
    # Table name, the two column names, then n_rows.
    for fragment in ("ventas", "importe", "categoria", "1000"):
        assert fragment in prompt


def test_build_prompt_includes_numeric_stats_and_top_values():
    """The prompt carries numeric stats, top values and strong correlations."""
    prompt = _build_prompt(_PROFILE)
    expected_fragments = (
        "stats[",  # numeric stats block of "importe"
        "mean=50.5",
        "neumaticos",  # top value of the categorical column
        "correlation_ratio",  # strong correlations
    )
    for fragment in expected_fragments:
        assert fragment in prompt


def test_build_prompt_handles_empty_profile():
    """An empty profile still yields a string prompt reporting zero columns."""
    prompt = _build_prompt({})
    assert isinstance(prompt, str)
    assert "Columnas: 0" in prompt


def test_parse_llm_json_plain():
    """A bare JSON document is parsed as-is."""
    raw = json.dumps({"summary": "una tabla", "dictionary": [], "pii": []})
    assert _parse_llm_json(raw)["summary"] == "una tabla"


def test_parse_llm_json_with_fences():
    """JSON wrapped in a Markdown code fence is still parsed."""
    body = json.dumps({"summary": "con fences", "analyses": ["a1"]})
    result = _parse_llm_json("```json\n" + body + "\n```")
    assert result["summary"] == "con fences"
    assert result["analyses"] == ["a1"]


def test_parse_llm_json_with_surrounding_text():
    """Prose before and after the JSON object is ignored."""
    body = json.dumps({"summary": "rodeado"})
    wrapped = "Aqui tienes el resultado:\n" + body + "\nEspero que sirva."
    assert _parse_llm_json(wrapped)["summary"] == "rodeado"


def test_parse_llm_json_nested_braces_in_strings():
    """Braces inside string values must not break brace matching."""
    raw = '{"summary": "usa {placeholders}", "cleaning": ["fix {x}"]}'
    result = _parse_llm_json(raw)
    assert result["summary"] == "usa {placeholders}"
    assert result["cleaning"] == ["fix {x}"]


def test_parse_llm_json_raises_without_object():
    """Text without any JSON object raises ValueError."""
    try:
        _parse_llm_json("no hay json aqui")
    except ValueError:
        pass
    else:
        assert False, "esperaba ValueError"


def test_eda_llm_insights_ok_with_monkeypatched_llm(monkeypatch):
    """Simulate the LLM reply and check the output shape (no network)."""
    canned = {
        "summary": "Tabla de ventas",
        "row_meaning": "Una fila = una venta",
        "dictionary": [
            {
                "column": "importe",
                "description": "monto",
                "business_meaning": "ingreso",
                "unit": "EUR",
            }
        ],
        "pii": [],
        "cleaning": ["normalizar categoria"],
        "analyses": ["ventas por categoria"],
    }
    _stub_ask_llm(monkeypatch, json.dumps(canned))

    result = eda_llm_insights(_PROFILE)
    assert result["status"] == "ok"

    insights = result["llm"]
    expected_keys = {
        "summary",
        "row_meaning",
        "dictionary",
        "pii",
        "cleaning",
        "analyses",
    }
    assert set(insights) == expected_keys
    assert insights["summary"] == "Tabla de ventas"
    assert insights["dictionary"][0]["unit"] == "EUR"


def test_eda_llm_insights_fills_missing_keys(monkeypatch):
    """Keys omitted by the LLM are filled in with empty defaults."""
    _stub_ask_llm(monkeypatch, '{"summary": "solo summary"}')

    result = eda_llm_insights(_PROFILE)
    assert result["status"] == "ok"

    insights = result["llm"]
    assert insights["summary"] == "solo summary"
    for list_key in ("dictionary", "pii", "cleaning", "analyses"):
        assert insights[list_key] == []
    assert insights["row_meaning"] == ""


def test_eda_llm_insights_error_on_empty_profile():
    """An empty profile is reported as an error that mentions the profile."""
    result = eda_llm_insights({})
    assert result["status"] == "error"
    assert "profile" in result["error"]


def test_eda_llm_insights_error_on_empty_llm_response(monkeypatch):
    """An empty LLM reply yields an error status."""
    _stub_ask_llm(monkeypatch, "")
    assert eda_llm_insights(_PROFILE)["status"] == "error"


def test_eda_llm_insights_error_on_unparseable_llm_response(monkeypatch):
    """An LLM reply without JSON yields an error status."""
    _stub_ask_llm(monkeypatch, "sin json")
    assert eda_llm_insights(_PROFILE)["status"] == "error"