"""Tests para describe_clusters_llm. NO acceden a red ni a credenciales: _parse_clusters_json es testeable aislada y la unica via que llamaria al LLM (describe_clusters_llm) se prueba monkeypatcheando ask_llm con respuestas simuladas. Cubre golden (LLM ok), edge (cluster faltante, array envuelto en basura, lista vacia / input no-lista) y error (LLM caido, texto no parseable) — todos sin tocar la red. """ import importlib import json from datascience.describe_clusters_llm import ( _parse_clusters_json, describe_clusters_llm, ) # Perfiles de ejemplo con la forma que produce project_clusters_2d. _PROFILES = [ { "cluster": 0, "size": 60, "pct": 60.0, "centroid_original": {"acidez": 8.5, "alcohol": 9.2}, "distinctive": ["acidez", "alcohol"], "centroid_z": {"acidez": 1.4, "alcohol": -0.9}, }, { "cluster": 1, "size": 40, "pct": 40.0, "centroid_original": {"acidez": 5.1, "alcohol": 13.0}, "distinctive": ["alcohol"], "centroid_z": {"acidez": -0.7, "alcohol": 1.6}, }, ] _FEATURES = ["acidez", "alcohol", "azucar"] def _patch_ask_llm(monkeypatch, returner): """Monkeypatchea ask_llm en el modulo bajo prueba con un callable simulado.""" mod = importlib.import_module("datascience.describe_clusters_llm") monkeypatch.setattr( mod, "ask_llm", lambda prompt, model="x", system="", echo=True: returner ) # --- _parse_clusters_json (parser puro, sin red) --- def test_parse_clusters_json_valid_array(): text = json.dumps( [ {"cluster": 0, "title": "A", "description": "desc a"}, {"cluster": 1, "title": "B", "description": "desc b"}, ] ) parsed = _parse_clusters_json(text, 2) assert parsed == [ {"cluster": 0, "title": "A", "description": "desc a"}, {"cluster": 1, "title": "B", "description": "desc b"}, ] def test_parse_clusters_json_wrapped_in_junk_text(): payload = [{"cluster": 0, "title": "Solo uno", "description": "d"}] text = "Claro, aqui tienes el resultado:\n" + json.dumps(payload) + "\nEspero que sirva." parsed = _parse_clusters_json(text, 1) assert parsed[0]["title"] == "Solo uno" assert parsed[0]["cluster"] == 0 def test_parse_clusters_json_non_json_returns_none(): # Texto sin array JSON -> degradacion (None) sin lanzar. assert _parse_clusters_json("no hay json aqui", 2) is None assert _parse_clusters_json("", 2) is None assert _parse_clusters_json("{solo un objeto}", 2) is None def test_parse_clusters_json_fills_missing_cluster_by_index(): text = json.dumps( [ {"title": "A", "description": "d"}, {"title": "B", "description": "e"}, ] ) parsed = _parse_clusters_json(text, 2) assert parsed[0]["cluster"] == 0 assert parsed[1]["cluster"] == 1 assert parsed[0]["title"] == "A" # --- describe_clusters_llm (con ask_llm monkeypatcheado, sin red) --- def test_describe_clusters_llm_ok_with_monkeypatched_llm(monkeypatch): fake = json.dumps( [ { "cluster": 0, "title": "Vinos de alta acidez", "description": "Acidez por encima de la media y graduacion baja.", }, { "cluster": 1, "title": "Vinos de alta graduacion", "description": "Alcohol claramente por encima de la media.", }, ] ) _patch_ask_llm(monkeypatch, fake) out = describe_clusters_llm(_PROFILES, _FEATURES) assert out["note"] == "" assert out["model"] == "claude-haiku-4-5-20251001" assert len(out["clusters"]) == 2 assert out["clusters"][0]["title"] == "Vinos de alta acidez" assert set(out["clusters"][0].keys()) == {"cluster", "title", "description"} def test_describe_clusters_llm_degrades_on_empty_response(monkeypatch): # ask_llm devuelve "" (error/red caida) -> titulos genericos + note. _patch_ask_llm(monkeypatch, "") out = describe_clusters_llm(_PROFILES, _FEATURES) assert out["clusters"][0]["title"] == "Cluster 0" assert out["clusters"][1]["title"] == "Cluster 1" assert out["clusters"][0]["description"] == "" assert out["note"] == "LLM no disponible" assert out["model"] == "claude-haiku-4-5-20251001" def test_describe_clusters_llm_degrades_on_unparseable_response(monkeypatch): _patch_ask_llm(monkeypatch, "lo siento, no puedo ayudarte con eso") out = describe_clusters_llm(_PROFILES, _FEATURES) assert out["clusters"][0]["title"] == "Cluster 0" assert out["clusters"][1]["title"] == "Cluster 1" assert out["note"] == "parse fallido" def test_describe_clusters_llm_empty_list_skips_llm(monkeypatch): # Con lista vacia NO debe llamarse al LLM en absoluto. def boom(*args, **kwargs): raise AssertionError("ask_llm no debe llamarse con lista vacia") mod = importlib.import_module("datascience.describe_clusters_llm") monkeypatch.setattr(mod, "ask_llm", boom) out = describe_clusters_llm([], _FEATURES) assert out["clusters"] == [] assert out["note"] == "sin clusters" def test_describe_clusters_llm_non_list_input_skips_llm(): # Input no-lista (None) -> clusters vacio sin tocar la red. out = describe_clusters_llm(None, _FEATURES) assert out["clusters"] == [] assert out["note"] == "sin clusters" assert out["model"] == "claude-haiku-4-5-20251001"