"""Tests para describe_clusters_llm.

NO acceden a red ni a credenciales: _parse_clusters_json es testeable aislada y la
unica via que llamaria al LLM (describe_clusters_llm) se prueba monkeypatcheando
ask_llm con respuestas simuladas. Cubre golden (LLM ok), edge (cluster faltante,
array envuelto en basura, lista vacia / input no-lista) y error (LLM caido, texto
no parseable) — todos sin tocar la red.
"""

import importlib
import json

from datascience.describe_clusters_llm import (
    _parse_clusters_json,
    describe_clusters_llm,
)

# Sample profiles shaped like the output of project_clusters_2d (per the original
# author's note; that producer is not visible from this file). Two clusters of
# sizes 60 and 40 whose "pct" values add up to 100.
_PROFILES = [
    {
        "cluster": 0,
        "size": 60,
        "pct": 60.0,
        "centroid_original": {"acidez": 8.5, "alcohol": 9.2},
        "distinctive": ["acidez", "alcohol"],
        "centroid_z": {"acidez": 1.4, "alcohol": -0.9},
    },
    {
        "cluster": 1,
        "size": 40,
        "pct": 40.0,
        "centroid_original": {"acidez": 5.1, "alcohol": 13.0},
        "distinctive": ["alcohol"],
        "centroid_z": {"acidez": -0.7, "alcohol": 1.6},
    },
]
# Feature names passed alongside the profiles. Note that "azucar" is listed here
# but has no entry in either centroid above.
_FEATURES = ["acidez", "alcohol", "azucar"]


def _patch_ask_llm(monkeypatch, returner):
    """Swap ask_llm in the module under test for a canned-response stub.

    Despite its name, ``returner`` is never called: the stub hands it back
    unchanged as the simulated LLM reply, ignoring the arguments it receives.
    The replacement goes through ``monkeypatch.setattr`` on the imported
    ``datascience.describe_clusters_llm`` module object.
    """
    target_module = importlib.import_module("datascience.describe_clusters_llm")

    def _canned_reply(prompt, model="x", system="", echo=True):
        return returner

    monkeypatch.setattr(target_module, "ask_llm", _canned_reply)


# --- _parse_clusters_json (parser puro, sin red) ---


def test_parse_clusters_json_valid_array():
    """A well-formed JSON array of cluster objects is returned as that same list."""
    expected = [
        {"cluster": 0, "title": "A", "description": "desc a"},
        {"cluster": 1, "title": "B", "description": "desc b"},
    ]
    assert _parse_clusters_json(json.dumps(expected), 2) == expected


def test_parse_clusters_json_wrapped_in_junk_text():
    """The array is still recovered when chatty text surrounds it."""
    array_text = json.dumps([{"cluster": 0, "title": "Solo uno", "description": "d"}])
    noisy = f"Claro, aqui tienes el resultado:\n{array_text}\nEspero que sirva."
    first = _parse_clusters_json(noisy, 1)[0]
    assert first["title"] == "Solo uno"
    assert first["cluster"] == 0


def test_parse_clusters_json_non_json_returns_none():
    """Text holding no JSON array degrades to None instead of raising."""
    for text in ("no hay json aqui", "", "{solo un objeto}"):
        assert _parse_clusters_json(text, 2) is None


def test_parse_clusters_json_fills_missing_cluster_by_index():
    """Entries lacking a "cluster" key receive their position in the array."""
    items = [
        {"title": "A", "description": "d"},
        {"title": "B", "description": "e"},
    ]
    result = _parse_clusters_json(json.dumps(items), 2)
    assert (result[0]["cluster"], result[1]["cluster"]) == (0, 1)
    assert result[0]["title"] == "A"


# --- describe_clusters_llm (con ask_llm monkeypatcheado, sin red) ---


def test_describe_clusters_llm_ok_with_monkeypatched_llm(monkeypatch):
    """Golden path: a valid simulated reply is surfaced per cluster with no note."""
    canned_reply = [
        {
            "cluster": 0,
            "title": "Vinos de alta acidez",
            "description": "Acidez por encima de la media y graduacion baja.",
        },
        {
            "cluster": 1,
            "title": "Vinos de alta graduacion",
            "description": "Alcohol claramente por encima de la media.",
        },
    ]
    _patch_ask_llm(monkeypatch, json.dumps(canned_reply))

    result = describe_clusters_llm(_PROFILES, _FEATURES)

    assert result["note"] == ""
    assert result["model"] == "claude-haiku-4-5-20251001"
    assert len(result["clusters"]) == 2
    first = result["clusters"][0]
    assert first["title"] == "Vinos de alta acidez"
    # Each returned entry exposes exactly these three keys.
    assert set(first) == {"cluster", "title", "description"}


def test_describe_clusters_llm_degrades_on_empty_response(monkeypatch):
    """An empty reply from ask_llm yields generic titles plus an explanatory note."""
    # This suite uses "" to stand for an ask_llm failure (error / network down).
    _patch_ask_llm(monkeypatch, "")

    result = describe_clusters_llm(_PROFILES, _FEATURES)
    first, second = result["clusters"][0], result["clusters"][1]

    assert (first["title"], second["title"]) == ("Cluster 0", "Cluster 1")
    assert first["description"] == ""
    assert result["note"] == "LLM no disponible"
    assert result["model"] == "claude-haiku-4-5-20251001"


def test_describe_clusters_llm_degrades_on_unparseable_response(monkeypatch):
    """A reply containing no parseable array falls back to generic titles."""
    _patch_ask_llm(monkeypatch, "lo siento, no puedo ayudarte con eso")

    result = describe_clusters_llm(_PROFILES, _FEATURES)
    titles = (result["clusters"][0]["title"], result["clusters"][1]["title"])

    assert titles == ("Cluster 0", "Cluster 1")
    assert result["note"] == "parse fallido"


def test_describe_clusters_llm_empty_list_skips_llm(monkeypatch):
    """An empty profile list must produce its result without calling ask_llm."""

    def _must_not_run(*args, **kwargs):
        # Any invocation at all means the empty-input shortcut was bypassed.
        raise AssertionError("ask_llm no debe llamarse con lista vacia")

    module_under_test = importlib.import_module("datascience.describe_clusters_llm")
    monkeypatch.setattr(module_under_test, "ask_llm", _must_not_run)

    result = describe_clusters_llm([], _FEATURES)

    assert result["clusters"] == []
    assert result["note"] == "sin clusters"


def test_describe_clusters_llm_non_list_input_skips_llm(monkeypatch):
    """Non-list input (None) yields no clusters and must never reach ask_llm.

    ask_llm is replaced with a stub that raises, so a regression that starts
    calling the LLM for non-list input fails here deterministically instead of
    silently reaching for the real network/credentials.
    """

    def _must_not_run(*args, **kwargs):
        raise AssertionError("ask_llm no debe llamarse con input no-lista")

    module_under_test = importlib.import_module("datascience.describe_clusters_llm")
    monkeypatch.setattr(module_under_test, "ask_llm", _must_not_run)

    out = describe_clusters_llm(None, _FEATURES)
    assert out["clusters"] == []
    assert out["note"] == "sin clusters"
    assert out["model"] == "claude-haiku-4-5-20251001"