"""Tests para resample_timeseries (grupo eda)."""

import datetime
import os
import sys

# Put this file's own directory at the front of the import search path so the
# `resample_timeseries` import below can find a module located next to it.
sys.path.insert(0, os.path.dirname(__file__))

from resample_timeseries import resample_timeseries


def test_daily_a_mensual_mean():
    """Daily samples are grouped into monthly buckets and averaged (agg="mean")."""
    samples = [
        ("2020-01-01", 10.0),
        ("2020-01-15", 20.0),
        ("2020-02-01", 30.0),
        ("2020-02-10", 40.0),
        ("2020-02-20", 50.0),
    ]
    dates = [day for day, _ in samples]
    values = [value for _, value in samples]

    result = resample_timeseries(dates, values, freq="monthly", agg="mean")

    # Two buckets are expected, one per month present in the input.
    expected_fields = {
        "t": ["2020-01-01", "2020-02-01"],
        "v": [15.0, 40.0],  # (10+20)/2 and (30+40+50)/3
        "count": [2, 3],
        "freq": "monthly",
        "agg": "mean",
        "n_in": 5,
        "n_buckets": 2,
    }
    for field, expected in expected_fields.items():
        assert result[field] == expected

    # Only two buckets, so no downsampling and no diagnostic note.
    assert result["downsampled"] is False
    assert result["note"] == ""


def test_agg_sum_y_last():
    """Check the "sum" and "last" aggregations on monthly buckets."""
    ordered = [
        ("2020-01-01", 10.0),
        ("2020-01-15", 20.0),
        ("2020-02-01", 30.0),
        ("2020-02-10", 40.0),
        ("2020-02-20", 50.0),
    ]
    dates = [day for day, _ in ordered]
    values = [value for _, value in ordered]

    summed = resample_timeseries(dates, values, freq="monthly", agg="sum")
    assert summed["v"] == [30.0, 120.0]
    assert summed["agg"] == "sum"

    # "last" must pick the value of the chronologically latest observation in
    # each bucket, even when the input arrives out of order.
    shuffled = [
        ("2020-02-20", 50.0),
        ("2020-02-01", 30.0),
        ("2020-02-10", 40.0),
        ("2020-01-15", 20.0),
        ("2020-01-01", 10.0),
    ]
    shuffled_dates = [day for day, _ in shuffled]
    shuffled_values = [value for _, value in shuffled]

    latest = resample_timeseries(
        shuffled_dates, shuffled_values, freq="monthly", agg="last"
    )
    assert latest["t"] == ["2020-01-01", "2020-02-01"]
    # January -> value at 2020-01-15 (20); February -> value at 2020-02-20 (50).
    assert latest["v"] == [20.0, 50.0]
    assert latest["agg"] == "last"


def test_count_cuenta_observacion_con_valor_none():
    """count includes rows whose value is None; v aggregates only valid values."""
    # One bucket containing a None value next to two numeric ones.
    march_dates = ["2020-03-05", "2020-03-06", "2020-03-20"]
    march_values = [None, 7.0, 9.0]
    march = resample_timeseries(
        march_dates, march_values, freq="monthly", agg="mean"
    )

    assert march["t"] == ["2020-03-01"]
    assert march["count"] == [3]  # all three rows carry a valid date
    assert march["v"] == [8.0]  # mean of the valid values: (7+9)/2
    assert march["n_in"] == 3

    # A bucket without any valid numeric value yields v = None, while count
    # still reports the number of rows.
    april_dates = ["2020-04-01", "2020-04-02"]
    april_values = [None, "n/a"]
    april = resample_timeseries(april_dates, april_values, freq="monthly")

    assert april["t"] == ["2020-04-01"]
    assert april["count"] == [2]
    assert april["v"] == [None]


def test_downsampling_respeta_max_points_y_extremos():
    """Downsampling honours max_points and keeps the first and last buckets."""
    n_days = 500
    start = datetime.date(2021, 1, 1)
    days = [start + datetime.timedelta(days=offset) for offset in range(n_days)]
    dates = [day.isoformat() for day in days]
    values = [float(offset) for offset in range(n_days)]

    result = resample_timeseries(
        dates, values, freq="daily", agg="mean", max_points=400
    )

    # 500 daily buckets exceed max_points=400, so the output must be thinned.
    assert result["n_buckets"] == 500
    assert result["downsampled"] is True
    assert len(result["t"]) <= 400
    # The three parallel output lists stay aligned.
    assert len(result["t"]) == len(result["v"]) == len(result["count"])

    # The first and the last bucket survive the thinning.
    final_day = start + datetime.timedelta(days=499)
    assert result["t"][0] == "2021-01-01"
    assert result["t"][-1] == final_day.isoformat()


def test_freq_auto_infiere_mensual():
    """freq="auto" infers "monthly" or "daily" from the spacing of the dates."""
    # One observation on the first day of every month of 2022 -> "monthly".
    months = range(1, 13)
    monthly_dates = [datetime.date(2022, month, 1).isoformat() for month in months]
    monthly_values = [float(month) for month in months]
    monthly = resample_timeseries(
        monthly_dates, monthly_values, freq="auto", agg="mean"
    )

    assert monthly["freq"] == "monthly"
    assert monthly["n_buckets"] == 12
    assert monthly["count"] == [1] * 12

    # Twenty consecutive calendar days -> "daily".
    first_day = datetime.date(2023, 1, 1)
    offsets = range(20)
    daily_dates = [
        (first_day + datetime.timedelta(days=offset)).isoformat()
        for offset in offsets
    ]
    daily_values = [float(offset) for offset in offsets]
    daily = resample_timeseries(daily_dates, daily_values, freq="auto")
    assert daily["freq"] == "daily"


def test_edge_listas_vacias_o_desiguales():
    """Empty, mismatched or all-invalid inputs report "datos insuficientes"."""
    # Empty inputs: every output list is empty and the counters are zero.
    empty = resample_timeseries([], [])
    assert empty["t"] == []
    assert empty["v"] == []
    assert empty["count"] == []
    assert empty["note"] == "datos insuficientes"
    assert empty["n_in"] == 0
    assert empty["n_buckets"] == 0

    # Date and value lists of different lengths.
    mismatched = resample_timeseries(["2020-01-01", "2020-01-02"], [1.0])
    assert mismatched["note"] == "datos insuficientes"
    assert mismatched["t"] == []

    # Every date is unparseable, which also counts as insufficient data.
    unparseable = resample_timeseries(["no-fecha", "tampoco"], [1.0, 2.0])
    assert unparseable["note"] == "datos insuficientes"
    assert unparseable["n_in"] == 0