"""Tests para resample_timeseries (grupo eda).""" import datetime import os import sys sys.path.insert(0, os.path.dirname(__file__)) from resample_timeseries import resample_timeseries def test_daily_a_mensual_mean(): # Serie diaria agregada a buckets mensuales con agg="mean". t = [ "2020-01-01", "2020-01-15", "2020-02-01", "2020-02-10", "2020-02-20", ] v = [10.0, 20.0, 30.0, 40.0, 50.0] r = resample_timeseries(t, v, freq="monthly", agg="mean") assert r["t"] == ["2020-01-01", "2020-02-01"] assert r["v"] == [15.0, 40.0] # (10+20)/2 ; (30+40+50)/3 assert r["count"] == [2, 3] assert r["freq"] == "monthly" assert r["agg"] == "mean" assert r["n_in"] == 5 assert r["n_buckets"] == 2 assert r["downsampled"] is False assert r["note"] == "" def test_agg_sum_y_last(): t = [ "2020-01-01", "2020-01-15", "2020-02-01", "2020-02-10", "2020-02-20", ] v = [10.0, 20.0, 30.0, 40.0, 50.0] r_sum = resample_timeseries(t, v, freq="monthly", agg="sum") assert r_sum["v"] == [30.0, 120.0] assert r_sum["agg"] == "sum" # last = valor de la observacion cronologicamente mas reciente del bucket, # aunque el orden de entrada este desordenado. t2 = ["2020-02-20", "2020-02-01", "2020-02-10", "2020-01-15", "2020-01-01"] v2 = [50.0, 30.0, 40.0, 20.0, 10.0] r_last = resample_timeseries(t2, v2, freq="monthly", agg="last") assert r_last["t"] == ["2020-01-01", "2020-02-01"] assert r_last["v"] == [20.0, 50.0] # Jan->2020-01-15=20 ; Feb->2020-02-20=50 assert r_last["agg"] == "last" def test_count_cuenta_observacion_con_valor_none(): # Un bucket con un valor None: count cuenta la fila, v ignora el None. t = ["2020-03-05", "2020-03-06", "2020-03-20"] v = [None, 7.0, 9.0] r = resample_timeseries(t, v, freq="monthly", agg="mean") assert r["t"] == ["2020-03-01"] assert r["count"] == [3] # 3 filas con fecha valida assert r["v"] == [8.0] # media de los validos: (7+9)/2 assert r["n_in"] == 3 # Bucket entero sin ningun valor numerico valido -> v = None, count sigue. r2 = resample_timeseries( ["2020-04-01", "2020-04-02"], [None, "n/a"], freq="monthly" ) assert r2["t"] == ["2020-04-01"] assert r2["count"] == [2] assert r2["v"] == [None] def test_downsampling_respeta_max_points_y_extremos(): base = datetime.date(2021, 1, 1) t = [(base + datetime.timedelta(days=i)).isoformat() for i in range(500)] v = [float(i) for i in range(500)] r = resample_timeseries(t, v, freq="daily", agg="mean", max_points=400) assert r["n_buckets"] == 500 assert r["downsampled"] is True assert len(r["t"]) <= 400 assert len(r["t"]) == len(r["v"]) == len(r["count"]) # Primero y ultimo bucket conservados. assert r["t"][0] == "2021-01-01" assert r["t"][-1] == (base + datetime.timedelta(days=499)).isoformat() def test_freq_auto_infiere_mensual(): # Fechas separadas ~1 mes -> auto infiere "monthly". t = [f"2022-{m:02d}-01" for m in range(1, 13)] v = [float(m) for m in range(1, 13)] r = resample_timeseries(t, v, freq="auto", agg="mean") assert r["freq"] == "monthly" assert r["n_buckets"] == 12 assert r["count"] == [1] * 12 # Fechas diarias consecutivas -> auto infiere "daily". base = datetime.date(2023, 1, 1) td = [(base + datetime.timedelta(days=i)).isoformat() for i in range(20)] rd = resample_timeseries(td, [float(i) for i in range(20)], freq="auto") assert rd["freq"] == "daily" def test_edge_listas_vacias_o_desiguales(): vacio = resample_timeseries([], []) assert vacio["t"] == [] and vacio["v"] == [] and vacio["count"] == [] assert vacio["note"] == "datos insuficientes" assert vacio["n_in"] == 0 and vacio["n_buckets"] == 0 desigual = resample_timeseries(["2020-01-01", "2020-01-02"], [1.0]) assert desigual["note"] == "datos insuficientes" assert desigual["t"] == [] # Todas las fechas invalidas -> tambien insuficiente. invalidas = resample_timeseries(["no-fecha", "tampoco"], [1.0, 2.0]) assert invalidas["note"] == "datos insuficientes" assert invalidas["n_in"] == 0