a69d14d38e
Capítulo nuevo build_timeseries(profile, ctx) -> Chapter|None del motor AutomaticEDA.

Cuando la tabla tiene columna de fecha/datetime, grafica la evolución de cada columna numérica por periodo (valor agregado + conteo de filas) y los paneles de descomposición STL y autocorrelación (ACF), con el análisis de la serie: estacionariedad (ADF+KPSS), autocorrelación (Ljung-Box), fuerzas de tendencia/estacionalidad (Hyndman) y la transformación sugerida (retornos o diferencias) para evitar correlaciones espurias. Sin columna temporal devuelve None. Consolida series OHLC casi idénticas en un único gráfico conservando el análisis de cada columna.

La serie cruda llega por ctx['timeseries_raw'] (mismo patrón que modelos con raw_numeric); las figuras son perezosas (Figure.make) y el paginador del núcleo garantiza no-corte en PDF y PPTX. CHAPTER_VERSION 1.0.0.

Cubre los MUST del diseño (report 2043): MUST-9.1 (línea valor-vs-tiempo + conteo por periodo), MUST-9.2 (paneles STL + ACF), MUST-9.3 (perfil datetime + consolidación OHLC).

Funciones nuevas del registry (grupo eda), delegadas a fn-constructor, no inline:
- detect_time_column (pure): detecta la columna temporal y las numéricas
- profile_datetime (pure): rango/frecuencia/regularidad/huecos de la fecha
- resample_timeseries (pure): agrega la serie por periodo + conteo
- extract_timeseries_raw (impure): lee la serie cruda ordenada de DuckDB/PG

Verificación: 69 tests verdes (capítulo 9 + funciones 28 + núcleo/renderers); golden real sobre seattle-weather (estacional) y aapl (OHLC) con PDF+PPTX sin cortar nada (cols_cortadas=[]).

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
128 lines
3.8 KiB
Python
128 lines
3.8 KiB
Python
"""Tests para profile_datetime."""
|
|
|
|
from datetime import date, datetime, timedelta
|
|
|
|
from profile_datetime import profile_datetime
|
|
|
|
|
|
def test_serie_diaria_regular_golden():
    """Golden case: 30 consecutive days are a regular, gap-free daily series."""
    start = date(2021, 1, 1)
    days = [start + timedelta(days=offset) for offset in range(30)]

    profile = profile_datetime(days)

    # Fields compared by equality, checked in this order.
    expected_head = (
        ("n", 30),
        ("n_distinct", 30),
        ("min", "2021-01-01"),
        ("max", "2021-01-30"),
        ("span_days", 29.0),
        ("freq", "daily"),
    )
    for key, value in expected_head:
        assert profile[key] == value

    # Identity check on purpose: a truthy non-bool must not pass.
    assert profile["is_regular"] is True

    expected_tail = (
        ("n_gaps", 0),
        ("median_step_days", 1.0),
        ("note", ""),
    )
    for key, value in expected_tail:
        assert profile[key] == value
|
|
|
|
|
|
def test_serie_mensual_freq_monthly():
    """First day of 14 consecutive months (2021-01 .. 2022-02) yields 'monthly'."""
    # month_index 0 is January 2021; integer division rolls the year over
    # after December, the remainder gives the zero-based month.
    first_of_months = [
        date(2021 + month_index // 12, month_index % 12 + 1, 1)
        for month_index in range(14)
    ]

    profile = profile_datetime(first_of_months)

    assert profile["n"] == 14
    assert profile["freq"] == "monthly"
    assert (profile["min"], profile["max"]) == ("2021-01-01", "2022-02-01")
    # The median step must land within the range of real month lengths.
    median_step = profile["median_step_days"]
    assert 28.0 <= median_step <= 31.0
|
|
|
|
|
|
def test_serie_con_hueco_cuenta_gaps():
    """A daily series missing offsets 7, 8 and 9 must report at least one gap."""
    start = date(2021, 1, 1)
    # Offsets 0..6 and 10..19: the three skipped days form a single hole.
    kept_offsets = [*range(7), *range(10, 20)]
    days = [start + timedelta(days=offset) for offset in kept_offsets]

    profile = profile_datetime(days)

    assert profile["freq"] == "daily"
    assert profile["n_gaps"] >= 1
    assert profile["median_step_days"] == 1.0
|
|
|
|
|
|
def test_strings_iso_mezclados_con_datetime():
    """ISO strings (date-only and with time) mixed with datetime/date objects."""
    iso_date_only = "2021-06-28"
    midday_datetime = datetime(2021, 6, 29, 12, 0, 0)
    iso_with_time = "2021-06-30T00:00:00"
    plain_date = date(2021, 7, 1)
    mixed = [iso_date_only, midday_datetime, iso_with_time, plain_date]

    profile = profile_datetime(mixed)

    # All four inputs are expected to be accepted and distinct.
    assert profile["n"] == 4
    assert profile["n_distinct"] == 4
    assert profile["min"] == "2021-06-28"
    assert profile["max"] == "2021-07-01"
    assert profile["freq"] == "daily"
    assert profile["note"] == ""
|
|
|
|
|
|
def test_lista_vacia_y_none_devuelve_unknown():
    """Both an empty list and None produce the 'insufficient data' profile."""
    empty_inputs = ([], None)
    for empty_input in empty_inputs:
        profile = profile_datetime(empty_input)

        # Counters are zero.
        for counter in ("n", "n_distinct"):
            assert profile[counter] == 0

        # Range fields are absent (identity check against None).
        for absent in ("min", "max", "span_days"):
            assert profile[absent] is None

        assert profile["freq"] == "unknown"
        assert profile["is_regular"] is False
        assert profile["n_gaps"] == 0
        assert profile["median_step_days"] is None
        assert profile["note"] == "datos insuficientes"
|
|
|
|
|
|
def test_valores_no_parseables_ignorados():
    """Junk strings, None, ints and empty strings are skipped; valid dates remain."""
    parseable_first = "2021-01-01"
    parseable_second = "2021-01-02"
    parseable_third = date(2021, 1, 3)
    mixed = [
        "no es una fecha",
        None,
        parseable_first,
        parseable_second,
        12345,
        "tampoco",
        parseable_third,
        "",
    ]

    profile = profile_datetime(mixed)

    # Only the three parseable dates are counted.
    assert profile["n"] == 3
    assert profile["n_distinct"] == 3
    assert profile["freq"] == "daily"
    assert profile["min"] == "2021-01-01"
    assert profile["max"] == "2021-01-03"
|
|
|
|
|
|
def test_span_days_correcto():
    """Two dates 365 days apart: span and median step are 365.0, freq 'yearly'."""
    first_day = date(2020, 1, 1)
    last_day = date(2020, 12, 31)

    profile = profile_datetime([first_day, last_day])

    assert profile["n"] == 2
    assert profile["n_distinct"] == 2
    # With a single step, the span and the median step coincide.
    for field in ("span_days", "median_step_days"):
        assert profile[field] == 365.0
    assert profile["freq"] == "yearly"
|
|
|
|
|
|
def test_una_sola_fecha_es_coherente():
    """A single value: min equals max, zero span, unknown freq, insufficient data."""
    only_value = "2021-06-28"

    profile = profile_datetime([only_value])

    assert profile["n"] == 1
    assert profile["n_distinct"] == 1
    # Both ends of the range collapse onto the single input date.
    assert profile["min"] == "2021-06-28"
    assert profile["max"] == "2021-06-28"
    assert profile["span_days"] == 0.0
    assert profile["freq"] == "unknown"
    # No step can be measured from one observation.
    assert profile["median_step_days"] is None
    assert profile["note"] == "datos insuficientes"
|