Files
fn_registry/python/functions/datascience/detect_time_column_test.py
T
egutierrez a69d14d38e feat(eda): capítulo TIMESERIES del AutomaticEDA (evolución + análisis de serie)
Capítulo nuevo build_timeseries(profile, ctx) -> Chapter|None del motor
AutomaticEDA. Cuando la tabla tiene columna de fecha/datetime, grafica la
evolución de cada columna numérica por periodo (valor agregado + conteo de filas)
y los paneles de descomposición STL y autocorrelación (ACF), con el análisis de
la serie: estacionariedad (ADF+KPSS), autocorrelación (Ljung-Box), fuerzas de
tendencia/estacionalidad (Hyndman) y la transformación sugerida (retornos o
diferencias) para evitar correlaciones espurias. Sin columna temporal devuelve
None. Consolida series OHLC casi idénticas en un único gráfico conservando el
análisis de cada columna.

La serie cruda llega por ctx['timeseries_raw'] (mismo patrón que modelos con
raw_numeric); las figuras son perezosas (Figure.make) y el paginador del núcleo
garantiza no-corte en PDF y PPTX. CHAPTER_VERSION 1.0.0.

Cubre los MUST del diseño (report 2043): MUST-9.1 (línea valor-vs-tiempo + conteo
por periodo), MUST-9.2 (paneles STL + ACF), MUST-9.3 (perfil datetime +
consolidación OHLC).

Funciones nuevas del registry (grupo eda), delegadas a fn-constructor, no inline:
- detect_time_column (pure): detecta la columna temporal y las numéricas
- profile_datetime (pure): rango/frecuencia/regularidad/huecos de la fecha
- resample_timeseries (pure): agrega la serie por periodo + conteo
- extract_timeseries_raw (impure): lee la serie cruda ordenada de DuckDB/PG

Verificación: 69 tests verdes (capítulo 9 + funciones 28 + núcleo/renderers);
golden real sobre seattle-weather (estacional) y aapl (OHLC) con PDF+PPTX sin
cortar nada (cols_cortadas=[]).

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
2026-06-30 15:35:42 +02:00

103 lines
3.6 KiB
Python

"""Tests para detect_time_column (grupo eda). Self-contained, sin DuckDB."""
from detect_time_column import detect_time_column
def test_golden_datetime_y_numericas():
    """Golden path: a datetime column alongside numeric and text columns.

    The datetime column must be reported as the time column with its
    semantic type, the numeric columns must be listed as given, and a
    non-empty textual reason must be present.
    """
    schema = [
        {
            "name": "fecha",
            "inferred_type": "datetime",
            "semantic_type": "datetime_iso",
        },
        {"name": "ventas", "inferred_type": "numeric"},
        {"name": "unidades", "inferred_type": "numeric"},
        {"name": "region", "inferred_type": "text"},
    ]

    detected = detect_time_column(schema)

    # Table of exact-match expectations, checked key by key.
    expected = {
        "time_col": "fecha",
        "time_semantic": "datetime_iso",
        "numeric_cols": ["ventas", "unidades"],
        "n_datetime_cols": 1,
        "datetime_cols": ["fecha"],
    }
    for key, value in expected.items():
        assert detected[key] == value, key

    # The reason is only required to be a non-empty string here.
    assert isinstance(detected["reason"], str) and detected["reason"]
def test_deteccion_por_semantic_type_date_eu():
    """A text column tagged with semantic type ``date_eu`` counts as temporal."""
    # inferred_type is not datetime, but semantic_type date_eu => temporal.
    schema = [
        {"name": "id", "inferred_type": "numeric"},
        {"name": "dia", "inferred_type": "text", "semantic_type": "date_eu"},
        {"name": "importe", "inferred_type": "numeric"},
    ]

    detected = detect_time_column(schema)

    checks = (
        ("time_col", "dia"),
        ("time_semantic", "date_eu"),
        ("numeric_cols", ["id", "importe"]),
        ("n_datetime_cols", 1),
        ("datetime_cols", ["dia"]),
    )
    for key, value in checks:
        assert detected[key] == value, key
def test_sin_columna_temporal():
    """Without any date/datetime column the result reports no time column."""
    schema = [
        {"name": "id", "inferred_type": "numeric"},
        {"name": "nombre", "inferred_type": "text"},
        {"name": "activo", "inferred_type": "boolean"},
    ]

    detected = detect_time_column(schema)

    # The absent time column is signalled with None (identity check).
    assert detected["time_col"] is None

    expected = {
        "time_semantic": "",
        "numeric_cols": ["id"],
        "n_datetime_cols": 0,
        "datetime_cols": [],
        "reason": "no se detecto columna de fecha/datetime",
    }
    for key, value in expected.items():
        assert detected[key] == value, key
def test_columns_none_no_revienta():
    """Passing ``None`` as the column list yields the empty result, not a crash."""
    detected = detect_time_column(None)

    assert detected["time_col"] is None

    expected = {
        "time_semantic": "",
        "numeric_cols": [],
        "n_datetime_cols": 0,
        "datetime_cols": [],
        "reason": "no se detecto columna de fecha/datetime",
    }
    for key, value in expected.items():
        assert detected[key] == value, key
def test_columns_vacia_no_revienta():
    """An empty column list yields no time column and no numeric columns."""
    detected = detect_time_column([])

    assert detected["time_col"] is None
    for key, value in (("numeric_cols", []), ("n_datetime_cols", 0)):
        assert detected[key] == value, key
def test_columns_no_lista_no_revienta():
    """A non-list argument is treated as "not applicable" instead of failing."""
    # A dict (not a list) must also fall into the "does not apply" case.
    not_a_list = {"name": "fecha", "inferred_type": "datetime"}

    detected = detect_time_column(not_a_list)

    assert detected["time_col"] is None
    assert detected["numeric_cols"] == []
def test_elementos_basura_se_ignoran():
    """Non-dict entries in the column list are skipped; valid ones still count."""
    junk = [None, "no soy un dict", 42]
    valid = [
        {"name": "ts", "inferred_type": "datetime"},
        {"name": "valor", "inferred_type": "numeric"},
    ]

    # Junk entries come first, followed by the two well-formed columns.
    detected = detect_time_column(junk + valid)

    expected = {
        "time_col": "ts",
        "numeric_cols": ["valor"],
        "n_datetime_cols": 1,
    }
    for key, value in expected.items():
        assert detected[key] == value, key
def test_varias_datetime_elige_la_primera():
    """With several temporal columns the first one listed is the time column.

    All temporal columns (by inferred type or by semantic type) must still
    be counted and listed in ``datetime_cols``.
    """
    schema = [
        {
            "name": "created_at",
            "inferred_type": "datetime",
            "semantic_type": "datetime_iso",
        },
        {"name": "metric", "inferred_type": "numeric"},
        {
            "name": "updated_at",
            "inferred_type": "datetime",
            "semantic_type": "datetime_iso",
        },
        {
            "name": "fecha_baja",
            "inferred_type": "text",
            "semantic_type": "date_eu",
        },
    ]

    detected = detect_time_column(schema)

    checks = (
        ("time_col", "created_at"),
        ("time_semantic", "datetime_iso"),
        ("n_datetime_cols", 3),
        ("datetime_cols", ["created_at", "updated_at", "fecha_baja"]),
        ("numeric_cols", ["metric"]),
    )
    for key, value in checks:
        assert detected[key] == value, key