a69d14d38e
Capítulo nuevo build_timeseries(profile, ctx) -> Chapter|None del motor AutomaticEDA. Cuando la tabla tiene columna de fecha/datetime, grafica la evolución de cada columna numérica por periodo (valor agregado + conteo de filas) y los paneles de descomposición STL y autocorrelación (ACF), con el análisis de la serie: estacionariedad (ADF+KPSS), autocorrelación (Ljung-Box), fuerzas de tendencia/estacionalidad (Hyndman) y la transformación sugerida (retornos o diferencias) para evitar correlaciones espurias. Sin columna temporal devuelve None. Consolida series OHLC casi idénticas en un único gráfico conservando el análisis de cada columna. La serie cruda llega por ctx['timeseries_raw'] (mismo patrón que modelos con raw_numeric); las figuras son perezosas (Figure.make) y el paginador del núcleo garantiza no-corte en PDF y PPTX. CHAPTER_VERSION 1.0.0. Cubre los MUST del diseño (report 2043): MUST-9.1 (línea valor-vs-tiempo + conteo por periodo), MUST-9.2 (paneles STL + ACF), MUST-9.3 (perfil datetime + consolidación OHLC). Funciones nuevas del registry (grupo eda), delegadas a fn-constructor, no inline: - detect_time_column (pure): detecta la columna temporal y las numéricas - profile_datetime (pure): rango/frecuencia/regularidad/huecos de la fecha - resample_timeseries (pure): agrega la serie por periodo + conteo - extract_timeseries_raw (impure): lee la serie cruda ordenada de DuckDB/PG Verificación: 69 tests verdes (capítulo 9 + funciones 28 + núcleo/renderers); golden real sobre seattle-weather (estacional) y aapl (OHLC) con PDF+PPTX sin cortar nada (cols_cortadas=[]). Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
103 lines
3.6 KiB
Python
103 lines
3.6 KiB
Python
"""Tests para detect_time_column (grupo eda). Self-contained, sin DuckDB."""
|
|
|
|
from detect_time_column import detect_time_column
|
|
|
|
|
|
def test_golden_datetime_y_numericas():
    """Golden path: one datetime column next to numeric and text columns.

    The datetime column must be reported as the time column together with
    its semantic type, the numeric columns must be listed in input order,
    and a non-empty string reason must be returned.
    """
    schema = [
        {
            "name": "fecha",
            "inferred_type": "datetime",
            "semantic_type": "datetime_iso",
        },
        {"name": "ventas", "inferred_type": "numeric"},
        {"name": "unidades", "inferred_type": "numeric"},
        {"name": "region", "inferred_type": "text"},
    ]
    expected = {
        "time_col": "fecha",
        "time_semantic": "datetime_iso",
        "numeric_cols": ["ventas", "unidades"],
        "n_datetime_cols": 1,
        "datetime_cols": ["fecha"],
    }

    detected = detect_time_column(schema)

    # Compare only the keys under test; other keys may exist in the result.
    assert {key: detected[key] for key in expected} == expected

    reason = detected["reason"]
    assert isinstance(reason, str) and reason
|
|
|
|
|
|
def test_deteccion_por_semantic_type_date_eu():
    """A text column whose semantic_type is date_eu is detected as temporal."""
    # inferred_type is not datetime, but semantic_type date_eu => temporal.
    schema = [
        {"name": "id", "inferred_type": "numeric"},
        {"name": "dia", "inferred_type": "text", "semantic_type": "date_eu"},
        {"name": "importe", "inferred_type": "numeric"},
    ]
    expected = {
        "time_col": "dia",
        "time_semantic": "date_eu",
        "numeric_cols": ["id", "importe"],
        "n_datetime_cols": 1,
        "datetime_cols": ["dia"],
    }

    detected = detect_time_column(schema)

    # Compare only the keys under test; other keys may exist in the result.
    assert {key: detected[key] for key in expected} == expected
|
|
|
|
|
|
def test_sin_columna_temporal():
    """Without any date/datetime column the result reports "no time column".

    Numeric columns are still collected; the boolean and text columns are
    not listed as numeric.
    """
    schema = [
        {"name": "id", "inferred_type": "numeric"},
        {"name": "nombre", "inferred_type": "text"},
        {"name": "activo", "inferred_type": "boolean"},
    ]
    expected = {
        "time_semantic": "",
        "numeric_cols": ["id"],
        "n_datetime_cols": 0,
        "datetime_cols": [],
        "reason": "no se detecto columna de fecha/datetime",
    }

    detected = detect_time_column(schema)

    # Identity check kept separate: the contract is None, not merely falsy.
    assert detected["time_col"] is None
    assert {key: detected[key] for key in expected} == expected
|
|
|
|
|
|
def test_columns_none_no_revienta():
    """Passing None instead of a column list yields the empty result."""
    expected = {
        "time_semantic": "",
        "numeric_cols": [],
        "n_datetime_cols": 0,
        "datetime_cols": [],
        "reason": "no se detecto columna de fecha/datetime",
    }

    detected = detect_time_column(None)

    # Identity check kept separate: the contract is None, not merely falsy.
    assert detected["time_col"] is None
    assert {key: detected[key] for key in expected} == expected
|
|
|
|
|
|
def test_columns_vacia_no_revienta():
    """An empty column list yields no time column and no numeric columns."""
    detected = detect_time_column([])

    assert detected["time_col"] is None
    assert (detected["numeric_cols"], detected["n_datetime_cols"]) == ([], 0)
|
|
|
|
|
|
def test_columns_no_lista_no_revienta():
    """A non-list argument is handled without raising."""
    # A dict (not a list) must also fall into the "not applicable" case,
    # even though it looks like a single datetime column descriptor.
    not_a_list = {"name": "fecha", "inferred_type": "datetime"}

    detected = detect_time_column(not_a_list)

    assert detected["time_col"] is None
    assert detected["numeric_cols"] == []
|
|
|
|
|
|
def test_elementos_basura_se_ignoran():
    """Non-dict entries in the column list are skipped, valid ones are kept."""
    junk_entries = [None, "no soy un dict", 42]
    valid_entries = [
        {"name": "ts", "inferred_type": "datetime"},
        {"name": "valor", "inferred_type": "numeric"},
    ]
    expected = {
        "time_col": "ts",
        "numeric_cols": ["valor"],
        "n_datetime_cols": 1,
    }

    # Junk comes first, exactly as in the original input ordering.
    detected = detect_time_column(junk_entries + valid_entries)

    assert {key: detected[key] for key in expected} == expected
|
|
|
|
|
|
def test_varias_datetime_elige_la_primera():
    """With several temporal columns, the first one in input order wins.

    All temporal columns (by inferred_type or by semantic_type) are still
    counted and listed in input order.
    """
    schema = [
        {
            "name": "created_at",
            "inferred_type": "datetime",
            "semantic_type": "datetime_iso",
        },
        {"name": "metric", "inferred_type": "numeric"},
        {
            "name": "updated_at",
            "inferred_type": "datetime",
            "semantic_type": "datetime_iso",
        },
        {
            "name": "fecha_baja",
            "inferred_type": "text",
            "semantic_type": "date_eu",
        },
    ]
    expected = {
        "time_col": "created_at",
        "time_semantic": "datetime_iso",
        "n_datetime_cols": 3,
        "datetime_cols": ["created_at", "updated_at", "fecha_baja"],
        "numeric_cols": ["metric"],
    }

    detected = detect_time_column(schema)

    # Compare only the keys under test; other keys may exist in the result.
    assert {key: detected[key] for key in expected} == expected
|