chore: auto-commit (26 archivos)
- python/functions/bigquery/bq_auth.md - python/functions/bigquery/bq_load_from_file.md - python/functions/bigquery/bq_load_from_gcs.md - python/functions/bigquery/client.py - python/functions/bigquery/queries.py - python/functions/datascience/__init__.py - python/functions/datascience/decode_qr_image.py - python/functions/datascience/load_bq_table_to_duckdb.md - python/functions/datascience/load_bq_table_to_duckdb.py - python/functions/pipelines/profile_bq_table.md - ... Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -0,0 +1,298 @@
|
||||
"""run_sales_forecast — forecast diario de ventas Aurgi a BigQuery (one-shot).
|
||||
|
||||
Pipeline IMPURO que produce el forecast diario de ventas (dia x centro x
|
||||
subcategoria CGQ) y lo escribe en `autingo-159109.sales_forecast.predictions`.
|
||||
Compone funciones del registry sin reimplementar su logica:
|
||||
|
||||
- bq_auth(..., drop_quota_project=True): cliente BigQuery sin quota project ajeno
|
||||
(evita el 403 USER_PROJECT_DENIED del ADC del usuario).
|
||||
- bq_query: lee la historia agregada del mart `bi_ventas_mart.base_margenes_aa`
|
||||
y ejecuta el DELETE de idempotencia (parametros tipados).
|
||||
- forecast_seasonal_median: modelo PURO (mediana estacional + tendencia acotada)
|
||||
que genera todas las predicciones de golpe.
|
||||
- bq_load_from_file: carga las filas (JSONL) a la tabla de predicciones.
|
||||
|
||||
Cron previsto: 21:00. Por eso la historia utilizable llega hasta as_of - 1 dia
|
||||
(el dia as_of aun esta parcial) y se predice as_of + 1 .. as_of + horizon.
|
||||
|
||||
Estilo dict-no-throw: nunca lanza; captura errores y devuelve
|
||||
{status:'error', error, stage}. Idempotente por (run_date, model, author):
|
||||
borra las predicciones previas de esa combinacion antes de cargar.
|
||||
"""
|
||||
|
||||
import json
|
||||
import os
|
||||
import sys
|
||||
import tempfile
|
||||
from datetime import date, datetime, timedelta, timezone
|
||||
|
||||
sys.path.insert(0, os.path.join(os.path.dirname(__file__), ".."))
|
||||
|
||||
from bigquery import bq_auth, bq_query, bq_load_from_file
|
||||
from datascience import forecast_seasonal_median
|
||||
|
||||
PROJECT_ID = "autingo-159109"
|
||||
SOURCE_TABLE = "autingo-159109.bi_ventas_mart.base_margenes_aa"
|
||||
DEST_DATASET = "sales_forecast"
|
||||
DEST_TABLE = "predictions"
|
||||
|
||||
HISTORY_SQL = f"""
|
||||
SELECT fecha, idCentro, subcat_cqq,
|
||||
ANY_VALUE(NombreCentro) AS center_name, ANY_VALUE(Ambito) AS ambito,
|
||||
SUM(CAST(venta_n AS FLOAT64)) AS venta
|
||||
FROM `{SOURCE_TABLE}`
|
||||
WHERE fecha BETWEEN DATE_SUB(@as_of, INTERVAL 18 WEEK) AND DATE_SUB(@as_of, INTERVAL 1 DAY)
|
||||
AND venta_n IS NOT NULL AND ABS(CAST(venta_n AS FLOAT64)) < 1e9
|
||||
AND subcat_cqq IS NOT NULL AND idCentro IS NOT NULL
|
||||
GROUP BY fecha, idCentro, subcat_cqq
|
||||
"""
|
||||
|
||||
DELETE_SQL = (
|
||||
f"DELETE FROM `{PROJECT_ID}.{DEST_DATASET}.{DEST_TABLE}` "
|
||||
"WHERE run_date = @d AND model = @m AND author = @a"
|
||||
)
|
||||
|
||||
# Refresh de la tabla fisica de reales (sales_forecast.actuals_daily), consumida
|
||||
# por la vista forecast_eval y por los dashboards de competicion. Ventana movil
|
||||
# para recoger correcciones retroactivas del mart.
|
||||
ACTUALS_DELETE_SQL = (
|
||||
f"DELETE FROM `{PROJECT_ID}.{DEST_DATASET}.actuals_daily` "
|
||||
"WHERE fecha BETWEEN DATE_SUB(@as_of, INTERVAL @w DAY) AND DATE_SUB(@as_of, INTERVAL 1 DAY)"
|
||||
)
|
||||
|
||||
ACTUALS_INSERT_SQL = f"""
|
||||
INSERT INTO `{PROJECT_ID}.{DEST_DATASET}.actuals_daily`
|
||||
(fecha, center_id, center_name, ambito, subcat_cgq, y_real, unidades, loaded_ts)
|
||||
SELECT forecast_date, IFNULL(center_id, 'SIN_CENTRO'), center_name, ambito,
|
||||
IFNULL(subcat_cgq, 'Sin subcategoria'),
|
||||
y_real, unidades, CURRENT_TIMESTAMP()
|
||||
FROM `{PROJECT_ID}.{DEST_DATASET}.actuals`
|
||||
WHERE forecast_date BETWEEN DATE_SUB(@as_of, INTERVAL @w DAY) AND DATE_SUB(@as_of, INTERVAL 1 DAY)
|
||||
"""
|
||||
|
||||
|
||||
def _refresh_actuals(client, as_of: date, window_days: int = 10) -> None:
|
||||
"""Rehace los ultimos `window_days` dias de actuals_daily desde la vista actuals."""
|
||||
params = [
|
||||
{"name": "as_of", "type": "DATE", "value": as_of},
|
||||
{"name": "w", "type": "INT64", "value": window_days},
|
||||
]
|
||||
bq_query(client, ACTUALS_DELETE_SQL, params=params)
|
||||
bq_query(client, ACTUALS_INSERT_SQL, params=params)
|
||||
|
||||
|
||||
def _as_date(value) -> date:
|
||||
if isinstance(value, date) and not isinstance(value, datetime):
|
||||
return value
|
||||
if isinstance(value, datetime):
|
||||
return value.date()
|
||||
return datetime.strptime(str(value)[:10], "%Y-%m-%d").date()
|
||||
|
||||
|
||||
def run_sales_forecast(
|
||||
as_of: str = "",
|
||||
horizon: int = 7,
|
||||
model: str = "baseline_v1",
|
||||
author: str = "egutierrez",
|
||||
dry_run: bool = False,
|
||||
) -> dict:
|
||||
"""Genera el forecast diario de ventas y lo escribe en BigQuery.
|
||||
|
||||
Args:
|
||||
as_of: fecha de corte 'YYYY-MM-DD' (dia de la corrida). Vacio = hoy. La
|
||||
historia utilizable llega hasta as_of - 1 dia; se predice
|
||||
as_of + 1 .. as_of + horizon. run_date = as_of.
|
||||
horizon: numero de dias futuros a predecir. Default 7.
|
||||
model: etiqueta del modelo escrita en cada fila (columna model). Default
|
||||
'baseline_v1'.
|
||||
author: autor de la corrida (columna author). Default 'egutierrez'.
|
||||
dry_run: si True no escribe en BigQuery; devuelve el resumen + una muestra
|
||||
de filas.
|
||||
|
||||
Returns:
|
||||
dict dict-no-throw. En exito {status:'ok', run_date, series, rows, model,
|
||||
author, (sample si dry_run)}. En error {status:'error', error, stage}.
|
||||
"""
|
||||
try:
|
||||
run_d = _as_date(as_of) if as_of else date.today()
|
||||
# Ultimo dia de historia utilizable (inclusive): as_of - 1 dia.
|
||||
hist_as_of = run_d - timedelta(days=1)
|
||||
horizon_dates = [
|
||||
(run_d + timedelta(days=k)).isoformat() for k in range(1, horizon + 1)
|
||||
]
|
||||
|
||||
# 1) Cliente BigQuery sin quota project (evita 403 USER_PROJECT_DENIED).
|
||||
client = bq_auth(PROJECT_ID, drop_quota_project=True)
|
||||
|
||||
# 2) Historia agregada del mart (hasta run_d - 1 via el WHERE de la query).
|
||||
q = bq_query(
|
||||
client,
|
||||
HISTORY_SQL,
|
||||
params=[{"name": "as_of", "type": "DATE", "value": run_d}],
|
||||
)
|
||||
cols = {name: i for i, name in enumerate(q["columns"])}
|
||||
|
||||
# Historia por serie + ultimos center_name/ambito conocidos + venta 8 semanas.
|
||||
history = []
|
||||
last_meta = {} # series_id -> (max_date, center_name, ambito, center_id, subcat)
|
||||
recent_sum = {} # series_id -> venta acumulada en las ultimas 8 semanas
|
||||
active_cutoff = hist_as_of - timedelta(weeks=8)
|
||||
for row in q["rows"]:
|
||||
fecha = _as_date(row[cols["fecha"]])
|
||||
center_id = str(row[cols["idCentro"]])
|
||||
subcat = row[cols["subcat_cqq"]]
|
||||
center_name = row[cols["center_name"]]
|
||||
ambito = row[cols["ambito"]]
|
||||
venta = float(row[cols["venta"]] or 0.0)
|
||||
series_id = f"{center_id}|{subcat}"
|
||||
|
||||
history.append(
|
||||
{"series_id": series_id, "date": fecha.isoformat(), "value": venta}
|
||||
)
|
||||
prev = last_meta.get(series_id)
|
||||
if prev is None or fecha > prev[0]:
|
||||
last_meta[series_id] = (fecha, center_name, ambito, center_id, subcat)
|
||||
if fecha > active_cutoff:
|
||||
recent_sum[series_id] = recent_sum.get(series_id, 0.0) + venta
|
||||
|
||||
# 3) Series activas: venta > 0 en las ultimas 8 semanas.
|
||||
active = {sid for sid, s in recent_sum.items() if s > 0.0}
|
||||
history = [h for h in history if h["series_id"] in active]
|
||||
|
||||
if not history:
|
||||
result = {
|
||||
"status": "ok",
|
||||
"run_date": run_d.isoformat(),
|
||||
"series": 0,
|
||||
"rows": 0,
|
||||
"model": model,
|
||||
"author": author,
|
||||
}
|
||||
if dry_run:
|
||||
result["sample"] = []
|
||||
return result
|
||||
|
||||
# 4) Modelo puro: todas las predicciones de golpe.
|
||||
preds = forecast_seasonal_median(
|
||||
history, horizon_dates, as_of=hist_as_of.isoformat()
|
||||
)
|
||||
|
||||
# 5) Filas para la tabla de predicciones.
|
||||
run_ts = datetime.now(timezone.utc).isoformat()
|
||||
rows_out = []
|
||||
for p in preds:
|
||||
sid = p["series_id"]
|
||||
meta = last_meta.get(sid)
|
||||
_, center_name, ambito, center_id, subcat = meta
|
||||
forecast_date = _as_date(p["date"])
|
||||
rows_out.append(
|
||||
{
|
||||
"run_ts": run_ts,
|
||||
"run_date": run_d.isoformat(),
|
||||
"forecast_date": forecast_date.isoformat(),
|
||||
"lag_days": (forecast_date - run_d).days,
|
||||
"center_id": center_id,
|
||||
"center_name": center_name,
|
||||
"ambito": ambito,
|
||||
"subcat_cgq": subcat,
|
||||
"model": model,
|
||||
"author": author,
|
||||
"y_pred": round(float(p["y_pred"]), 4),
|
||||
}
|
||||
)
|
||||
|
||||
summary = {
|
||||
"status": "ok",
|
||||
"run_date": run_d.isoformat(),
|
||||
"series": len(active),
|
||||
"rows": len(rows_out),
|
||||
"model": model,
|
||||
"author": author,
|
||||
}
|
||||
|
||||
# 6) dry-run: no escribe; devuelve resumen + muestra.
|
||||
if dry_run:
|
||||
summary["sample"] = rows_out[:5]
|
||||
return summary
|
||||
|
||||
# 7) Idempotencia: borra las predicciones previas de (run_date, model, author).
|
||||
bq_query(
|
||||
client,
|
||||
DELETE_SQL,
|
||||
params=[
|
||||
{"name": "d", "type": "DATE", "value": run_d},
|
||||
{"name": "m", "type": "STRING", "value": model},
|
||||
{"name": "a", "type": "STRING", "value": author},
|
||||
],
|
||||
)
|
||||
|
||||
# 8) Carga JSONL a la tabla (WRITE_APPEND, schema fijo de la tabla).
|
||||
tmp_path = None
|
||||
try:
|
||||
fd, tmp_path = tempfile.mkstemp(prefix="sales_forecast_", suffix=".jsonl")
|
||||
with os.fdopen(fd, "w", encoding="utf-8") as fh:
|
||||
for r in rows_out:
|
||||
fh.write(json.dumps(r, ensure_ascii=False) + "\n")
|
||||
load = bq_load_from_file(
|
||||
client,
|
||||
tmp_path,
|
||||
DEST_DATASET,
|
||||
DEST_TABLE,
|
||||
source_format="NEWLINE_DELIMITED_JSON",
|
||||
write_disposition="WRITE_APPEND",
|
||||
autodetect=False,
|
||||
)
|
||||
finally:
|
||||
if tmp_path and os.path.exists(tmp_path):
|
||||
os.remove(tmp_path)
|
||||
|
||||
if load.get("status") != "DONE":
|
||||
return {
|
||||
"status": "error",
|
||||
"error": f"load job no termino DONE: {load}",
|
||||
"stage": "load",
|
||||
}
|
||||
|
||||
summary["rows_loaded"] = load.get("rows_loaded")
|
||||
summary["job_id"] = load.get("job_id")
|
||||
|
||||
# 9) Refresca la tabla fisica de reales (ventana movil de 10 dias) para
|
||||
# que forecast_eval y el dashboard de competicion comparen contra el
|
||||
# ultimo estado del mart.
|
||||
try:
|
||||
_refresh_actuals(client, run_d)
|
||||
summary["actuals_refreshed"] = True
|
||||
except Exception as e: # noqa: BLE001
|
||||
# No invalida las predicciones ya cargadas: se reporta y se sigue.
|
||||
summary["actuals_refreshed"] = False
|
||||
summary["actuals_error"] = str(e)
|
||||
|
||||
return summary
|
||||
except Exception as e: # noqa: BLE001
|
||||
return {"status": "error", "error": str(e), "stage": "unexpected"}
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
import argparse
|
||||
|
||||
parser = argparse.ArgumentParser(
|
||||
description="Forecast diario de ventas Aurgi -> BigQuery sales_forecast.predictions."
|
||||
)
|
||||
parser.add_argument("--as-of", default="", help="Fecha de corte YYYY-MM-DD (vacio = hoy).")
|
||||
parser.add_argument("--horizon", type=int, default=7, help="Dias a predecir. Default 7.")
|
||||
parser.add_argument("--model", default="baseline_v1", help="Etiqueta del modelo.")
|
||||
parser.add_argument("--author", default="egutierrez", help="Autor de la corrida.")
|
||||
parser.add_argument(
|
||||
"--dry-run", action="store_true", help="No escribe en BigQuery; imprime muestra."
|
||||
)
|
||||
args = parser.parse_args()
|
||||
|
||||
out = run_sales_forecast(
|
||||
as_of=args.as_of,
|
||||
horizon=args.horizon,
|
||||
model=args.model,
|
||||
author=args.author,
|
||||
dry_run=args.dry_run,
|
||||
)
|
||||
print(json.dumps(out, ensure_ascii=False, default=str))
|
||||
sys.exit(0 if out.get("status") == "ok" else 1)
|
||||
Reference in New Issue
Block a user