Files
fn_registry/apps/auto_metabase/sync_push.py
T
egutierrez 310b409ae0 feat(auto_metabase): push-all + describe/sql + auto-inject de dashcards
- push_all(): pushea todos los YAMLs de un proyecto (cards primero,
  dashboards despues), solo CREATE/UPDATE, resiliente a fallos por item
- explore.py: comandos describe (schema de DB) y sql (query ad-hoc con
  limite, cap 5MB, bloqueo de escrituras destructivas)
- payload.py: auto-inyecta id:-N, visualization_settings:{} y
  parameter_mappings:[] en dashcards nuevas para evitar 500 en push
- test_local: 11 cards + 3 dashboards sobre Sample Database de Metabase
- registry.db regenerado con auto_metabase_py_analytics indexada

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-04-13 13:14:05 +02:00

407 lines
15 KiB
Python

"""Push per-item: aplica UN cambio a Metabase. Implementa las 20 reglas duras.
Resumen de las reglas que este modulo garantiza:
- R1: target unico (validado por argparse en main.py).
- R2: 1 sola request HTTP por invocacion en la fase de apply
(excepcion: dashboards nuevos con dashcards = POST + PUT, documentado
en metabase_create_dashboard_raw).
- R3: push de dashboard NO toca cards. Solo dashcards refs + layout + meta.
- R4: push de card NO toca dashboards.
- R5: dry-run por defecto. --apply requerido para enviar.
- R6: backup obligatorio antes de UPDATE (no en CREATE).
- R7: payload se construye solo desde el YAML del item.
- R8: payload PUT/POST contiene solo lo del YAML, sin merge con remoto.
- R9: _meta.kind/slug deben coincidir con args (validado en validate_one).
- R10: _refs deben resolver a ids del index (validado en validate_one).
- R11: _meta.id debe coincidir con index (validado en validate_one).
- R12: cap de tamano de payload — pide confirmacion si supera 100KB.
- R13: log de cada push en state/push.log (jsonl).
- R14, R15, R16: garantizadas en sync_pull.py.
- R17: freshness check (compara remote.updated_at vs _meta.remote_updated_at).
- R18: count check para dashboards (dashcards/tabs/parameters no menores en local).
- R19: --force-overwrite para saltar R17 + R18 explicitamente.
- R20: cubierto por R18 (cuenta tabs y parameters tambien).
"""
from __future__ import annotations
import datetime as dt
import json
import shutil
import sys
from pathlib import Path
from typing import Any
import yaml
from metabase.cards import (
metabase_create_card_raw,
metabase_get_card,
metabase_update_card,
)
from metabase.dashboards import (
metabase_create_dashboard_raw,
metabase_get_dashboard,
metabase_update_dashboard,
)
from payload import item_path, load_item_yaml
from sync_pull import pull_one
from sync_validate import print_result, validate_one
# R12 threshold: a payload whose JSON-serialized length exceeds this value
# triggers a warning and, when applying, an interactive confirmation.
_PAYLOAD_SIZE_WARN_BYTES = 100_000
# ---------------------------------------------------------------- Helpers
def _utc_now_iso() -> str:
    """Return the current UTC time formatted as ``YYYY-MM-DDTHH:MM:SSZ``."""
    now_utc = dt.datetime.now(tz=dt.timezone.utc)
    return now_utc.strftime("%Y-%m-%dT%H:%M:%SZ")
def _ts_for_path() -> str:
    """Return the current UTC time as ``YYYY-MM-DD_HHMMSS`` (no ``:``), for use in paths."""
    now_utc = dt.datetime.now(tz=dt.timezone.utc)
    return now_utc.strftime("%Y-%m-%d_%H%M%S")
def _yaml_dump(path: Path, data: dict) -> None:
    """Write ``data`` to ``path`` as block-style YAML, creating parent directories.

    Args:
        path: Destination file; it is overwritten if it already exists.
        data: Mapping to serialize. Key order is preserved (``sort_keys=False``).
    """
    path.parent.mkdir(parents=True, exist_ok=True)
    # allow_unicode=True makes PyYAML emit non-ASCII characters verbatim, so the
    # file must be opened as UTF-8 explicitly; the platform default encoding
    # (e.g. cp1252) would raise UnicodeEncodeError or produce a mis-encoded file.
    with path.open("w", encoding="utf-8") as f:
        yaml.safe_dump(
            data, f,
            sort_keys=False, allow_unicode=True, default_flow_style=False, width=120,
        )
def _log_push(project, entry: dict) -> None:
    """R13: append one JSON line describing a push attempt to ``state/push.log``.

    Called for dry-runs and real applies alike. The log is append-only.

    Args:
        project: Object exposing ``state_dir`` (a ``pathlib.Path``).
        entry: Log record. Values that JSON cannot encode natively are
            stringified (``default=str``), matching how the payload size is
            measured elsewhere in this module, so that writing the log never
            crashes a push that has already been applied.
    """
    state_dir = project.state_dir
    # parents=True: the state directory may not have been created yet.
    state_dir.mkdir(parents=True, exist_ok=True)
    # Serialize before opening the file so a failure cannot leave a partial line.
    line = json.dumps(entry, default=str)
    with (state_dir / "push.log").open("a", encoding="utf-8") as f:
        f.write(line + "\n")
# Context of the push currently in flight. push_one() stores its log entry and
# project here so that _abort() can persist the entry before exiting.
_active_log_entry: dict | None = None
_active_project = None


def _abort(msg: str) -> None:
    """Print ``msg`` to stderr and terminate the process with exit code 2.

    This function never returns. If a push is in flight (both module-level
    context variables are set), its log entry is marked ``"aborted"`` with the
    first line of ``msg`` as ``abort_reason`` and appended to the push log.

    Args:
        msg: Human-readable reason; may span several lines.

    Raises:
        SystemExit: always, with code 2.
    """
    print(f"\nABORT — {msg}", file=sys.stderr)
    if _active_log_entry is not None and _active_project is not None:
        _active_log_entry["status"] = "aborted"
        _active_log_entry["abort_reason"] = msg.split("\n", 1)[0]
        try:
            _log_push(_active_project, _active_log_entry)
        except (OSError, TypeError, ValueError) as log_err:
            # Abort is often reached *because* a disk write failed; a second
            # failure while logging must not replace the promised exit code 2
            # with an unrelated traceback.
            print(f" ! no se pudo escribir push.log: {log_err}", file=sys.stderr)
    sys.exit(2)
# ---------------------------------------------------------------- R6: backup
def _backup_or_abort(project, kind: str, slug: str, current_remote: dict) -> Path:
    """R6: snapshot the current remote state before an UPDATE.

    The snapshot is written to ``state/backups/{ts}/{kind}s/{slug}.yaml``. If
    anything goes wrong while writing it, the push is aborted before Metabase
    is touched.

    Args:
        project: Object exposing ``state_dir`` and ``dir`` (``pathlib.Path``).
        kind: Item kind, e.g. ``"card"`` or ``"dashboard"``.
        slug: Item slug; used as the backup file name.
        current_remote: Remote state as fetched just before the update.

    Returns:
        Path of the backup file that was written.

    Raises:
        SystemExit: via ``_abort`` (exit code 2) when the backup cannot be written.
    """
    ts = _ts_for_path()
    backup_path = project.state_dir / "backups" / ts / (kind + "s") / f"{slug}.yaml"
    snapshot = {
        "_backup_of": {"kind": kind, "slug": slug, "ts": ts},
        "remote_state": current_remote,
    }
    try:
        backup_path.parent.mkdir(parents=True, exist_ok=True)
        # Explicit UTF-8: allow_unicode=True writes non-ASCII characters as-is,
        # which the platform default encoding may be unable to represent.
        with backup_path.open("w", encoding="utf-8") as f:
            yaml.safe_dump(
                snapshot, f,
                sort_keys=False, allow_unicode=True, default_flow_style=False, width=120,
            )
    except Exception as e:
        # Deliberately broad: whatever the cause (I/O, YAML representation...),
        # no backup means no push.
        _abort(f"R6 backup fallo, no se aplicara nada. Error: {e}")
    # The relative path is only cosmetic; a backup that was written successfully
    # must not be turned into a crash because it lies outside the expected root.
    try:
        shown = backup_path.relative_to(project.dir.parent.parent)
    except ValueError:
        shown = backup_path
    print(f" backup: {shown}")
    return backup_path
# ---------------------------------------------------------------- R17 + R18 + R20
def _freshness_check_or_abort(
    kind: str, slug: str, local_doc: dict, remote: dict, force: bool,
) -> None:
    """R17: abort when the remote item changed since the local snapshot was pulled.

    Compares ``remote["updated_at"]`` with ``local_doc["_meta"]["remote_updated_at"]``.

    Args:
        kind: Item kind, used in messages only.
        slug: Item slug, used in messages only.
        local_doc: Parsed local YAML document.
        remote: Current remote state of the item.
        force: When true, a mismatch is reported but does not abort (R19).

    Raises:
        SystemExit: via ``_abort`` (exit code 2) on a mismatch without ``force``.
    """
    # `or {}` also covers an explicit `_meta: null` in the YAML, where
    # dict.get("_meta", {}) would return None and crash on the next lookup.
    meta = local_doc.get("_meta") or {}
    local_remote_ts = meta.get("remote_updated_at")
    current_remote_ts = remote.get("updated_at")
    if local_remote_ts is None:
        # New item, or an old pull without this metadata: nothing to compare.
        return
    if current_remote_ts == local_remote_ts:
        return
    if force:
        print(
            f" ! force-overwrite ACTIVO: ignorando R17 — "
            f"local snapshot={local_remote_ts}, metabase={current_remote_ts}"
        )
        return
    _abort(
        f"R17 freshness check fallido para {kind} {slug}.\n"
        f" Tu snapshot: remote_updated_at = {local_remote_ts}\n"
        f" Metabase ahora: updated_at = {current_remote_ts}\n"
        f" → alguien (o tu mismo) cambio este {kind} en Metabase entre tu pull y este push.\n"
        f" → para no sobrescribir esos cambios: python main.py pull {kind} {slug}\n"
        f" → para sobrescribir igualmente: --force-overwrite (NO recomendado)"
    )
def _count_check_or_abort(
    slug: str, local_payload: dict, remote: dict, force: bool,
) -> None:
    """R18+R20: refuse a dashboard push that would drop remote elements.

    For each of ``dashcards``, ``tabs`` and ``parameters``, the number of
    entries in the remote dashboard is compared with the local payload. If the
    remote has more, the push aborts unless ``force`` is set, in which case the
    would-be losses are only printed.

    Raises:
        SystemExit: via ``_abort`` (exit code 2) when losses exist and ``force`` is false.
    """
    def _size(container: dict, field: str) -> int:
        # A missing key and an explicit None both count as zero entries.
        return len(container.get(field) or [])

    shrinkage = []
    for field in ("dashcards", "tabs", "parameters"):
        n_local = _size(local_payload, field)
        n_remote = _size(remote, field)
        if n_remote <= n_local:
            continue
        shrinkage.append(
            f"{field}: local={n_local}, metabase={n_remote} (perderias {n_remote - n_local})"
        )

    if not shrinkage:
        return
    if force:
        print(f" ! force-overwrite ACTIVO: ignorando R18 — perdidas: {shrinkage}")
        return

    header = f"R18 count check fallido para dashboard {slug}:\n "
    hints = (
        f"\n → si genuinamente quieres eliminar elementos: --force-overwrite (NO recomendado)\n"
        f" → si no, haz pull primero: python main.py pull dashboard {slug}"
    )
    _abort(header + "\n ".join(shrinkage) + hints)
# ---------------------------------------------------------------- Push paths
def _push_card_create(client, payload: dict) -> dict:
    """R2: create a card with a single request (POST /api/card).

    Returns whatever ``metabase_create_card_raw`` returns; the caller reads the
    new card's ``id`` from it.
    """
    created = metabase_create_card_raw(client, payload)
    return created
def _push_card_update(client, card_id: int, payload: dict) -> dict:
    """R2: update a card with a single request (PUT /api/card/:id).

    R4: only this card is touched. The payload's keys are forwarded as keyword
    arguments to ``metabase_update_card``.
    """
    updated = metabase_update_card(client, card_id, **payload)
    return updated
def _push_dashboard_create(client, payload: dict) -> dict:
    """Create a dashboard: POST, plus a PUT when it has dashcards.

    The two-request exception to R2 is documented in
    ``metabase_create_dashboard_raw``, to which this delegates.
    """
    created = metabase_create_dashboard_raw(client, payload)
    return created
def _push_dashboard_update(client, dash_id: int, payload: dict) -> dict:
    """R2: update a dashboard with a single request (PUT /api/dashboard/:id).

    R3: only this dashboard is touched. The payload's keys are forwarded as
    keyword arguments to ``metabase_update_dashboard``.
    """
    updated = metabase_update_dashboard(client, dash_id, **payload)
    return updated
# ---------------------------------------------------------------- Orchestrator
def _push_apply_create(
    project, client, kind: str, slug: str, payload: dict, log_entry: dict,
) -> dict:
    """CREATE branch of push_one: send the new item, update the index, re-pull.

    R6 does not apply here: there is no remote state to back up. The new id is
    recorded in ``log_entry`` as soon as it is known, so that it is not lost if
    a later step fails.
    """
    print(" modo: CREATE (no hay backup, item nuevo)")
    creators = {"card": _push_card_create, "dashboard": _push_dashboard_create}
    if kind not in creators:
        _abort(f"create por push de '{kind}' no soportado todavia (solo card/dashboard)")
    response = creators[kind](client, payload)
    new_id = response["id"]
    log_entry["new_id"] = new_id
    print(f" creado con id={new_id}")
    # Register the new id in the local index
    idx = project.load_index()
    idx.setdefault(kind + "s", {})[slug] = new_id
    project.save_index(idx)
    # Re-pull to refresh the local item's _meta
    print(" re-pull para refrescar _meta...")
    pull_one(client, project, kind, new_id)
    log_entry["status"] = "created"
    return response


def _push_apply_update(
    project, client, kind: str, slug: str, item_id: int,
    doc: dict, payload: dict, force_overwrite: bool, log_entry: dict,
) -> dict:
    """UPDATE branch of push_one: fetch, back up (R6), check (R17/R18), send, re-pull."""
    print(f" modo: UPDATE (id={item_id})")
    if kind == "card":
        fetch, send = metabase_get_card, _push_card_update
    elif kind == "dashboard":
        fetch, send = metabase_get_dashboard, _push_dashboard_update
    else:
        _abort(f"update por push de '{kind}' no soportado todavia")
    remote = fetch(client, item_id)
    # R6: back up BEFORE doing anything destructive
    backup_path = _backup_or_abort(project, kind, slug, remote)
    try:
        log_entry["backup"] = str(backup_path.relative_to(project.dir.parent.parent))
    except ValueError:
        # Backup lives outside the expected root: log the path as-is.
        log_entry["backup"] = str(backup_path)
    # R17: freshness
    _freshness_check_or_abort(kind, slug, doc, remote, force_overwrite)
    # R18: count check (dashboards only)
    if kind == "dashboard":
        _count_check_or_abort(slug, payload, remote, force_overwrite)
    response = send(client, item_id, payload)
    print(f" aplicado.")
    # Re-pull to refresh the local item's _meta
    print(" re-pull para refrescar _meta...")
    pull_one(client, project, kind, item_id)
    log_entry["status"] = "updated"
    log_entry["id"] = item_id
    return response


def _push_one_steps(
    project, client, kind: str, slug: str, log_entry: dict,
    apply: bool, force_overwrite: bool, allow_warnings: bool,
) -> dict:
    """Run validate -> size check -> dry-run or apply for one item (see push_one)."""
    # Phase 1: validate (R7+R9+R10+R11 + structure + optional SQL check)
    check_sql = apply and kind == "card"
    val_result = validate_one(
        project, kind, slug,
        check_sql=check_sql,
        client=client if check_sql else None,
    )
    print_result(kind, slug, val_result)
    if val_result.errors:
        log_entry["status"] = "validation_errors"
        log_entry["issues"] = val_result.errors
        _log_push(project, log_entry)
        sys.exit(2)
    # Warnings only block a real apply; a dry-run continues regardless.
    if val_result.warnings and not allow_warnings and apply:
        print(
            f"\n ! hay {len(val_result.warnings)} warnings. "
            f"Para aplicar igualmente: --allow-warnings"
        )
        log_entry["status"] = "warnings_blocking_apply"
        log_entry["warnings"] = val_result.warnings
        _log_push(project, log_entry)
        sys.exit(1)

    payload = val_result.payload
    if payload is None:
        # Explicit check instead of `assert`, which is stripped under `python -O`.
        _abort("validate_one no devolvio payload pese a no reportar errores")

    # R12: payload size
    payload_size = len(json.dumps(payload, default=str))
    log_entry["payload_bytes"] = payload_size
    if payload_size > _PAYLOAD_SIZE_WARN_BYTES:
        print(f"\n ! payload size = {payload_size} bytes (>{_PAYLOAD_SIZE_WARN_BYTES})")
        if apply:
            resp = input(" ¿Continuar con apply? (escribir 'si'): ")
            if resp.strip().lower() != "si":
                _abort("usuario cancelo por tamano")

    # Load the document to read _meta; `or {}` tolerates an explicit `_meta: null`.
    doc = load_item_yaml(item_path(project.dir, kind, slug))
    meta = doc.get("_meta") or {}
    item_id = meta.get("id")

    # ---- Dry-run path
    if not apply:
        method, url = _resolve_method_url(kind, item_id)
        print(f"\n--- DRY-RUN ({method} {url}) ---")
        print(json.dumps(payload, indent=2, default=str))
        print(f"\n payload: {payload_size} bytes")
        print(f" para aplicar: añade --apply")
        log_entry["status"] = "dry_run"
        log_entry["method"] = method
        log_entry["url"] = url
        _log_push(project, log_entry)
        return {"dry_run": True, "payload": payload}

    # ---- Apply path
    print(f"\n--- APPLY ---")
    try:
        if item_id is None:
            response = _push_apply_create(project, client, kind, slug, payload, log_entry)
        else:
            response = _push_apply_update(
                project, client, kind, slug, item_id,
                doc, payload, force_overwrite, log_entry,
            )
    except Exception as exc:
        # R13: a push that blows up mid-apply (HTTP error, failed re-pull...)
        # must still leave a trace. SystemExit from _abort is not an Exception
        # and has already been logged by _abort itself.
        log_entry["status"] = "error"
        log_entry["error"] = f"{type(exc).__name__}: {exc}"
        _log_push(project, log_entry)
        raise
    _log_push(project, log_entry)
    print("OK")
    return {"applied": True, "response": response}


def push_one(
    project, client, kind: str, slug: str,
    *, apply: bool = False, force_overwrite: bool = False, allow_warnings: bool = False,
) -> dict:
    """Entry point: push ONE item (card or dashboard) to Metabase.

    Args:
        project: Project object (``dir``, ``state_dir``, ``load_index``, ``save_index``).
        client: Metabase client passed through to the API helpers.
        kind: Item kind, ``"card"`` or ``"dashboard"``.
        slug: Item slug (YAML file name without extension).
        apply: When false (default, R5) only a dry-run is printed and logged.
        force_overwrite: Skip the R17/R18 safety aborts (R19).
        allow_warnings: Allow applying despite validation warnings.

    Returns:
        ``{"dry_run": True, "payload": ...}`` for a dry-run, or
        ``{"applied": True, "response": ...}`` after a successful apply.

    Raises:
        SystemExit: code 2 on validation errors or any abort; code 1 when
            warnings block an apply.
        Exception: anything raised by the API helpers or the re-pull is logged
            with status ``"error"`` and re-raised.
    """
    global _active_log_entry, _active_project
    log_entry: dict = {
        "ts": _utc_now_iso(), "kind": kind, "slug": slug,
        "apply": apply, "force_overwrite": force_overwrite,
    }
    # Expose the entry to _abort() so it can log an abort, and restore the
    # previous context afterwards so a stale entry is never logged later.
    previous_context = (_active_log_entry, _active_project)
    _active_log_entry, _active_project = log_entry, project
    try:
        return _push_one_steps(
            project, client, kind, slug, log_entry,
            apply, force_overwrite, allow_warnings,
        )
    finally:
        _active_log_entry, _active_project = previous_context
def _resolve_method_url(kind: str, item_id: int | None) -> tuple[str, str]:
    """Return the ``(method, url)`` an apply would use; for display and logging.

    An item without an id is created (POST on the collection URL); otherwise it
    is updated (PUT on the item URL).
    """
    collection_url = f"/api/{kind}"
    if item_id is not None:
        return "PUT", f"{collection_url}/{item_id}"
    return "POST", collection_url
# ---------------------------------------------------------------- push_all
def _list_slugs(project, kind: str) -> list[str]:
    """Return the sorted slugs (``*.yaml`` file names without extension) found
    in the project's ``{kind}s/`` directory, or an empty list if it is missing."""
    folder = project.dir / f"{kind}s"
    if folder.exists():
        return sorted(entry.stem for entry in folder.glob("*.yaml"))
    return []
def push_all(
    project, client,
    *, apply: bool = False, force_overwrite: bool = False, allow_warnings: bool = False,
    kinds: tuple[str, ...] = ("card", "dashboard"),
) -> dict:
    """Push every YAML item of a project, one ``push_one`` call per item.

    Only CREATE or UPDATE ever happens (never DELETE). With the default
    ``kinds`` order, cards go before dashboards so that card slugs are already
    in the index when dashcards are resolved.

    Dry-run by default; pass ``apply=True`` to actually send. A failing item
    (``SystemExit`` or any other exception out of ``push_one``) is recorded and
    the run continues with the next item.

    Returns:
        ``{"ok": [...], "failed": [...], "skipped": []}`` with one
        ``"kind:slug"`` label per processed item.
    """
    mode = "APPLY" if apply else "DRY-RUN"
    print(f"\n=== push all ({mode}) project={project.name} ===")

    succeeded: list = []
    failures: list = []
    for kind in kinds:
        slugs = _list_slugs(project, kind)
        if not slugs:
            print(f"\n[{kind}] (sin YAMLs en {kind}s/)")
            continue
        print(f"\n[{kind}] {len(slugs)} item(s): {', '.join(slugs)}")
        for slug in slugs:
            label = f"{kind}:{slug}"
            print(f"\n--- {kind} {slug} ---")
            try:
                push_one(
                    project, client, kind, slug,
                    apply=apply,
                    force_overwrite=force_overwrite,
                    allow_warnings=allow_warnings,
                )
            except SystemExit as e:
                # push_one signals validation failures and aborts via sys.exit
                code = e.code if isinstance(e.code, int) else 1
                print(f" ! {kind} {slug} fallo (exit_code={code}) — continuo")
                failures.append(f"{label} (exit={code})")
            except Exception as e:
                print(f" ! {kind} {slug} excepcion: {type(e).__name__}: {e}")
                failures.append(f"{label} ({type(e).__name__})")
            else:
                succeeded.append(label)

    print(f"\n=== resumen push all ===")
    print(f" OK: {len(succeeded)} {succeeded}")
    print(f" FAILED: {len(failures)} {failures}")
    if not apply:
        print(f" (dry-run — para aplicar de verdad: --apply)")
    return {"ok": succeeded, "failed": failures, "skipped": []}