Files
fn_registry/apps/auto_metabase/main.py
T
egutierrez 310b409ae0 feat(auto_metabase): push-all + describe/sql + auto-inject de dashcards
- push_all(): pushea todos los YAMLs de un proyecto (cards primero,
  dashboards despues), solo CREATE/UPDATE, resiliente a fallos por item
- explore.py: comandos describe (schema de DB) y sql (query ad-hoc con
  limite, cap 5MB, bloqueo de escrituras destructivas)
- payload.py: auto-inyecta id:-N, visualization_settings:{} y
  parameter_mappings:[] en dashcards nuevas para evitar 500 en push
- test_local: 11 cards + 3 dashboards sobre Sample Database de Metabase
- registry.db regenerado con auto_metabase_py_analytics indexada

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-04-13 13:14:05 +02:00

451 lines
17 KiB
Python

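"""auto_metabase — bidirectional synchronization between Metabase and YAML files.

Multi-project: each Metabase environment (local, prod, staging...) is an
isolated project under projects/{name}/ with its own config.yaml, .env,
state/ and YAMLs.

Usage (the -p/--project option must come before the subcommand):
    python main.py projects                                   # list projects
    python main.py init-project NAME --base-url URL [--description TEXT]
    python main.py [-p PROJECT] login
    python main.py [-p PROJECT] status
    python main.py [-p PROJECT] pull KIND REF
    python main.py [-p PROJECT] remote KIND [--filter TEXT]
    python main.py [-p PROJECT] validate KIND SLUG [--check-sql] [--show-payload]
    python main.py [-p PROJECT] push KIND SLUG [--apply] [--force-overwrite] [--allow-warnings]
    python main.py [-p PROJECT] push-all [--apply] [--force-overwrite] [--allow-warnings] [--kinds KIND ...]
    python main.py [-p PROJECT] restore KIND SLUG [--from TIMESTAMP] [--list]
    python main.py [-p PROJECT] diff KIND SLUG
    python main.py [-p PROJECT] describe DB [--filter TEXT] [--samples] [--tables-only]
    python main.py [-p PROJECT] sql DB QUERY [--limit N] [--allow-write]

push and push-all are dry-run unless --apply is given.
If --project is not passed, default_project from the top-level config.yaml is used.
"""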
import argparse
import json
import os
import sys
from pathlib import Path
# Make the registry's shared functions importable: APP_DIR is the directory
# holding this file, REGISTRY_ROOT is two levels above it, and
# <REGISTRY_ROOT>/python/functions is pushed to the front of sys.path.
APP_DIR = Path(__file__).resolve().parent
REGISTRY_ROOT = APP_DIR.parent.parent
sys.path.insert(0, str(REGISTRY_ROOT / "python" / "functions"))
# These imports intentionally come after the sys.path tweak (hence noqa E402).
# NOTE(review): metabase.client presumably lives under python/functions — confirm.
import yaml # noqa: E402
from metabase.client import MetabaseClient, metabase_auth # noqa: E402
# ---------------------------------------------------------------- Top-level config
# Location of the top-level config file, next to this script.
TOP_CONFIG_PATH = APP_DIR / "config.yaml"
def load_top_config() -> dict:
    """Return the top-level configuration as a dict.

    When the top-level config file is missing, a built-in default is
    returned (default project ``test_local``, projects directory
    ``projects``).  An empty YAML document yields an empty dict.
    """
    if TOP_CONFIG_PATH.exists():
        with TOP_CONFIG_PATH.open() as handle:
            parsed = yaml.safe_load(handle)
        # safe_load gives None for an empty file; normalise falsy results to {}
        return parsed if parsed else {}
    return {"default_project": "test_local", "projects_dir": "projects"}
def projects_root() -> Path:
    """Return the directory that contains all project folders.

    The sub-directory name comes from ``projects_dir`` in the top-level
    config and falls back to ``projects``; it is resolved against APP_DIR.
    """
    top_config = load_top_config()
    subdir = top_config.get("projects_dir", "projects")
    return APP_DIR / subdir
def list_projects() -> list[str]:
    """Return the sorted names of all projects found under the projects root.

    A project is any direct sub-directory that contains a ``config.yaml``.
    Returns an empty list when the projects root does not exist.
    """
    base = projects_root()
    names: list[str] = []
    if base.exists():
        for entry in base.iterdir():
            if entry.is_dir() and (entry / "config.yaml").exists():
                names.append(entry.name)
        names.sort()
    return names
# ---------------------------------------------------------------- Project context
class Project:
"""Contexto inmutable de un proyecto."""
def __init__(self, name: str):
self.name = name
self.dir = projects_root() / name
if not self.dir.exists():
raise SystemExit(
f"Proyecto '{name}' no existe. Ejecuta: "
f"python main.py init-project {name} --base-url URL"
)
self.config_path = self.dir / "config.yaml"
self.env_path = self.dir / ".env"
self.state_dir = self.dir / "state"
self.session_path = self.state_dir / "session.json"
self.index_path = self.state_dir / "index.json"
@property
def config(self) -> dict:
with self.config_path.open() as f:
return yaml.safe_load(f) or {}
@property
def base_url(self) -> str:
return self.config["base_url"]
def load_env(self) -> dict:
env = {}
if not self.env_path.exists():
return env
for line in self.env_path.read_text().splitlines():
line = line.strip()
if not line or line.startswith("#") or "=" not in line:
continue
k, _, v = line.partition("=")
env[k.strip()] = v.strip().strip('"').strip("'")
return env
def load_session(self) -> dict | None:
if not self.session_path.exists():
return None
try:
return json.loads(self.session_path.read_text())
except json.JSONDecodeError:
return None
def save_session(self, base_url: str, token: str) -> None:
self.state_dir.mkdir(exist_ok=True)
self.session_path.write_text(
json.dumps({"base_url": base_url, "token": token}, indent=2)
)
def load_index(self) -> dict:
if not self.index_path.exists():
return {"databases": {}, "collections": {}, "cards": {}, "dashboards": {}, "documents": {}}
return json.loads(self.index_path.read_text())
def save_index(self, idx: dict) -> None:
self.state_dir.mkdir(exist_ok=True)
self.index_path.write_text(json.dumps(idx, indent=2, sort_keys=True))
def resolve_project(arg_name: str | None) -> Project:
    """Build the Project selected on the command line or by the default.

    An explicit, non-empty ``arg_name`` wins; otherwise ``default_project``
    from the top-level config is used.

    Raises:
        SystemExit: if neither source provides a project name.
    """
    if arg_name:
        return Project(arg_name)
    fallback = load_top_config().get("default_project")
    if fallback:
        return Project(fallback)
    raise SystemExit("No hay default_project en config.yaml. Pasa --project NAME.")
# ---------------------------------------------------------------- Client
def get_client(project: Project, force_login: bool = False) -> MetabaseClient:
    """Return an authenticated MetabaseClient for ``project``.

    A saved session is reused when it was issued for the project's current
    ``base_url``, carries a token, and a probe request to
    ``/api/user/current`` succeeds.  Otherwise a fresh login is performed
    with credentials taken from the process environment overlaid by the
    project's ``.env`` file, and the new token is saved.

    Args:
        project: project whose config, .env and session file are used.
        force_login: when True, ignore any saved session and log in again.

    Raises:
        SystemExit: if the e-mail or password cannot be found.
    """
    base_url = project.base_url
    sess = None if force_login else project.load_session()
    # A malformed session (not a dict, or without a token) is treated as absent
    # instead of crashing with KeyError/AttributeError.
    if isinstance(sess, dict) and sess.get("base_url") == base_url and sess.get("token"):
        client = MetabaseClient(base_url, sess["token"])
        try:
            client.request("GET", "/api/user/current")
        except Exception:
            # Deliberate best-effort: any failure of the probe (typically an
            # expired token) falls through to a fresh login below.
            pass
        else:
            return client

    env = {**os.environ, **project.load_env()}
    auth_cfg = project.config.get("auth") or {}
    # Resolve the variable names once so the lookup and the error message
    # always agree, including when the config relies on the defaults.
    email_var = auth_cfg.get("email_env") or "METABASE_EMAIL"
    password_var = auth_cfg.get("password_env") or "METABASE_PASSWORD"
    email = env.get(email_var)
    password = env.get(password_var)
    if not email or not password:
        try:
            env_display = project.env_path.relative_to(APP_DIR)
        except ValueError:
            # Projects directory lives outside APP_DIR; show the full path
            # rather than masking the real error.
            env_display = project.env_path
        raise SystemExit(
            f"Faltan credenciales para proyecto '{project.name}'. "
            f"Define {email_var} y {password_var} "
            f"en {env_display}"
        )
    client = metabase_auth(base_url, email, password)
    project.save_session(base_url, client.token)
    return client
# ---------------------------------------------------------------- Commands
def cmd_projects(_args):
    """Print every project with its base_url, marking the default with ``*``.

    The base_url is read from each project's config.yaml with the YAML
    parser, so quoted values and trailing comments are shown correctly.
    A config that cannot be read or parsed is listed with ``?`` instead of
    aborting the listing.
    """
    default = load_top_config().get("default_project")
    names = list_projects()
    if not names:
        print("(sin proyectos. Crea uno con: init-project NAME --base-url URL)")
        return
    print(f"default: {default}\n")
    # Resolve the root once; projects_root() re-reads the top-level config.
    root = projects_root()
    for name in names:
        marker = "*" if name == default else " "
        base = "?"
        try:
            with (root / name / "config.yaml").open() as f:
                cfg = yaml.safe_load(f)
        except (OSError, ValueError, yaml.YAMLError):
            # Best-effort listing: unreadable/invalid config keeps the "?".
            cfg = None
        if isinstance(cfg, dict) and cfg.get("base_url"):
            base = cfg["base_url"]
        print(f" {marker} {name:20s} {base}")
def cmd_init_project(args):
    """Create the on-disk skeleton of a new project.

    Creates the project directory with its per-kind sub-directories and
    ``state/``, then writes ``config.yaml``, ``.env.example`` and an empty
    ``state/index.json``, and finally prints the next step.

    Raises:
        SystemExit: if the project directory already exists.
    """
    project_name = args.name
    target = projects_root() / project_name
    if target.exists():
        raise SystemExit(f"Proyecto '{project_name}' ya existe en {target}")

    kinds = ("databases", "collections", "cards", "dashboards", "documents")
    for folder in (*kinds, "state"):
        (target / folder).mkdir(parents=True, exist_ok=True)

    # Key order matters: the file is dumped with sort_keys=False.
    config = {}
    config["name"] = project_name
    config["description"] = args.description or f"Proyecto Metabase: {project_name}"
    config["base_url"] = args.base_url
    config["auth"] = {
        "email_env": "METABASE_EMAIL",
        "password_env": "METABASE_PASSWORD",
    }
    config["sync"] = {
        "ignore_databases": [1],
        "ignore_collections": [],
        "prefer_archive": True,
    }
    with (target / "config.yaml").open("w") as out:
        yaml.safe_dump(config, out, sort_keys=False, default_flow_style=False)

    example_env = "METABASE_EMAIL=admin@example.com\nMETABASE_PASSWORD=changeme\n"
    (target / ".env.example").write_text(example_env)

    empty_index = {kind: {} for kind in kinds}
    (target / "state" / "index.json").write_text(json.dumps(empty_index, indent=2))

    shown = target.relative_to(APP_DIR)
    print(f"Proyecto '{project_name}' creado en {shown}")
    print(f"Siguiente paso: cp {shown}/.env.example {shown}/.env y edita credenciales")
def cmd_login(args):
    """Force a fresh login for the selected project and print who we are."""
    project = resolve_project(args.project)
    # force_login=True skips any saved session.
    session_client = get_client(project, force_login=True)
    user = session_client.request("GET", "/api/user/current")
    email = user["email"]
    user_id = user["id"]
    is_super = user.get("is_superuser")
    print(f"[{project.name}] login OK — {email} (id={user_id}, super={is_super})")
def cmd_status(args):
    """Print a summary of the selected project.

    Shows the base URL, whether a saved session exists, how many items of
    each kind are in the index, and how many ``*.yaml`` files each kind has
    on disk.
    """
    kinds = ("databases", "collections", "cards", "dashboards", "documents")
    project = resolve_project(args.project)

    print(f"project: {project.name}")
    print(f" base_url: {project.base_url}")
    session_state = "present" if project.load_session() else "missing"
    print(f" session: {session_state}")

    index = project.load_index()
    for kind in kinds:
        indexed = len(index.get(kind, {}))
        print(f" indexed {kind:12s} {indexed}")

    for kind in kinds:
        folder = project.dir / kind
        on_disk = len(list(folder.glob("*.yaml"))) if folder.exists() else 0
        print(f" on disk {kind:12s} {on_disk} archivos")
def cmd_pull(args):
    """Pull a single item (``args.kind`` / ``args.ref``) via sync_pull.pull_one."""
    # Imported lazily so the module is only loaded for this command.
    from sync_pull import pull_one

    project = resolve_project(args.project)
    pull_one(get_client(project), project, args.kind, args.ref)
def cmd_remote(args):
    """List remote items of ``args.kind`` as a table, at most 200 rows.

    Items come from sync_pull.remote_list, optionally filtered by
    ``args.filter``.  When more than 200 items are returned, a trailing line
    reports how many were omitted.
    """
    from sync_pull import remote_list

    max_rows = 200
    project = resolve_project(args.project)
    client = get_client(project)
    items = remote_list(client, args.kind, filter_name=args.filter)
    if not items:
        print("(sin resultados)")
        return

    print(f"{'ID':>5} {'NAME':40s} {'COL':>4} ARCH UPDATED_AT")
    for item in items[:max_rows]:
        name = (item["name"] or "")[:40]  # truncate to the column width
        item_id = item["id"]
        collection = str(item.get("collection_id") or "")
        archived_flag = "X" if item.get("archived") else " "
        updated = item.get("updated_at") or ""
        print(f"{item_id:>5} {name:40s} {collection:>4} {archived_flag} {updated}")

    hidden = len(items) - max_rows
    if hidden > 0:
        print(f"... ({hidden} mas)")
def cmd_push(args):
    """Push a single item (``args.kind`` / ``args.slug``) via sync_push.push_one.

    The ``--apply``, ``--force-overwrite`` and ``--allow-warnings`` flags are
    forwarded unchanged.
    """
    from sync_push import push_one

    project = resolve_project(args.project)
    client = get_client(project)
    kind, slug = args.kind, args.slug
    options = {
        "apply": args.apply,
        "force_overwrite": args.force_overwrite,
        "allow_warnings": args.allow_warnings,
    }
    push_one(project, client, kind, slug, **options)
def cmd_push_all(args):
    """Push every item of the requested kinds via sync_push.push_all.

    Exits the process with status 1 when the returned summary reports any
    failure, and with 0 otherwise.
    """
    from sync_push import push_all

    project = resolve_project(args.project)
    client = get_client(project)
    options = {
        "apply": args.apply,
        "force_overwrite": args.force_overwrite,
        "allow_warnings": args.allow_warnings,
        "kinds": tuple(args.kinds),
    }
    summary = push_all(project, client, **options)
    # Non-zero exit status signals that at least one item failed.
    if summary["failed"]:
        sys.exit(1)
    sys.exit(0)
def cmd_restore(args):
    """Restore one item from a backup, or list its backups with ``--list``.

    In list mode the backup paths are printed relative to the directory two
    levels above the project directory; nothing is restored.
    """
    from sync_restore import list_backups, restore_one

    project = resolve_project(args.project)
    if not args.list:
        restore_one(project, args.kind, args.slug, from_ts=args.from_ts)
        return

    found = list_backups(project, args.kind, args.slug)
    if not found:
        print(f"(sin backups para {args.kind} {args.slug})")
        return
    print(f"Backups disponibles para {args.kind} {args.slug} (mas reciente primero):")
    for backup in found:
        print(f" {backup.relative_to(project.dir.parent.parent)}")
def cmd_validate(args):
    """Validate one local item and exit with the validation's exit code.

    A Metabase client is only created when ``--check-sql`` is given.  With
    ``--show-payload`` the resulting payload (if any) is printed as JSON
    after the validation report.
    """
    from sync_validate import print_result, validate_one

    project = resolve_project(args.project)
    # Avoid logging in unless the SQL check actually needs the server.
    client = get_client(project) if args.check_sql else None
    result = validate_one(project, args.kind, args.slug, check_sql=args.check_sql, client=client)
    print_result(args.kind, args.slug, result)
    if args.show_payload and result.payload is not None:
        # json is already imported at module level; no local re-import needed.
        print("\n--- payload ---")
        # default=str stringifies any value json cannot serialise natively.
        print(json.dumps(result.payload, indent=2, default=str))
    sys.exit(result.exit_code())
def cmd_diff(args):
    """Print a hint pointing to ``validate --show-payload``; no diff is computed."""
    kind, slug = args.kind, args.slug
    hint = f"diff: usa `validate {kind} {slug} --show-payload` por ahora"
    print(hint)
def cmd_describe(args):
    """Resolve project and client, then delegate to explore.cmd_describe."""
    # Aliased on import because it shares this function's name.
    from explore import cmd_describe as run_describe

    project = resolve_project(args.project)
    run_describe(args, project, get_client(project))
def cmd_sql(args):
    """Resolve project and client, then delegate to explore.cmd_sql."""
    # Aliased on import because it shares this function's name.
    from explore import cmd_sql as run_sql

    project = resolve_project(args.project)
    run_sql(args, project, get_client(project))
# ---------------------------------------------------------------- Entrypoint
def main(argv=None):
    """Parse the command line and dispatch to the matching ``cmd_*`` handler.

    Args:
        argv: optional list of arguments to parse instead of ``sys.argv[1:]``
            (argparse's behaviour when None is passed).

    The global ``-p/--project`` option belongs to the top-level parser, so it
    must appear before the subcommand.  A subcommand is required.
    """
    # Shared choice sets; every kind vs. the kinds that can be written back.
    all_kinds = ("card", "dashboard", "database", "collection")
    pushable_kinds = ("card", "dashboard")

    p = argparse.ArgumentParser(description="auto_metabase — Metabase as code, multi-proyecto")
    p.add_argument("-p", "--project", help="Nombre del proyecto (default: del config top-level)")
    sub = p.add_subparsers(dest="cmd", required=True)

    sub.add_parser("projects", help="Lista proyectos").set_defaults(func=cmd_projects)

    ip = sub.add_parser("init-project", help="Crea un proyecto nuevo")
    ip.add_argument("name")
    ip.add_argument("--base-url", required=True, help="URL del Metabase (ej: http://localhost:3000)")
    ip.add_argument("--description")
    ip.set_defaults(func=cmd_init_project)

    sub.add_parser("login", help="Autentica y guarda token").set_defaults(func=cmd_login)
    sub.add_parser("status", help="Estado del proyecto").set_defaults(func=cmd_status)

    pp = sub.add_parser("pull", help="Trae UN item de Metabase a disco (per-item, nunca bulk)")
    pp.add_argument("kind", choices=all_kinds)
    pp.add_argument("ref", help="Slug del index, o id Metabase (numerico)")
    pp.set_defaults(func=cmd_pull)

    rl = sub.add_parser("remote", help="Lista items en Metabase sin descargar nada")
    rl.add_argument("kind", choices=all_kinds)
    rl.add_argument("--filter", help="Substring case-insensitive sobre name")
    rl.set_defaults(func=cmd_remote)

    va = sub.add_parser("validate", help="Valida un YAML local (read-only, no toca Metabase)")
    va.add_argument("kind", choices=all_kinds)
    va.add_argument("slug")
    va.add_argument("--check-sql", action="store_true",
                    help="Ejecuta la SQL contra Metabase para validar (solo cards native)")
    va.add_argument("--show-payload", action="store_true",
                    help="Imprime el payload final que se enviaria")
    va.set_defaults(func=cmd_validate)

    pu = sub.add_parser("push", help="Aplica UN item a Metabase. Dry-run por defecto.")
    pu.add_argument("kind", choices=pushable_kinds)
    pu.add_argument("slug")
    pu.add_argument("--apply", action="store_true",
                    help="Realmente envia a Metabase (sin esto solo dry-run)")
    pu.add_argument("--force-overwrite", action="store_true",
                    help="Salta R17 (freshness) y R18 (count) — perdida de trabajo posible")
    pu.add_argument("--allow-warnings", action="store_true",
                    help="Aplica aunque la validacion estructural genere warnings")
    pu.set_defaults(func=cmd_push)

    pa = sub.add_parser(
        "push-all",
        help="Pushea TODOS los YAMLs del proyecto (cards primero, dashboards despues). "
             "Solo CREATE/UPDATE — nunca DELETE. Dry-run por defecto.",
    )
    pa.add_argument("--apply", action="store_true",
                    help="Realmente envia (sin esto, dry-run de cada item)")
    pa.add_argument("--force-overwrite", action="store_true",
                    help="Salta R17 (freshness) y R18 (count) en cada item")
    pa.add_argument("--allow-warnings", action="store_true",
                    help="Aplica aunque la validacion estructural genere warnings")
    # The default is a fresh list so it is never shared with the choices tuple.
    pa.add_argument("--kinds", nargs="+", default=list(pushable_kinds),
                    choices=pushable_kinds,
                    help="Que tipos pushear y en que orden (default: card dashboard)")
    pa.set_defaults(func=cmd_push_all)

    re_ = sub.add_parser("restore", help="Restaura YAML local desde backup (no aplica a Metabase)")
    re_.add_argument("kind", choices=pushable_kinds)
    re_.add_argument("slug")
    re_.add_argument("--from", dest="from_ts", help="Timestamp del backup (default: mas reciente)")
    re_.add_argument("--list", action="store_true", help="Lista backups disponibles")
    re_.set_defaults(func=cmd_restore)

    di = sub.add_parser("diff", help="Alias temporal de validate --show-payload")
    di.add_argument("kind", choices=all_kinds)
    di.add_argument("slug")
    di.set_defaults(func=cmd_diff)

    de = sub.add_parser(
        "describe",
        help="Describe un database: tablas, columnas y tipos. Util para escribir cards sin adivinar.",
    )
    de.add_argument("db", help="slug de database (del index) o id numerico")
    de.add_argument("--filter", help="Substring case-insensitive sobre nombre de tabla")
    de.add_argument("--samples", action="store_true",
                    help="Muestra 3 filas de ejemplo por tabla (1 query SELECT * LIMIT 3 por tabla)")
    de.add_argument("--tables-only", action="store_true",
                    help="Solo nombre de tabla y row count, sin columnas")
    de.set_defaults(func=cmd_describe)

    sq = sub.add_parser(
        "sql",
        help="Ejecuta SQL ad-hoc contra un database (read-only). NO crea card, "
             "limite obligatorio para no explotar.",
    )
    sq.add_argument("db", help="slug de database (del index) o id numerico")
    sq.add_argument("query", help="SQL a ejecutar (entre comillas)")
    sq.add_argument("--limit", type=int, default=100,
                    help="Maximo filas a traer (default: 100, hard ceiling: 10000)")
    sq.add_argument("--allow-write", action="store_true",
                    help="Permite queries que empiecen por INSERT/UPDATE/DELETE/etc (Metabase suele bloquearlas)")
    sq.set_defaults(func=cmd_sql)

    # argv=None makes argparse fall back to sys.argv[1:].
    args = p.parse_args(argv)
    # Each subparser stored its handler in ``func`` via set_defaults.
    args.func(args)
# Run the CLI only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()