feat(auto_metabase): push-all + describe/sql + auto-inject de dashcards

- push_all(): pushea todos los YAMLs de un proyecto (cards primero,
  dashboards despues), solo CREATE/UPDATE, resiliente a fallos por item
- explore.py: comandos describe (schema de DB) y sql (query ad-hoc con
  limite, cap 5MB, bloqueo de escrituras destructivas)
- payload.py: auto-inyecta id:-N, visualization_settings:{} y
  parameter_mappings:[] en dashcards nuevas para evitar 500 en push
- test_local: 11 cards + 3 dashboards sobre Sample Database de Metabase
- registry.db regenerado con auto_metabase_py_analytics indexada

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
2026-04-13 13:14:05 +02:00
parent 386a5471e0
commit 310b409ae0
32 changed files with 3116 additions and 0 deletions
+372
View File
@@ -0,0 +1,372 @@
"""Per-item pull: fetches ONE item from Metabase to disk. Never bulk.

R14: a dashboard pull is ALWAYS complete (all dashcards, tabs, parameters).
R15: for each card_id referenced in dashcards that is not present in the index,
     register slug→id in the index without writing the YAML (option C: tracked
     without a file).
R16: every YAML carries these fields in _meta:
     - synced_at: timestamp of the moment of the pull (ISO UTC)
     - remote_updated_at: the updated_at Metabase reported at that moment
     - dashcards_count, tabs_count, parameters_count: snapshots for R18/R20
       (written for dashboards only)

Public functions:
    pull_one(client, project, kind, ref) -> dict   # ref: int id or str slug
    remote_list(client, kind, *, filter_name=None) -> list[dict]
"""
from __future__ import annotations
import datetime as dt
import re
from pathlib import Path
from typing import Any
import yaml
from metabase.cards import metabase_get_card, metabase_list_cards
from metabase.dashboards import metabase_get_dashboard, metabase_list_dashboards
from metabase.databases import metabase_get_database, metabase_list_databases
# Volatile fields dropped from every pulled payload (same set as we already had).
# _strip_volatile removes these keys at EVERY nesting level, not only at the top
# of the payload. NOTE: "id" is part of the set, so nested objects (dashcards,
# tabs, embedded cards, ...) lose their ids too; any code that needs a nested id
# has to read it from the raw API response before stripping.
_VOLATILE_KEYS = frozenset({
"created_at", "updated_at", "last_used_at", "last_viewed_at",
"last_query_start", "last_used_param_values", "view_count",
"dashboard_count", "parameter_usage_count", "average_query_time",
"creator_id", "creator", "made_public_by_id", "last-edit-info",
"public_uuid", "entity_id", "card_schema", "metabase_version",
"result_metadata", "legacy_query", "source_card_id",
"can_write", "can_restore", "can_delete", "can_run_adhoc_query",
"can_manage_db", "can_set_cache_policy", "can-manage", "can_upload",
"archived_directly", "moderation_reviews", "embedding_type",
"dependency_analysis_version", "initially_published_at",
"param_fields", "is_remote_synced", "show_in_getting_started",
"collection_position", "position", "cache_invalidated_at",
"is_sample", "is_audit", "is_attached_dwh", "is_on_demand",
"is_full_sync", "initial_sync_status", "dbms_version",
"router_database_id", "router_user_attribute",
"uploads_enabled", "uploads_schema_name", "uploads_table_prefix",
"refingerprint", "schedules", "metadata_sync_schedule",
"cache_field_values_schedule", "write_data_details", "provider_name",
"workspace_permissions_status", "features", "id",
"dashboard", "dashboard_id", "table_id",
})
def _utc_now_iso() -> str:
    """Return the current UTC time as ``YYYY-MM-DDTHH:MM:SSZ`` (second precision)."""
    now = dt.datetime.now(tz=dt.timezone.utc)
    # isoformat() renders the UTC offset as "+00:00"; the on-disk format uses "Z".
    return now.isoformat(timespec="seconds").replace("+00:00", "Z")
def _slugify(name: str) -> str:
    """Turn a display name into a lowercase ``a-z0-9`` slug joined by underscores.

    Every run of characters outside ``[a-z0-9]`` (after lowercasing) acts as a
    single separator. Falsy or separator-only names yield ``"untitled"``.
    """
    lowered = (name or "").lower()
    words = re.findall(r"[a-z0-9]+", lowered)
    if not words:
        return "untitled"
    return "_".join(words)
def _strip_volatile(value: Any) -> Any:
    """Return a copy of *value* without volatile keys and without ``None`` dict values.

    Dicts are rebuilt recursively, skipping every key listed in
    ``_VOLATILE_KEYS`` and every entry whose cleaned value is ``None``.
    Lists are cleaned element by element (``None`` elements are kept).
    Any other value is returned unchanged.
    """
    if isinstance(value, list):
        return [_strip_volatile(item) for item in value]
    if not isinstance(value, dict):
        return value
    kept = {}
    for key, raw in value.items():
        if key not in _VOLATILE_KEYS:
            stripped = _strip_volatile(raw)
            if stripped is not None:
                kept[key] = stripped
    return kept
def _yaml_dump(path: Path, data: dict) -> None:
    """Write *data* to *path* as block-style YAML, creating parent directories.

    The file is always written as UTF-8: ``allow_unicode=True`` makes PyYAML
    emit non-ASCII characters verbatim, so relying on the platform default
    encoding would fail (or produce unreadable files) on non-UTF-8 locales.
    """
    path.parent.mkdir(parents=True, exist_ok=True)
    with path.open("w", encoding="utf-8") as f:
        yaml.safe_dump(data, f, sort_keys=False, allow_unicode=True, default_flow_style=False, width=120)
def _id_to_slug(id_: int | None, mapping: dict[str, int]) -> str | None:
if id_ is None:
return None
for slug, mid in mapping.items():
if mid == id_:
return slug
return None
def _resolve_ref(ref: str | int, kind_plural: str, index: dict) -> int:
"""Devuelve el id Metabase a partir de un id int o slug str."""
if isinstance(ref, int):
return ref
if isinstance(ref, str) and ref.isdigit():
return int(ref)
mapping = index.get(kind_plural, {})
if ref not in mapping:
raise SystemExit(
f"Ref '{ref}' no encontrado en index.{kind_plural}. "
f"Conocidos: {sorted(mapping.keys()) or '(vacio)'}. "
f"Si es un id Metabase nuevo, pasa el numero directamente."
)
return mapping[ref]
def _slug_for(name: str, existing_mapping: dict[str, int], item_id: int) -> str:
    """Pick the slug for an item: reuse the indexed one or derive a free one.

    If *item_id* is already mapped in *existing_mapping*, its slug is returned.
    Otherwise the slugified *name* is used, with ``_2``, ``_3``, ... appended
    until the result does not collide with an existing slug.
    """
    already_known = next(
        (slug for slug, mapped_id in existing_mapping.items() if mapped_id == item_id),
        None,
    )
    if already_known is not None:
        return already_known

    base = _slugify(name)
    candidate = base
    suffix = 2
    while candidate in existing_mapping:
        candidate = f"{base}_{suffix}"
        suffix += 1
    return candidate
# ---------------------------------------------------------------- Per-kind
def pull_database(client, project, ref: str | int) -> dict:
    """Pull one database definition from Metabase into ``databases/<slug>.yaml``.

    The stored payload has volatile keys stripped and, when present, the
    connection password replaced by a ``${METABASE_DB_PASSWORD_<SLUG>}``
    placeholder. The slug -> id mapping is saved in the project index.

    Returns the document that was written (``_meta``, ``_refs``, ``payload``).
    """
    index = project.load_index()
    db_id = _resolve_ref(ref, "databases", index)
    full = metabase_get_database(client, db_id)
    slug = _slug_for(full.get("name", "db"), index.get("databases", {}), db_id)

    payload = _strip_volatile(full)
    if "details" in payload:
        details = payload["details"]
        if "password" in details:
            # Store an env-var placeholder in the YAML instead of the password value.
            details["password"] = f"${{METABASE_DB_PASSWORD_{slug.upper()}}}"

    meta = {
        "kind": "database",
        "id": db_id,
        "slug": slug,
        "synced_at": _utc_now_iso(),
        "remote_updated_at": full.get("updated_at"),
    }
    body = {"_meta": meta, "_refs": {}, "payload": payload}

    path = project.dir / "databases" / f"{slug}.yaml"
    _yaml_dump(path, body)

    databases = index.setdefault("databases", {})
    databases[slug] = db_id
    project.save_index(index)

    print(f"[{project.name}] pull database {slug} (id={db_id}) -> {path.relative_to(project.dir.parent.parent)}")
    return body
def pull_collection(client, project, ref: str | int) -> dict:
    """Pull one collection from Metabase into ``collections/<slug>.yaml``.

    The parent collection is stored as a slug under ``_refs.parent`` (``None``
    when the parent id is absent or not in the index) and ``parent_id`` is
    removed from the payload. The slug -> id mapping is saved in the index.

    Returns the document that was written (``_meta``, ``_refs``, ``payload``).
    """
    index = project.load_index()
    coll_id = _resolve_ref(ref, "collections", index)
    full = client.request("GET", f"/api/collection/{coll_id}")
    known_collections = index.get("collections", {})
    slug = _slug_for(full.get("name", "col"), known_collections, coll_id)
    parent_slug = _id_to_slug(full.get("parent_id"), index.get("collections", {}))

    payload = _strip_volatile(full)
    payload.pop("parent_id", None)

    meta = {
        "kind": "collection",
        "id": coll_id,
        "slug": slug,
        "synced_at": _utc_now_iso(),
        "remote_updated_at": full.get("updated_at"),
    }
    body = {"_meta": meta, "_refs": {"parent": parent_slug}, "payload": payload}

    path = project.dir / "collections" / f"{slug}.yaml"
    _yaml_dump(path, body)

    collections = index.setdefault("collections", {})
    collections[slug] = coll_id
    project.save_index(index)

    print(f"[{project.name}] pull collection {slug} (id={coll_id}) -> {path.relative_to(project.dir.parent.parent)}")
    return body
def pull_card(client, project, ref: str | int) -> dict:
    """Pull one card from Metabase into ``cards/<slug>.yaml``.

    Database and collection ids are replaced by index slugs under ``_refs``
    (``None`` when the id is not in the index); ``dataset_query.database`` is
    rewritten to the database slug as well. A warning is printed when the card
    points to a database that is not in the index yet.

    Returns the document that was written (``_meta``, ``_refs``, ``payload``).
    """
    index = project.load_index()
    card_id = _resolve_ref(ref, "cards", index)
    full = metabase_get_card(client, card_id)
    slug = _slug_for(full.get("name", "card"), index.get("cards", {}), card_id)

    database_slug = _id_to_slug(full.get("database_id"), index.get("databases", {}))
    collection_slug = _id_to_slug(full.get("collection_id"), index.get("collections", {}))
    refs = {"database": database_slug, "collection": collection_slug}

    if database_slug is None and full.get("database_id") is not None:
        # The card points to a database that is not in our index yet.
        print(
            f" ! warning: database_id={full['database_id']} no esta en index. "
            f"El push de esta card fallara hasta que pullees esa database."
        )

    payload = _strip_volatile(full)
    for key in ("database_id", "collection_id", "collection"):
        payload.pop(key, None)
    query = payload.get("dataset_query")
    if isinstance(query, dict) and "database" in query:
        query["database"] = refs["database"]

    meta = {
        "kind": "card",
        "id": card_id,
        "slug": slug,
        "synced_at": _utc_now_iso(),
        "remote_updated_at": full.get("updated_at"),
    }
    body = {"_meta": meta, "_refs": refs, "payload": payload}

    path = project.dir / "cards" / f"{slug}.yaml"
    _yaml_dump(path, body)

    cards = index.setdefault("cards", {})
    cards[slug] = card_id
    project.save_index(index)

    print(f"[{project.name}] pull card {slug} (id={card_id}) -> {path.relative_to(project.dir.parent.parent)}")
    return body
def _track_card_slug(client, cards_idx: dict[str, int], card_id: int) -> tuple[str, bool]:
    """Return ``(slug, newly_tracked)`` for *card_id*, registering it if unknown.

    A card that is not in *cards_idx* is fetched only to obtain its name; the
    resulting slug -> id pair is added to *cards_idx* (no YAML is written).
    If the lookup fails, a warning is printed and a ``_unknown_card_<id>``
    placeholder is returned without touching the index.
    """
    known = _id_to_slug(card_id, cards_idx)
    if known is not None:
        return known, False
    try:
        card_meta = metabase_get_card(client, card_id)
        slug = _slug_for(card_meta.get("name", f"card_{card_id}"), cards_idx, card_id)
        cards_idx[slug] = card_id
    except Exception as e:
        # Deliberately best-effort: one unreachable card must not abort the
        # whole dashboard pull.
        print(f" ! warning: card_id={card_id} en dashcards no se pudo trackear: {e}")
        return f"_unknown_card_{card_id}", False
    return slug, True


def pull_dashboard(client, project, ref: str | int) -> dict:
    """Pull one dashboard from Metabase into ``dashboards/<slug>.yaml``.

    R14: the pull is ALWAYS complete (all dashcards, tabs and parameters).
    R15: every card referenced by a dashcard (main card or series) that is not
    in the index yet is registered there as slug -> id, without writing a file.

    In the stored payload each dashcard references its card by slug under
    ``card`` and its extra series cards by slug under ``series``; dashcard
    entries whose value is ``None``, ``[]`` or ``{}`` are dropped.

    Returns the document that was written (``_meta``, ``_refs``, ``payload``).
    """
    index = project.load_index()
    dash_id = _resolve_ref(ref, "dashboards", index)
    full = metabase_get_dashboard(client, dash_id)
    slug = _slug_for(full.get("name", "dashboard"), index.get("dashboards", {}), dash_id)
    coll_slug = _id_to_slug(full.get("collection_id"), index.get("collections", {}))
    refs = {"collection": coll_slug}
    payload = _strip_volatile(full)
    payload.pop("collection_id", None)
    payload.pop("collection", None)

    cards_idx = index.setdefault("cards", {})
    clean_dashcards = []
    tracked_count = 0
    # _strip_volatile maps lists element by element, so the raw and the stripped
    # dashcards line up one to one. The raw ones are needed because stripping
    # removes every "id" key, including the ids of the cards listed in "series".
    raw_dashcards = full.get("dashcards") or []
    stripped_dashcards = payload.get("dashcards", []) or []
    for raw_dc, stripped_dc in zip(raw_dashcards, stripped_dashcards):
        dc = dict(stripped_dc)
        cid = dc.pop("card_id", None)
        dc.pop("card", None)
        dc.pop("dashboard_id", None)

        card_slug: str | None = None
        if cid is not None:
            card_slug, is_new = _track_card_slug(client, cards_idx, cid)
            if is_new:
                tracked_count += 1
        dc["card"] = card_slug

        # series: list of extra cards, given either as card objects or bare ids.
        raw_series = (raw_dc.get("series") if isinstance(raw_dc, dict) else None) or []
        if raw_series:
            new_series = []
            for s in raw_series:
                sid = s.get("id") if isinstance(s, dict) else s
                if sid is None:
                    # Nothing to reference; a null entry would be meaningless on push.
                    continue
                s_slug, is_new = _track_card_slug(client, cards_idx, sid)
                if is_new:
                    tracked_count += 1
                new_series.append(s_slug)
            dc["series"] = new_series

        clean_dashcards.append({k: v for k, v in dc.items() if v not in (None, [], {})})
    payload["dashcards"] = clean_dashcards

    body = {
        "_meta": {
            "kind": "dashboard",
            "id": dash_id,
            "slug": slug,
            "synced_at": _utc_now_iso(),
            "remote_updated_at": full.get("updated_at"),
            "dashcards_count": len(clean_dashcards),
            "tabs_count": len(payload.get("tabs", []) or []),
            "parameters_count": len(payload.get("parameters", []) or []),
        },
        "_refs": refs,
        "payload": payload,
    }
    path = project.dir / "dashboards" / f"{slug}.yaml"
    _yaml_dump(path, body)
    index.setdefault("dashboards", {})[slug] = dash_id
    project.save_index(index)
    msg = f"[{project.name}] pull dashboard {slug} (id={dash_id}) -> {path.relative_to(project.dir.parent.parent)}"
    if tracked_count:
        msg += f" [+{tracked_count} cards trackeadas en index sin file]"
    print(msg)
    return body
# ---------------------------------------------------------------- Dispatch
# Dispatch table used by pull_one: item kind -> function that pulls one item of
# that kind. Its keys are the only kinds pull_one accepts.
_PULLERS = {
"card": pull_card,
"dashboard": pull_dashboard,
"database": pull_database,
"collection": pull_collection,
}
def pull_one(client, project, kind: str, ref: str | int) -> dict:
    """Pull a single item of the given *kind* and return the written document.

    *ref* is an int id or a str slug. Raises ``SystemExit`` when *kind* is not
    one of the keys of ``_PULLERS``.
    """
    puller = _PULLERS.get(kind)
    if puller is None:
        raise SystemExit(f"kind '{kind}' invalido. Validos: {sorted(_PULLERS)}")
    return puller(client, project, ref)
# ---------------------------------------------------------------- Remote list (descubrir sin descargar)
def remote_list(client, kind: str, *, filter_name: str | None = None) -> list[dict]:
"""Lista items en Metabase sin tocar disco. Resumen ligero."""
if kind == "card":
items = metabase_list_cards(client)
elif kind == "dashboard":
items = metabase_list_dashboards(client)
elif kind == "database":
raw = metabase_list_databases(client)
items = raw["data"] if isinstance(raw, dict) and "data" in raw else raw
elif kind == "collection":
items = client.request("GET", "/api/collection") or []
else:
raise SystemExit(f"kind '{kind}' invalido")
if filter_name:
f = filter_name.lower()
items = [i for i in items if f in (i.get("name") or "").lower()]
out = []
for i in items:
out.append({
"id": i.get("id"),
"name": i.get("name"),
"collection_id": i.get("collection_id"),
"archived": i.get("archived", False),
"updated_at": i.get("updated_at"),
})
return out