feat(auto_metabase): push-all + describe/sql + auto-inject de dashcards
- push_all(): pushea todos los YAMLs de un proyecto (cards primero,
dashboards despues), solo CREATE/UPDATE, resiliente a fallos por item
- explore.py: comandos describe (schema de DB) y sql (query ad-hoc con
limite, cap 5MB, bloqueo de escrituras destructivas)
- payload.py: auto-inyecta id:-N, visualization_settings:{} y
parameter_mappings:[] en dashcards nuevas para evitar 500 en push
- test_local: 11 cards + 3 dashboards sobre Sample Database de Metabase
- registry.db regenerado con auto_metabase_py_analytics indexada
Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -0,0 +1,372 @@
|
||||
"""Pull per-item: trae UN item de Metabase a disco. Nunca bulk.
|
||||
|
||||
R14: pull de dashboard SIEMPRE completo (todas las dashcards, tabs, parameters).
|
||||
R15: para cada card_id referenciado en dashcards no presente en index, registra
|
||||
slug→id en index sin escribir el YAML (option C: tracked sin file).
|
||||
R16: cada YAML lleva en _meta los campos:
|
||||
- synced_at: timestamp del momento del pull (ISO UTC)
|
||||
- remote_updated_at: updated_at que Metabase reportaba en ese momento
|
||||
- dashcards_count, tabs_count, parameters_count: snapshots para R18/R20
|
||||
|
||||
Funciones publicas:
|
||||
pull_one(client, project, kind, ref) -> dict # ref: int id o str slug
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import datetime as dt
|
||||
import re
|
||||
from pathlib import Path
|
||||
from typing import Any
|
||||
|
||||
import yaml
|
||||
|
||||
from metabase.cards import metabase_get_card, metabase_list_cards
|
||||
from metabase.dashboards import metabase_get_dashboard, metabase_list_dashboards
|
||||
from metabase.databases import metabase_get_database, metabase_list_databases
|
||||
|
||||
|
||||
# Volatile fields dropped from every pulled payload (same set used previously).
# NOTE: `_strip_volatile` applies this set at every nesting level, so generic
# keys such as "id" or "position" are removed from nested objects as well,
# not only from the top-level item.
_VOLATILE_KEYS = frozenset({
    "created_at", "updated_at", "last_used_at", "last_viewed_at",
    "last_query_start", "last_used_param_values", "view_count",
    "dashboard_count", "parameter_usage_count", "average_query_time",
    "creator_id", "creator", "made_public_by_id", "last-edit-info",
    "public_uuid", "entity_id", "card_schema", "metabase_version",
    "result_metadata", "legacy_query", "source_card_id",
    "can_write", "can_restore", "can_delete", "can_run_adhoc_query",
    "can_manage_db", "can_set_cache_policy", "can-manage", "can_upload",
    "archived_directly", "moderation_reviews", "embedding_type",
    "dependency_analysis_version", "initially_published_at",
    "param_fields", "is_remote_synced", "show_in_getting_started",
    "collection_position", "position", "cache_invalidated_at",
    "is_sample", "is_audit", "is_attached_dwh", "is_on_demand",
    "is_full_sync", "initial_sync_status", "dbms_version",
    "router_database_id", "router_user_attribute",
    "uploads_enabled", "uploads_schema_name", "uploads_table_prefix",
    "refingerprint", "schedules", "metadata_sync_schedule",
    "cache_field_values_schedule", "write_data_details", "provider_name",
    "workspace_permissions_status", "features", "id",
    "dashboard", "dashboard_id", "table_id",
})
|
||||
|
||||
|
||||
def _utc_now_iso() -> str:
    """Return the current UTC time as ``YYYY-MM-DDTHH:MM:SSZ`` (second precision)."""
    now = dt.datetime.now(tz=dt.timezone.utc)
    return f"{now:%Y-%m-%dT%H:%M:%SZ}"
|
||||
|
||||
|
||||
def _slugify(name: str) -> str:
    """Turn a display name into a lowercase ``[a-z0-9_]`` slug.

    Every run of characters outside ``a-z0-9`` (after lowercasing) becomes a
    single underscore, and leading/trailing underscores are trimmed. An empty
    or ``None`` name, or one that reduces to nothing, yields ``"untitled"``.
    """
    lowered = name.lower() if name else ""
    collapsed = re.sub(r"[^a-z0-9]+", "_", lowered)
    trimmed = collapsed.strip("_")
    if not trimmed:
        return "untitled"
    return trimmed
|
||||
|
||||
|
||||
def _strip_volatile(value: Any) -> Any:
    """Recursively remove volatile keys from a JSON-like structure.

    - dict: keys listed in ``_VOLATILE_KEYS`` are dropped, remaining values are
      cleaned recursively, and entries whose cleaned value is ``None`` are
      dropped too.
    - list: each element is cleaned recursively; the list keeps its length and
      order (``None`` elements are kept).
    - anything else is returned unchanged.
    """
    if isinstance(value, list):
        return [_strip_volatile(element) for element in value]
    if not isinstance(value, dict):
        return value

    # Clean lazily so volatile keys are skipped before recursing into them.
    cleaned_pairs = (
        (key, _strip_volatile(inner))
        for key, inner in value.items()
        if key not in _VOLATILE_KEYS
    )
    return {key: cleaned for key, cleaned in cleaned_pairs if cleaned is not None}
|
||||
|
||||
|
||||
def _yaml_dump(path: Path, data: dict) -> None:
    """Write ``data`` to ``path`` as block-style YAML, creating parent directories.

    The file is always written as UTF-8. The dump uses ``allow_unicode=True``,
    which emits non-ASCII characters verbatim; relying on the platform default
    encoding could raise ``UnicodeEncodeError`` or produce files that differ
    between machines.

    Args:
        path: Destination file; missing parent directories are created.
        data: Mapping to serialize. Key order is preserved (``sort_keys=False``).
    """
    path.parent.mkdir(parents=True, exist_ok=True)
    with path.open("w", encoding="utf-8") as f:
        yaml.safe_dump(data, f, sort_keys=False, allow_unicode=True, default_flow_style=False, width=120)
|
||||
|
||||
|
||||
def _id_to_slug(id_: int | None, mapping: dict[str, int]) -> str | None:
    """Reverse-lookup: return the first slug in ``mapping`` whose value equals ``id_``.

    Returns ``None`` when ``id_`` is ``None`` or no slug maps to it.
    """
    if id_ is None:
        return None
    matches = (slug for slug, mapped_id in mapping.items() if mapped_id == id_)
    return next(matches, None)
|
||||
|
||||
|
||||
def _resolve_ref(ref: str | int, kind_plural: str, index: dict) -> int:
    """Return the Metabase id for ``ref``, which is either an int id or a slug.

    Ints and all-digit strings are treated as ids and returned as ``int``
    without consulting the index. Any other value is looked up as a slug in
    ``index[kind_plural]``.

    Raises:
        SystemExit: if the slug is not present in the index.
    """
    if isinstance(ref, int):
        return ref
    if isinstance(ref, str) and ref.isdigit():
        return int(ref)

    known = index.get(kind_plural, {})
    if ref in known:
        return known[ref]

    listing = sorted(known.keys()) or '(vacio)'
    raise SystemExit(
        f"Ref '{ref}' no encontrado en index.{kind_plural}. "
        f"Conocidos: {listing}. "
        f"Si es un id Metabase nuevo, pasa el numero directamente."
    )
|
||||
|
||||
|
||||
def _slug_for(name: str, existing_mapping: dict[str, int], item_id: int) -> str:
    """Pick the slug for an item: reuse the indexed one or derive a free one.

    If some slug in ``existing_mapping`` already maps to ``item_id`` it is
    returned as-is. Otherwise the slugified ``name`` is used, with ``_2``,
    ``_3``, ... appended until it does not collide with an existing slug.
    """
    already_mapped = next(
        (slug for slug, mapped_id in existing_mapping.items() if mapped_id == item_id),
        None,
    )
    if already_mapped is not None:
        return already_mapped

    base = _slugify(name)
    candidate = base
    suffix = 1
    while candidate in existing_mapping:
        suffix += 1
        candidate = f"{base}_{suffix}"
    return candidate
|
||||
|
||||
|
||||
# ---------------------------------------------------------------- Per-kind
|
||||
|
||||
|
||||
def pull_database(client, project, ref: str | int) -> dict:
    """Pull one Metabase database definition into ``databases/<slug>.yaml``.

    Resolves ``ref`` (id or slug) against the project index, fetches the
    database, strips volatile fields, replaces ``details.password`` (when
    present) with a ``${METABASE_DB_PASSWORD_<SLUG>}`` placeholder, writes the
    YAML, records ``slug -> id`` in the index and prints a summary line.

    Returns:
        The body written to disk (``_meta``, ``_refs``, ``payload``).
    """
    idx = project.load_index()
    database_id = _resolve_ref(ref, "databases", idx)

    remote = metabase_get_database(client, database_id)
    db_slug = _slug_for(remote.get("name", "db"), idx.get("databases", {}), database_id)

    cleaned = _strip_volatile(remote)
    if "details" in cleaned:
        details = cleaned["details"]
        if "password" in details:
            # Never persist the real secret; leave a placeholder instead.
            details["password"] = f"${{METABASE_DB_PASSWORD_{db_slug.upper()}}}"

    meta = {
        "kind": "database",
        "id": database_id,
        "slug": db_slug,
        "synced_at": _utc_now_iso(),
        "remote_updated_at": remote.get("updated_at"),
    }
    body = {"_meta": meta, "_refs": {}, "payload": cleaned}

    path = project.dir / "databases" / f"{db_slug}.yaml"
    _yaml_dump(path, body)

    idx.setdefault("databases", {})[db_slug] = database_id
    project.save_index(idx)

    shown_path = path.relative_to(project.dir.parent.parent)
    print(f"[{project.name}] pull database {db_slug} (id={database_id}) -> {shown_path}")
    return body
|
||||
|
||||
|
||||
def pull_collection(client, project, ref: str | int) -> dict:
    """Pull one Metabase collection into ``collections/<slug>.yaml``.

    Fetches ``/api/collection/<id>``, strips volatile fields, replaces the
    numeric ``parent_id`` with the parent's slug under ``_refs.parent``
    (``None`` when the parent is not in the index), writes the YAML, records
    ``slug -> id`` in the index and prints a summary line.

    Returns:
        The body written to disk (``_meta``, ``_refs``, ``payload``).
    """
    idx = project.load_index()
    collection_id = _resolve_ref(ref, "collections", idx)

    remote = client.request("GET", f"/api/collection/{collection_id}")
    coll_slug = _slug_for(remote.get("name", "col"), idx.get("collections", {}), collection_id)

    parent_ref = _id_to_slug(remote.get("parent_id"), idx.get("collections", {}))

    cleaned = _strip_volatile(remote)
    # The parent is expressed as a slug in _refs, not as a raw id.
    cleaned.pop("parent_id", None)

    meta = {
        "kind": "collection",
        "id": collection_id,
        "slug": coll_slug,
        "synced_at": _utc_now_iso(),
        "remote_updated_at": remote.get("updated_at"),
    }
    body = {"_meta": meta, "_refs": {"parent": parent_ref}, "payload": cleaned}

    path = project.dir / "collections" / f"{coll_slug}.yaml"
    _yaml_dump(path, body)

    idx.setdefault("collections", {})[coll_slug] = collection_id
    project.save_index(idx)

    shown_path = path.relative_to(project.dir.parent.parent)
    print(f"[{project.name}] pull collection {coll_slug} (id={collection_id}) -> {shown_path}")
    return body
|
||||
|
||||
|
||||
def pull_card(client, project, ref: str | int) -> dict:
    """Pull one Metabase card into ``cards/<slug>.yaml``.

    The card's ``database_id`` and ``collection_id`` are translated to slugs
    under ``_refs`` and removed from the payload; ``dataset_query.database``
    (when present) is rewritten to the database slug. A warning is printed if
    the card points at a database that is not in the index yet.

    Returns:
        The body written to disk (``_meta``, ``_refs``, ``payload``).
    """
    idx = project.load_index()
    card_id = _resolve_ref(ref, "cards", idx)

    remote = metabase_get_card(client, card_id)
    card_slug = _slug_for(remote.get("name", "card"), idx.get("cards", {}), card_id)

    database_slug = _id_to_slug(remote.get("database_id"), idx.get("databases", {}))
    collection_slug = _id_to_slug(remote.get("collection_id"), idx.get("collections", {}))
    refs = {"database": database_slug, "collection": collection_slug}

    if database_slug is None and remote.get("database_id") is not None:
        # The card references a database we have not pulled yet.
        print(
            f" ! warning: database_id={remote['database_id']} no esta en index. "
            f"El push de esta card fallara hasta que pullees esa database."
        )

    cleaned = _strip_volatile(remote)
    for raw_ref_key in ("database_id", "collection_id", "collection"):
        cleaned.pop(raw_ref_key, None)

    query = cleaned.get("dataset_query")
    if isinstance(query, dict) and "database" in query:
        query["database"] = refs["database"]

    meta = {
        "kind": "card",
        "id": card_id,
        "slug": card_slug,
        "synced_at": _utc_now_iso(),
        "remote_updated_at": remote.get("updated_at"),
    }
    body = {"_meta": meta, "_refs": refs, "payload": cleaned}

    path = project.dir / "cards" / f"{card_slug}.yaml"
    _yaml_dump(path, body)

    idx.setdefault("cards", {})[card_slug] = card_id
    project.save_index(idx)

    shown_path = path.relative_to(project.dir.parent.parent)
    print(f"[{project.name}] pull card {card_slug} (id={card_id}) -> {shown_path}")
    return body
|
||||
|
||||
|
||||
def _track_card_slug(client, cid, cards_idx: dict[str, int], where: str) -> tuple[str, bool]:
    """Return ``(slug, newly_tracked)`` for card ``cid``, registering it if unknown.

    If ``cid`` is already in ``cards_idx`` its slug is returned. Otherwise the
    card is fetched only to learn its name, a slug is derived and stored in
    ``cards_idx`` (no YAML file is written). If the fetch fails, a warning is
    printed and the placeholder ``_unknown_card_<id>`` is returned without
    touching the index. ``where`` names the place the reference came from and
    is only used in the warning text.
    """
    known = _id_to_slug(cid, cards_idx)
    if known is not None:
        return known, False
    try:
        card_meta = metabase_get_card(client, cid)
        slug = _slug_for(card_meta.get("name", f"card_{cid}"), cards_idx, cid)
    except Exception as e:  # best effort: one bad card must not abort the pull
        print(f" ! warning: card_id={cid} en {where} no se pudo trackear: {e}")
        return f"_unknown_card_{cid}", False
    cards_idx[slug] = cid
    return slug, True


def pull_dashboard(client, project, ref: str | int) -> dict:
    """Pull one dashboard, always in full, into ``dashboards/<slug>.yaml``.

    R14: the dashboard is always pulled completely (dashcards, tabs, parameters).
    R15: every card referenced by a dashcard (main card or series) that is not
    in the index yet is registered there as ``slug -> id`` without writing a
    card file.

    In each dashcard ``card_id`` is replaced by ``card`` (a slug or ``None``)
    and ``series`` becomes a list of card slugs. Keys whose value is ``None``,
    ``[]`` or ``{}`` are dropped from each dashcard.

    Series ids are read from the raw API response rather than from the stripped
    payload: ``_strip_volatile`` removes every ``"id"`` key, so reading them
    after stripping would always yield ``None``.

    Returns:
        The body written to disk (``_meta``, ``_refs``, ``payload``).
    """
    index = project.load_index()
    dash_id = _resolve_ref(ref, "dashboards", index)

    full = metabase_get_dashboard(client, dash_id)
    slug = _slug_for(full.get("name", "dashboard"), index.get("dashboards", {}), dash_id)

    coll_slug = _id_to_slug(full.get("collection_id"), index.get("collections", {}))
    refs = {"collection": coll_slug}

    payload = _strip_volatile(full)
    payload.pop("collection_id", None)
    payload.pop("collection", None)

    cards_idx = index.setdefault("cards", {})
    clean_dashcards = []
    tracked_count = 0

    # _strip_volatile keeps list length and order, so raw and stripped
    # dashcards line up one-to-one.
    raw_dashcards = full.get("dashcards") or []
    stripped_dashcards = payload.get("dashcards") or []
    for raw_dc, stripped_dc in zip(raw_dashcards, stripped_dashcards):
        dc = dict(stripped_dc)
        cid = dc.pop("card_id", None)
        dc.pop("card", None)
        dc.pop("dashboard_id", None)

        card_slug: str | None = None
        if cid is not None:
            card_slug, is_new = _track_card_slug(client, cid, cards_idx, "dashcards")
            if is_new:
                tracked_count += 1
        dc["card"] = card_slug

        # series: extra cards overlaid on this dashcard. Entries may be card
        # dicts or bare ids; entries without an id cannot be referenced and
        # are skipped.
        series_slugs = []
        for s in raw_dc.get("series") or []:
            sid = s.get("id") if isinstance(s, dict) else s
            if sid is None:
                continue
            s_slug, is_new = _track_card_slug(client, sid, cards_idx, "series")
            if is_new:
                tracked_count += 1
            series_slugs.append(s_slug)
        # An empty list is removed by the filter below.
        dc["series"] = series_slugs

        clean_dashcards.append({k: v for k, v in dc.items() if v not in (None, [], {})})

    payload["dashcards"] = clean_dashcards

    body = {
        "_meta": {
            "kind": "dashboard",
            "id": dash_id,
            "slug": slug,
            "synced_at": _utc_now_iso(),
            "remote_updated_at": full.get("updated_at"),
            "dashcards_count": len(clean_dashcards),
            "tabs_count": len(payload.get("tabs", []) or []),
            "parameters_count": len(payload.get("parameters", []) or []),
        },
        "_refs": refs,
        "payload": payload,
    }
    path = project.dir / "dashboards" / f"{slug}.yaml"
    _yaml_dump(path, body)

    index.setdefault("dashboards", {})[slug] = dash_id
    project.save_index(index)
    msg = f"[{project.name}] pull dashboard {slug} (id={dash_id}) -> {path.relative_to(project.dir.parent.parent)}"
    if tracked_count:
        msg += f" [+{tracked_count} cards trackeadas en index sin file]"
    print(msg)
    return body
|
||||
|
||||
|
||||
# ---------------------------------------------------------------- Dispatch
|
||||
|
||||
|
||||
# Dispatch table used by pull_one: item kind (singular) -> pull function.
# Every function is called as (client, project, ref).
_PULLERS = {
    "card": pull_card,
    "dashboard": pull_dashboard,
    "database": pull_database,
    "collection": pull_collection,
}
|
||||
|
||||
|
||||
def pull_one(client, project, kind: str, ref: str | int) -> dict:
    """Pull a single item of the given ``kind`` and return the body written to disk.

    ``ref`` is either a Metabase id or a slug known to the project index.

    Raises:
        SystemExit: if ``kind`` is not one of the keys of ``_PULLERS``.
    """
    puller = _PULLERS.get(kind)
    if puller is None:
        raise SystemExit(f"kind '{kind}' invalido. Validos: {sorted(_PULLERS)}")
    return puller(client, project, ref)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------- Remote list (descubrir sin descargar)
|
||||
|
||||
|
||||
def remote_list(client, kind: str, *, filter_name: str | None = None) -> list[dict]:
    """List items of ``kind`` from Metabase without touching disk.

    Args:
        client: Metabase client passed through to the list helpers; for
            collections its ``request`` method is called directly.
        kind: One of ``"card"``, ``"dashboard"``, ``"database"``, ``"collection"``.
        filter_name: Optional case-insensitive substring that an item's name
            must contain to be included.

    Returns:
        One summary dict per item with the keys ``id``, ``name``,
        ``collection_id``, ``archived`` (defaults to ``False``) and ``updated_at``.

    Raises:
        SystemExit: if ``kind`` is not recognised.
    """
    if kind == "collection":
        entries = client.request("GET", "/api/collection") or []
    elif kind == "database":
        response = metabase_list_databases(client)
        # The response may be wrapped as {"data": [...]} or be a bare list.
        is_wrapped = isinstance(response, dict) and "data" in response
        entries = response["data"] if is_wrapped else response
    elif kind == "dashboard":
        entries = metabase_list_dashboards(client)
    elif kind == "card":
        entries = metabase_list_cards(client)
    else:
        raise SystemExit(f"kind '{kind}' invalido")

    if filter_name:
        needle = filter_name.lower()
        entries = [entry for entry in entries if needle in (entry.get("name") or "").lower()]

    return [
        {
            "id": entry.get("id"),
            "name": entry.get("name"),
            "collection_id": entry.get("collection_id"),
            "archived": entry.get("archived", False),
            "updated_at": entry.get("updated_at"),
        }
        for entry in entries
    ]
|
||||
Reference in New Issue
Block a user