"""Pull per-item: trae UN item de Metabase a disco. Nunca bulk. R14: pull de dashboard SIEMPRE completo (todas las dashcards, tabs, parameters). R15: para cada card_id referenciado en dashcards no presente en index, registra slug→id en index sin escribir el YAML (option C: tracked sin file). R16: cada YAML lleva en _meta los campos: - synced_at: timestamp del momento del pull (ISO UTC) - remote_updated_at: updated_at que Metabase reportaba en ese momento - dashcards_count, tabs_count, parameters_count: snapshots para R18/R20 Funciones publicas: pull_one(client, project, kind, ref) -> dict # ref: int id o str slug """ from __future__ import annotations import datetime as dt import re from pathlib import Path from typing import Any import yaml from metabase.cards import metabase_get_card, metabase_list_cards from metabase.dashboards import metabase_get_dashboard, metabase_list_dashboards from metabase.databases import metabase_get_database, metabase_list_databases # Campos volatiles a descartar del payload (mismos que ya teniamos) _VOLATILE_KEYS = frozenset({ "created_at", "updated_at", "last_used_at", "last_viewed_at", "last_query_start", "last_used_param_values", "view_count", "dashboard_count", "parameter_usage_count", "average_query_time", "creator_id", "creator", "made_public_by_id", "last-edit-info", "public_uuid", "entity_id", "card_schema", "metabase_version", "result_metadata", "legacy_query", "source_card_id", "can_write", "can_restore", "can_delete", "can_run_adhoc_query", "can_manage_db", "can_set_cache_policy", "can-manage", "can_upload", "archived_directly", "moderation_reviews", "embedding_type", "dependency_analysis_version", "initially_published_at", "param_fields", "is_remote_synced", "show_in_getting_started", "collection_position", "position", "cache_invalidated_at", "is_sample", "is_audit", "is_attached_dwh", "is_on_demand", "is_full_sync", "initial_sync_status", "dbms_version", "router_database_id", "router_user_attribute", "uploads_enabled", "uploads_schema_name", "uploads_table_prefix", "refingerprint", "schedules", "metadata_sync_schedule", "cache_field_values_schedule", "write_data_details", "provider_name", "workspace_permissions_status", "features", "id", "dashboard", "dashboard_id", "table_id", }) def _utc_now_iso() -> str: return dt.datetime.now(dt.timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ") def _slugify(name: str) -> str: s = re.sub(r"[^a-z0-9]+", "_", (name or "").lower()).strip("_") return s or "untitled" def _strip_volatile(value: Any) -> Any: if isinstance(value, dict): out = {} for k, v in value.items(): if k in _VOLATILE_KEYS: continue cleaned = _strip_volatile(v) if cleaned is None: continue out[k] = cleaned return out if isinstance(value, list): return [_strip_volatile(x) for x in value] return value def _yaml_dump(path: Path, data: dict) -> None: path.parent.mkdir(parents=True, exist_ok=True) with path.open("w") as f: yaml.safe_dump(data, f, sort_keys=False, allow_unicode=True, default_flow_style=False, width=120) def _id_to_slug(id_: int | None, mapping: dict[str, int]) -> str | None: if id_ is None: return None for slug, mid in mapping.items(): if mid == id_: return slug return None def _resolve_ref(ref: str | int, kind_plural: str, index: dict) -> int: """Devuelve el id Metabase a partir de un id int o slug str.""" if isinstance(ref, int): return ref if isinstance(ref, str) and ref.isdigit(): return int(ref) mapping = index.get(kind_plural, {}) if ref not in mapping: raise SystemExit( f"Ref '{ref}' no encontrado en index.{kind_plural}. " f"Conocidos: {sorted(mapping.keys()) or '(vacio)'}. " f"Si es un id Metabase nuevo, pasa el numero directamente." ) return mapping[ref] def _slug_for(name: str, existing_mapping: dict[str, int], item_id: int) -> str: """Reusa el slug del index si ya esta mapeado al mismo id, sino genera uno nuevo.""" for slug, mid in existing_mapping.items(): if mid == item_id: return slug base = _slugify(name) if base not in existing_mapping: return base i = 2 while f"{base}_{i}" in existing_mapping: i += 1 return f"{base}_{i}" # ---------------------------------------------------------------- Per-kind def pull_database(client, project, ref: str | int) -> dict: index = project.load_index() db_id = _resolve_ref(ref, "databases", index) full = metabase_get_database(client, db_id) slug = _slug_for(full.get("name", "db"), index.get("databases", {}), db_id) payload = _strip_volatile(full) if "details" in payload and "password" in payload["details"]: payload["details"]["password"] = f"${{METABASE_DB_PASSWORD_{slug.upper()}}}" body = { "_meta": { "kind": "database", "id": db_id, "slug": slug, "synced_at": _utc_now_iso(), "remote_updated_at": full.get("updated_at"), }, "_refs": {}, "payload": payload, } path = project.dir / "databases" / f"{slug}.yaml" _yaml_dump(path, body) index.setdefault("databases", {})[slug] = db_id project.save_index(index) print(f"[{project.name}] pull database {slug} (id={db_id}) -> {path.relative_to(project.dir.parent.parent)}") return body def pull_collection(client, project, ref: str | int) -> dict: index = project.load_index() coll_id = _resolve_ref(ref, "collections", index) full = client.request("GET", f"/api/collection/{coll_id}") slug = _slug_for(full.get("name", "col"), index.get("collections", {}), coll_id) parent_id = full.get("parent_id") parent_slug = _id_to_slug(parent_id, index.get("collections", {})) payload = _strip_volatile(full) payload.pop("parent_id", None) body = { "_meta": { "kind": "collection", "id": coll_id, "slug": slug, "synced_at": _utc_now_iso(), "remote_updated_at": full.get("updated_at"), }, "_refs": {"parent": parent_slug}, "payload": payload, } path = project.dir / "collections" / f"{slug}.yaml" _yaml_dump(path, body) index.setdefault("collections", {})[slug] = coll_id project.save_index(index) print(f"[{project.name}] pull collection {slug} (id={coll_id}) -> {path.relative_to(project.dir.parent.parent)}") return body def pull_card(client, project, ref: str | int) -> dict: index = project.load_index() card_id = _resolve_ref(ref, "cards", index) full = metabase_get_card(client, card_id) slug = _slug_for(full.get("name", "card"), index.get("cards", {}), card_id) refs = { "database": _id_to_slug(full.get("database_id"), index.get("databases", {})), "collection": _id_to_slug(full.get("collection_id"), index.get("collections", {})), } if refs["database"] is None and full.get("database_id") is not None: # Card apunta a una database que no esta en nuestro index todavia print( f" ! warning: database_id={full['database_id']} no esta en index. " f"El push de esta card fallara hasta que pullees esa database." ) payload = _strip_volatile(full) payload.pop("database_id", None) payload.pop("collection_id", None) payload.pop("collection", None) if isinstance(payload.get("dataset_query"), dict) and "database" in payload["dataset_query"]: payload["dataset_query"]["database"] = refs["database"] body = { "_meta": { "kind": "card", "id": card_id, "slug": slug, "synced_at": _utc_now_iso(), "remote_updated_at": full.get("updated_at"), }, "_refs": refs, "payload": payload, } path = project.dir / "cards" / f"{slug}.yaml" _yaml_dump(path, body) index.setdefault("cards", {})[slug] = card_id project.save_index(index) print(f"[{project.name}] pull card {slug} (id={card_id}) -> {path.relative_to(project.dir.parent.parent)}") return body def pull_dashboard(client, project, ref: str | int) -> dict: """R14: pull SIEMPRE completo. R15: registra card refs en index sin escribir files.""" index = project.load_index() dash_id = _resolve_ref(ref, "dashboards", index) full = metabase_get_dashboard(client, dash_id) slug = _slug_for(full.get("name", "dashboard"), index.get("dashboards", {}), dash_id) coll_slug = _id_to_slug(full.get("collection_id"), index.get("collections", {})) refs = {"collection": coll_slug} payload = _strip_volatile(full) payload.pop("collection_id", None) payload.pop("collection", None) # Procesar dashcards: registrar cada card_id en index si no esta (R15) cards_idx = index.setdefault("cards", {}) clean_dashcards = [] tracked_count = 0 for dc in payload.get("dashcards", []) or []: dc = dict(dc) cid = dc.pop("card_id", None) dc.pop("card", None) dc.pop("dashboard_id", None) card_slug: str | None = None if cid is not None: card_slug = _id_to_slug(cid, cards_idx) if card_slug is None: # Card no esta en index: la registramos sin descargarla # Solo necesitamos el name para slugify try: card_meta = metabase_get_card(client, cid) card_slug = _slug_for(card_meta.get("name", f"card_{cid}"), cards_idx, cid) cards_idx[card_slug] = cid tracked_count += 1 except Exception as e: print(f" ! warning: card_id={cid} en dashcards no se pudo trackear: {e}") card_slug = f"_unknown_card_{cid}" dc["card"] = card_slug # series: lista de cards extra series = dc.get("series") or [] if series: new_series = [] for s in series: sid = s.get("id") if isinstance(s, dict) else s s_slug = _id_to_slug(sid, cards_idx) if s_slug is None and sid is not None: try: sm = metabase_get_card(client, sid) s_slug = _slug_for(sm.get("name", f"card_{sid}"), cards_idx, sid) cards_idx[s_slug] = sid tracked_count += 1 except Exception: s_slug = f"_unknown_card_{sid}" new_series.append(s_slug) dc["series"] = new_series clean_dashcards.append({k: v for k, v in dc.items() if v not in (None, [], {})}) payload["dashcards"] = clean_dashcards body = { "_meta": { "kind": "dashboard", "id": dash_id, "slug": slug, "synced_at": _utc_now_iso(), "remote_updated_at": full.get("updated_at"), "dashcards_count": len(clean_dashcards), "tabs_count": len(payload.get("tabs", []) or []), "parameters_count": len(payload.get("parameters", []) or []), }, "_refs": refs, "payload": payload, } path = project.dir / "dashboards" / f"{slug}.yaml" _yaml_dump(path, body) index.setdefault("dashboards", {})[slug] = dash_id project.save_index(index) msg = f"[{project.name}] pull dashboard {slug} (id={dash_id}) -> {path.relative_to(project.dir.parent.parent)}" if tracked_count: msg += f" [+{tracked_count} cards trackeadas en index sin file]" print(msg) return body # ---------------------------------------------------------------- Dispatch _PULLERS = { "card": pull_card, "dashboard": pull_dashboard, "database": pull_database, "collection": pull_collection, } def pull_one(client, project, kind: str, ref: str | int) -> dict: if kind not in _PULLERS: raise SystemExit(f"kind '{kind}' invalido. Validos: {sorted(_PULLERS)}") return _PULLERS[kind](client, project, ref) # ---------------------------------------------------------------- Remote list (descubrir sin descargar) def remote_list(client, kind: str, *, filter_name: str | None = None) -> list[dict]: """Lista items en Metabase sin tocar disco. Resumen ligero.""" if kind == "card": items = metabase_list_cards(client) elif kind == "dashboard": items = metabase_list_dashboards(client) elif kind == "database": raw = metabase_list_databases(client) items = raw["data"] if isinstance(raw, dict) and "data" in raw else raw elif kind == "collection": items = client.request("GET", "/api/collection") or [] else: raise SystemExit(f"kind '{kind}' invalido") if filter_name: f = filter_name.lower() items = [i for i in items if f in (i.get("name") or "").lower()] out = [] for i in items: out.append({ "id": i.get("id"), "name": i.get("name"), "collection_id": i.get("collection_id"), "archived": i.get("archived", False), "updated_at": i.get("updated_at"), }) return out