Files
fn_registry/python/functions/metabase/validation.py
T
egutierrez 4300f1242d feat(metabase): expansion de funciones Python — documents, collections, permissions, validation
Añade un conjunto amplio de funciones al paquete python/functions/metabase:
- Nuevos modulos: collections.py, documents.py, maintenance.py, permissions.py, validation.py (+ test).
- Ampliacion de cards.py, dashboards.py, client.py e __init__.py para exponer las nuevas operaciones.
- Funciones de documentos (create/get/update/delete/archive/copy/move + comentarios), grupos y memberships, permission/collection graphs, copy/move de cards y dashboards, validacion de MBQL/SQL y payloads, actualizacion segura de dashboards y fix_null_ratio.
- .md por funcion con frontmatter para que fn index los registre.
- Actualiza pyproject.toml y uv.lock con las dependencias resultantes.

Impacto: cobertura mucho mas amplia de la API de Metabase desde el registry, reutilizable por apps y analisis. No toca Go ni frontend.
2026-04-13 23:31:42 +02:00

506 lines
20 KiB
Python

"""Validacion estructural de cards y dashboards de Metabase antes de pusharlos a la API."""
import httpx
from .client import MetabaseClient
from .cards import metabase_execute_query
# Chart types accepted in a card's `display` field.
_VALID_DISPLAYS = {
    "scalar", "table", "line", "bar", "pie", "area", "row", "funnel",
    "smartscalar", "gauge", "progress", "combo", "pivot", "map", "scatter",
    "waterfall", "sankey", "object",
}

# Values accepted in a card's optional `type` field.
_VALID_TYPES = {"question", "model", "metric"}


def metabase_validate_card_payload(payload: dict) -> list[str]:
    """Validate the structure of a card payload before sending it to Metabase.

    Only structural invariants are checked, so no network access is needed.
    Every check runs and all problems are accumulated; validation never stops
    at the first issue.

    Args:
        payload: Card fields to validate (name, display, dataset_query, type,
            visualization_settings, parameters, archived).

    Returns:
        One message per issue found. An empty list means the payload is valid.

    Example:
        >>> issues = metabase_validate_card_payload({"name": "Revenue", "display": "bar",
        ...     "dataset_query": {"database": 1, "type": "native",
        ...                       "native": {"query": "SELECT 1"}}})
        >>> assert issues == []
    """
    issues: list[str] = []

    # --- name ---
    name = payload.get("name")
    if name is None:
        issues.append("campo 'name' ausente")
    elif not isinstance(name, str) or not name.strip():
        issues.append("campo 'name' debe ser un string no vacio")

    # --- display ---
    # The isinstance guard runs before the set lookup: an unhashable value
    # (list/dict) would otherwise raise TypeError instead of being reported.
    display = payload.get("display")
    if display is None:
        issues.append("campo 'display' ausente")
    elif not isinstance(display, str) or display not in _VALID_DISPLAYS:
        validos = ", ".join(sorted(_VALID_DISPLAYS))
        issues.append(f"display '{display}' invalido (validos: {validos})")

    # --- type (optional) ---
    card_type = payload.get("type")
    if card_type is not None and (
        not isinstance(card_type, str) or card_type not in _VALID_TYPES
    ):
        validos = ", ".join(sorted(_VALID_TYPES))
        issues.append(f"type '{card_type}' invalido (validos: {validos})")

    # --- dataset_query ---
    dq = payload.get("dataset_query")
    if dq is None:
        issues.append("campo 'dataset_query' ausente")
    elif not isinstance(dq, dict):
        issues.append("campo 'dataset_query' debe ser un dict")
    else:
        if "database" not in dq:
            issues.append("'dataset_query.database' ausente")

        # A query is native when either the payload/query declares it, or the
        # first stage carries a "native" key (MBQL5 layout).
        query_type = payload.get("query_type") or dq.get("type", "")
        is_native = query_type == "native"
        stages = dq.get("stages", [])
        has_mbql5_native = (
            isinstance(stages, list)
            and len(stages) > 0
            and isinstance(stages[0], dict)
            and "native" in stages[0]
        )

        if is_native or has_mbql5_native:
            # Legacy layout: dataset_query.native.query
            legacy_sql = None
            native_block = dq.get("native")
            if isinstance(native_block, dict):
                legacy_sql = native_block.get("query")

            # MBQL5 layout: dataset_query.stages[0].native
            mbql5_sql = stages[0].get("native") if has_mbql5_native else None

            legacy_ok = isinstance(legacy_sql, str) and legacy_sql.strip()
            mbql5_ok = isinstance(mbql5_sql, str) and mbql5_sql.strip()
            if not legacy_ok and not mbql5_ok:
                issues.append(
                    "query nativa sin SQL: falta 'dataset_query.native.query' "
                    "(legacy) o 'dataset_query.stages[0].native' (MBQL5)"
                )

    # --- visualization_settings (optional) ---
    vs = payload.get("visualization_settings")
    if vs is not None and not isinstance(vs, dict):
        issues.append("'visualization_settings' debe ser un dict")

    # --- parameters (optional) ---
    params = payload.get("parameters")
    if params is not None and not isinstance(params, list):
        issues.append("'parameters' debe ser una list")

    # --- archived (optional) ---
    archived = payload.get("archived")
    if archived is not None and not isinstance(archived, bool):
        issues.append("'archived' debe ser bool")

    return issues
def metabase_validate_dashboard_payload(
    payload: dict,
    known_card_ids: set[int],
) -> list[str]:
    """Validate the structure of a dashboard payload before sending it to Metabase.

    Checks the required fields, dashcard grid bounds, card references and
    overlaps between dashcards. All issues are accumulated; validation never
    stops at the first one.

    Overlaps are only reported between dashcards that share the same
    ``dashboard_tab_id`` (a missing key counts as ``None``), because each tab
    has its own grid. Dashcards whose size is below 1 are reported by the
    bounds checks and excluded from overlap detection, since an empty
    rectangle cannot overlap anything.

    Args:
        payload: Dashboard fields (name, dashcards, tabs, parameters).
        known_card_ids: IDs of known cards; a dashcard with an integer
            ``card_id`` must reference one of them. ``card_id`` of ``None``
            denotes a virtual dashcard and is accepted.

    Returns:
        One message per issue found. An empty list means the payload is valid.

    Example:
        >>> issues = metabase_validate_dashboard_payload(
        ...     {"name": "KPIs", "dashcards": []},
        ...     known_card_ids={1, 2, 3},
        ... )
        >>> assert issues == []
    """

    def is_int(value: object) -> bool:
        # bool is a subclass of int; a JSON true/false is not a valid number here.
        return isinstance(value, int) and not isinstance(value, bool)

    layout_fields = ("row", "col", "size_x", "size_y")
    issues: list[str] = []

    # --- name ---
    name = payload.get("name")
    if name is None:
        issues.append("campo 'name' ausente")
    elif not isinstance(name, str) or not name.strip():
        issues.append("campo 'name' debe ser un string no vacio")

    # --- dashcards (optional, but must be a list when present) ---
    dashcards = payload.get("dashcards")
    if dashcards is not None:
        if not isinstance(dashcards, list):
            issues.append("'dashcards' debe ser una list")
        else:
            # (tab_id, row, col, size_x, size_y) for every well-typed,
            # non-empty dashcard rectangle.
            rects: list[tuple[object, int, int, int, int]] = []

            for i, dc in enumerate(dashcards):
                if not isinstance(dc, dict):
                    issues.append(f"dashcard[{i}] debe ser un dict")
                    continue

                # card_id: an int must be a known card; None is a virtual card.
                card_id = dc.get("card_id")
                if card_id is not None:
                    if not is_int(card_id):
                        issues.append(f"dashcard[{i}].card_id debe ser int o null")
                    elif card_id not in known_card_ids:
                        issues.append(
                            f"dashcard[{i}].card_id={card_id} no existe en las cards conocidas"
                        )

                # Position and size fields
                missing = [f for f in layout_fields if f not in dc]
                if missing:
                    issues.append(
                        f"dashcard[{i}] falta campos de layout: {', '.join(missing)}"
                    )
                    continue

                wrong_type = [f for f in layout_fields if not is_int(dc[f])]
                for fname in wrong_type:
                    issues.append(f"dashcard[{i}].{fname} debe ser int")
                if wrong_type:
                    continue  # bounds cannot be checked on non-integers

                row, col = dc["row"], dc["col"]
                size_x, size_y = dc["size_x"], dc["size_y"]

                # Bounds
                if row < 0:
                    issues.append(f"dashcard[{i}].row={row} debe ser >= 0")
                if not 0 <= col <= 23:
                    issues.append(f"dashcard[{i}].col={col} debe estar en [0, 23]")
                if not 1 <= size_x <= 24:
                    issues.append(f"dashcard[{i}].size_x={size_x} debe estar en [1, 24]")
                if not 1 <= size_y <= 100:
                    issues.append(f"dashcard[{i}].size_y={size_y} debe estar en [1, 100]")
                if col + size_x > 24:
                    issues.append(
                        f"dashcard[{i}] excede el ancho del grid: col={col} + size_x={size_x} = {col + size_x} > 24"
                    )

                if size_x >= 1 and size_y >= 1:
                    rects.append((dc.get("dashboard_tab_id"), row, col, size_x, size_y))

            # Pairwise overlap detection, O(n^2); the original author notes
            # that typical dashboards have fewer than 50 cards.
            for pos, (tab_a, ra, ca, sxa, sya) in enumerate(rects):
                for tab_b, rb, cb, sxb, syb in rects[pos + 1:]:
                    if tab_a != tab_b:
                        continue  # different tabs use independent grids
                    # Rectangles: [ca, ca+sxa) x [ra, ra+sya) and [cb, cb+sxb) x [rb, rb+syb)
                    overlap_x = ca < cb + sxb and cb < ca + sxa
                    overlap_y = ra < rb + syb and rb < ra + sya
                    if overlap_x and overlap_y:
                        issues.append(
                            f"dashcards en posiciones (row={ra},col={ca},{sxa}x{sya}) "
                            f"y (row={rb},col={cb},{sxb}x{syb}) solapan"
                        )

    # --- tabs (optional) ---
    tabs = payload.get("tabs")
    if tabs is not None:
        if not isinstance(tabs, list):
            issues.append("'tabs' debe ser una list")
        else:
            for i, tab in enumerate(tabs):
                if not isinstance(tab, dict):
                    issues.append(f"tabs[{i}] debe ser un dict")
                    continue
                if "id" not in tab:
                    issues.append(f"tabs[{i}] falta campo 'id'")
                if "name" not in tab:
                    issues.append(f"tabs[{i}] falta campo 'name'")

    # --- parameters (optional) ---
    params = payload.get("parameters")
    if params is not None and not isinstance(params, list):
        issues.append("'parameters' debe ser una list")

    return issues
# ---------------------------------------------------------------- Documents
# Nodos ProseMirror que el editor TipTap de Metabase (v0.59) sabe renderizar.
# Si un documento contiene nodos fuera de esta whitelist, el backend los acepta
# pero el frontend silenciosamente descarta contenido y el doc aparece vacio o
# incompleto.
_VALID_DOC_NODES = {
"doc", "paragraph", "text", "heading",
"bulletList", "orderedList", "listItem",
"blockquote", "codeBlock", "horizontalRule", "hardBreak",
"cardEmbed", "flexContainer", "smartLink", "resizeNode", "mention",
}
_VALID_DOC_MARKS = {"bold", "italic", "strike", "code", "link"}
# Rangos de attrs.level en headings
_HEADING_LEVELS = {1, 2, 3, 4, 5, 6}
def metabase_validate_document_payload(
payload: dict,
known_card_slugs: set[str] | None = None,
) -> list[str]:
"""Valida un payload de document antes de pusharlo a Metabase.
El editor TipTap de Metabase solo renderiza un subconjunto concreto de
nodos y marks ProseMirror. La API *acepta* cualquier arbol, pero el
frontend silenciosamente descarta lo que no conoce. Este validador
rechaza (como warning) cualquier nodo o mark fuera de la whitelist.
Comprueba tambien restricciones estructurales:
- `heading.attrs.level` en [1, 6].
- `flexContainer` solo contiene `cardEmbed` o `supportingText`,
y como maximo 3 hijos.
- `cardEmbed.attrs` debe resolverse a un card real (por `id` o por
`card` slug si el caller pasa `known_card_slugs`).
Args:
payload: dict del document listo para POST/PUT (con `name` y `document`).
known_card_slugs: set de slugs conocidos en el index (para validar
`cardEmbed.attrs.card`). None = skip check.
Returns:
Lista de warnings. Lista vacia = payload renderizable.
"""
issues: list[str] = []
# --- name ---
name = payload.get("name")
if name is None:
issues.append("campo 'name' ausente")
elif not isinstance(name, str) or not name.strip():
issues.append("campo 'name' debe ser un string no vacio")
elif len(name) > 254:
issues.append(f"'name' excede 254 chars ({len(name)})")
# --- archived ---
archived = payload.get("archived")
if archived is not None and not isinstance(archived, bool):
issues.append("'archived' debe ser bool")
# --- document (arbol ProseMirror) ---
tree = payload.get("document")
if tree is None:
issues.append("campo 'document' ausente")
return issues
if tree == "":
# Document vacio es valido (Metabase lo acepta)
return issues
if not isinstance(tree, dict):
issues.append(f"'document' debe ser dict o string vacia, no {type(tree).__name__}")
return issues
if tree.get("type") != "doc":
issues.append(f"'document.type' debe ser 'doc', no '{tree.get('type')}'")
# Walk recursivo acumulando issues con path
_walk_doc_node(tree, "document", issues, known_card_slugs or set())
return issues
def _walk_doc_node(
node: dict,
path: str,
issues: list[str],
known_card_slugs: set[str],
) -> None:
"""Valida un nodo ProseMirror y desciende por sus hijos."""
if not isinstance(node, dict):
issues.append(f"{path}: nodo no es dict ({type(node).__name__})")
return
ntype = node.get("type")
if not isinstance(ntype, str):
issues.append(f"{path}: campo 'type' ausente o no string")
return
if ntype not in _VALID_DOC_NODES:
issues.append(
f"{path}: nodo '{ntype}' no soportado por el editor de Metabase. "
f"Validos: {sorted(_VALID_DOC_NODES)}"
)
# Seguir igualmente — puede haber issues mas adentro
# --- Validaciones especificas por tipo ---
attrs = node.get("attrs") or {}
if ntype == "heading":
level = attrs.get("level")
if level not in _HEADING_LEVELS:
issues.append(f"{path}: heading.level={level!r} debe estar en {sorted(_HEADING_LEVELS)}")
if ntype == "cardEmbed":
# cardEmbed requiere o bien attrs.id (int) o attrs.card (slug del index)
cid = attrs.get("id")
cslug = attrs.get("card")
if cid is None and cslug is None:
issues.append(f"{path}: cardEmbed sin 'attrs.id' ni 'attrs.card'")
elif cid is not None and not isinstance(cid, int):
issues.append(f"{path}: cardEmbed.attrs.id debe ser int, no {type(cid).__name__}")
elif cslug is not None:
if not isinstance(cslug, str):
issues.append(f"{path}: cardEmbed.attrs.card debe ser string slug")
elif known_card_slugs and cslug not in known_card_slugs:
issues.append(
f"{path}: cardEmbed.attrs.card='{cslug}' no existe en el index "
f"(conocidos: {sorted(known_card_slugs)[:10]}...)"
)
if ntype == "flexContainer":
children = node.get("content") or []
if not isinstance(children, list):
issues.append(f"{path}: flexContainer.content debe ser lista")
else:
if not 1 <= len(children) <= 3:
issues.append(
f"{path}: flexContainer debe tener 1-3 hijos (tiene {len(children)})"
)
for i, ch in enumerate(children):
ct = ch.get("type") if isinstance(ch, dict) else None
if ct not in ("cardEmbed", "supportingText"):
issues.append(
f"{path}.content[{i}]: flexContainer solo acepta 'cardEmbed' "
f"o 'supportingText' como hijos (tiene '{ct}')"
)
cw = attrs.get("columnWidths")
if cw is not None:
if not isinstance(cw, list) or not all(isinstance(x, (int, float)) for x in cw):
issues.append(f"{path}: flexContainer.attrs.columnWidths debe ser lista de numeros")
elif isinstance(children, list) and len(cw) != len(children):
issues.append(
f"{path}: columnWidths tiene {len(cw)} valores pero hay {len(children)} hijos"
)
if ntype == "smartLink":
# smartLink necesita entityId (id numerico del card en Metabase)
if attrs.get("entityId") is None:
issues.append(f"{path}: smartLink sin 'attrs.entityId'")
if ntype == "text":
if not isinstance(node.get("text"), str):
issues.append(f"{path}: text sin campo 'text' string")
# Validar marks
marks = node.get("marks") or []
if not isinstance(marks, list):
issues.append(f"{path}: 'marks' debe ser lista")
else:
for i, m in enumerate(marks):
if not isinstance(m, dict):
issues.append(f"{path}.marks[{i}]: mark no es dict")
continue
mt = m.get("type")
if mt not in _VALID_DOC_MARKS:
issues.append(
f"{path}.marks[{i}]: mark '{mt}' no soportado. "
f"Validos: {sorted(_VALID_DOC_MARKS)}"
)
# --- Recursion sobre content ---
content = node.get("content")
if isinstance(content, list):
for i, child in enumerate(content):
_walk_doc_node(child, f"{path}.content[{i}]", issues, known_card_slugs)
def metabase_validate_sql(
    client: MetabaseClient,
    database_id: int,
    sql: str,
    max_rows: int = 0,
) -> dict:
    """Check a SQL statement by running it through Metabase.

    The statement is executed with ``metabase_execute_query`` (per the original
    author, a POST to /api/dataset). Failures are detected in two places: an
    exception raised by the call, and a response body whose ``status`` is
    ``"failed"`` (Metabase can answer HTTP 200 and still report a failure).

    Args:
        client: Authenticated MetabaseClient instance.
        database_id: ID of the database the SQL runs against.
        sql: SQL statement to validate (SELECT, WITH, etc.).
        max_rows: Row limit forwarded unchanged to ``metabase_execute_query``.
            NOTE(review): the original docs describe 0 as "Metabase default";
            the actual meaning is defined by that helper — confirm.

    Returns:
        dict with:
            ok (bool): True when the query ran without errors.
            error (str|None): error message when ok is False, otherwise None.
            rows_returned (int): number of rows in ``data.rows`` when ok is
                True (0 when the response has no such list).

    Example:
        >>> result = metabase_validate_sql(client, 1, "SELECT id FROM orders LIMIT 1")
        >>> if not result["ok"]:
        ...     print("SQL invalido:", result["error"])
    """

    def outcome(ok: bool, error, rows_returned: int = 0) -> dict:
        # Single place that shapes the result dict.
        return {"ok": ok, "error": error, "rows_returned": rows_returned}

    try:
        response = metabase_execute_query(client, database_id, sql, max_rows)
    except httpx.HTTPStatusError as http_err:
        # Prefer the message from Metabase's JSON body when there is one.
        return outcome(False, _extract_metabase_error(http_err))
    except Exception as err:
        return outcome(False, str(err))

    # A non-dict response carries neither a status nor rows.
    if not isinstance(response, dict):
        return outcome(True, None)

    # Metabase may answer 200 with status "failed" in the body.
    if response.get("status") == "failed":
        return outcome(False, response.get("error") or "query fallida (sin mensaje)")

    # Count the returned rows, tolerating missing or oddly-typed sections.
    data = response.get("data", {})
    rows = data.get("rows", []) if isinstance(data, dict) else None
    row_count = len(rows) if isinstance(rows, list) else 0
    return outcome(True, None, row_count)
def _extract_metabase_error(exc: httpx.HTTPStatusError) -> str:
    """Extract a readable error message from a failed Metabase response.

    Looks for a truthy ``error`` or ``message`` entry in the JSON body of
    ``exc.response`` and falls back to ``str(exc)`` when the body cannot be
    decoded, is not a dict, or has neither entry.

    Args:
        exc: The HTTP status error raised for the request.

    Returns:
        Always a string; a non-string ``error``/``message`` value is
        stringified so the declared return type holds.
    """
    try:
        body = exc.response.json()
    except Exception:
        # Best effort: any failure reading/decoding the body falls back to
        # the exception's own text.
        return str(exc)

    if isinstance(body, dict):
        detail = body.get("error") or body.get("message")
        if detail:
            return detail if isinstance(detail, str) else str(detail)
    return str(exc)