"""Structural validation of Metabase cards, dashboards and documents before pushing them to the API."""

from itertools import combinations

import httpx

from .client import MetabaseClient
from .cards import metabase_execute_query

# Values accepted for a card's `display` field.
_VALID_DISPLAYS = {
    "scalar", "table", "line", "bar", "pie", "area", "row", "funnel",
    "smartscalar", "gauge", "progress", "combo", "pivot", "map",
    "scatter", "waterfall", "sankey", "object",
}

# Values accepted for a card's optional `type` field.
_VALID_TYPES = {"question", "model", "metric"}

# Layout fields every dashcard must carry.
_LAYOUT_FIELDS = ("row", "col", "size_x", "size_y")


def _is_strict_int(value: object) -> bool:
    """Return True for real ints only (bool is an int subclass but is never a valid id/coordinate)."""
    return isinstance(value, int) and not isinstance(value, bool)


def metabase_validate_card_payload(payload: dict) -> list[str]:
    """Validate the structure of a card payload before sending it to Metabase.

    Checks structural invariants only; no network access is needed. Every
    check runs and all problems are accumulated instead of stopping at the
    first one. Malformed values (including unhashable ones such as a list in
    `display`) are reported as issues, never raised.

    Args:
        payload: dict with the card fields to validate (name, display,
            dataset_query, type, visualization_settings, parameters, archived).

    Returns:
        List of strings, one per issue found. An empty list means the payload
        is structurally valid.

    Example:
        >>> metabase_validate_card_payload({
        ...     "name": "Revenue",
        ...     "display": "bar",
        ...     "dataset_query": {
        ...         "database": 1,
        ...         "type": "native",
        ...         "native": {"query": "SELECT 1"},
        ...     },
        ... })
        []
    """
    issues: list[str] = []

    # --- name ---
    name = payload.get("name")
    if name is None:
        issues.append("campo 'name' ausente")
    elif not isinstance(name, str) or not name.strip():
        issues.append("campo 'name' debe ser un string no vacio")

    # --- display ---
    display = payload.get("display")
    if display is None:
        issues.append("campo 'display' ausente")
    # The isinstance guard keeps unhashable values (list/dict) from raising
    # TypeError in the set membership test.
    elif not isinstance(display, str) or display not in _VALID_DISPLAYS:
        validos = ", ".join(sorted(_VALID_DISPLAYS))
        issues.append(f"display '{display}' invalido (validos: {validos})")

    # --- type (optional) ---
    card_type = payload.get("type")
    if card_type is not None and (
        not isinstance(card_type, str) or card_type not in _VALID_TYPES
    ):
        validos = ", ".join(sorted(_VALID_TYPES))
        issues.append(f"type '{card_type}' invalido (validos: {validos})")

    # --- dataset_query ---
    dq = payload.get("dataset_query")
    if dq is None:
        issues.append("campo 'dataset_query' ausente")
    elif not isinstance(dq, dict):
        issues.append("campo 'dataset_query' debe ser un dict")
    else:
        if "database" not in dq:
            issues.append("'dataset_query.database' ausente")

        # Native-query detection: explicit query_type on the payload wins,
        # otherwise fall back to dataset_query.type.
        query_type = payload.get("query_type") or dq.get("type", "")
        is_native = query_type == "native"

        # MBQL5 format: stages[0] carries a "native" key.
        stages = dq.get("stages", [])
        has_mbql5_native = (
            isinstance(stages, list)
            and len(stages) > 0
            and isinstance(stages[0], dict)
            and "native" in stages[0]
        )

        if is_native or has_mbql5_native:
            # Legacy format: dataset_query.native.query
            legacy_sql = None
            native_block = dq.get("native")
            if isinstance(native_block, dict):
                legacy_sql = native_block.get("query")

            # MBQL5 format: dataset_query.stages[0].native
            mbql5_sql = stages[0].get("native") if has_mbql5_native else None

            legacy_ok = isinstance(legacy_sql, str) and bool(legacy_sql.strip())
            mbql5_ok = isinstance(mbql5_sql, str) and bool(mbql5_sql.strip())

            if not legacy_ok and not mbql5_ok:
                issues.append(
                    "query nativa sin SQL: falta 'dataset_query.native.query' "
                    "(legacy) o 'dataset_query.stages[0].native' (MBQL5)"
                )

    # --- visualization_settings (optional) ---
    vs = payload.get("visualization_settings")
    if vs is not None and not isinstance(vs, dict):
        issues.append("'visualization_settings' debe ser un dict")

    # --- parameters (optional) ---
    params = payload.get("parameters")
    if params is not None and not isinstance(params, list):
        issues.append("'parameters' debe ser una list")

    # --- archived (optional) ---
    archived = payload.get("archived")
    if archived is not None and not isinstance(archived, bool):
        issues.append("'archived' debe ser bool")

    return issues


def metabase_validate_dashboard_payload(
    payload: dict,
    known_card_ids: set[int],
) -> list[str]:
    """Validate the structure of a dashboard payload before sending it to Metabase.

    Checks required fields, dashcard bounds on the 24-column grid, card
    references and overlaps between dashcards. All issues are accumulated;
    nothing is raised for malformed input.

    Overlaps are only reported between dashcards that share the same
    `dashboard_tab_id` (dashcards without that key all compare equal, i.e.
    they are treated as living on one grid). Dashcards whose size is not
    positive are reported for their bounds but excluded from overlap
    detection, since they do not describe a real rectangle.

    Args:
        payload: dict with the dashboard fields (name, dashcards, tabs,
            parameters).
        known_card_ids: IDs of the known cards; every dashcard with an
            integer `card_id` must reference one of them. `card_id` of None
            denotes a virtual dashcard and is accepted.

    Returns:
        List of strings, one per issue found. An empty list means the payload
        is structurally valid.

    Example:
        >>> metabase_validate_dashboard_payload(
        ...     {"name": "KPIs", "dashcards": []},
        ...     known_card_ids={1, 2, 3},
        ... )
        []
    """
    issues: list[str] = []

    # --- name ---
    name = payload.get("name")
    if name is None:
        issues.append("campo 'name' ausente")
    elif not isinstance(name, str) or not name.strip():
        issues.append("campo 'name' debe ser un string no vacio")

    # --- dashcards (optional, but must be a list when present) ---
    dashcards = payload.get("dashcards")
    if dashcards is not None:
        if not isinstance(dashcards, list):
            issues.append("'dashcards' debe ser una list")
        else:
            # (tab, row, col, size_x, size_y) for every dashcard that
            # describes a real rectangle.
            rects: list[tuple[object, int, int, int, int]] = []

            for i, dc in enumerate(dashcards):
                if not isinstance(dc, dict):
                    issues.append(f"dashcard[{i}] debe ser un dict")
                    continue

                # card_id: an int must be a known card; None is a virtual card.
                card_id = dc.get("card_id")
                if card_id is not None:
                    if not _is_strict_int(card_id):
                        issues.append(f"dashcard[{i}].card_id debe ser int o null")
                    elif card_id not in known_card_ids:
                        issues.append(
                            f"dashcard[{i}].card_id={card_id} no existe en las cards conocidas"
                        )

                # Position and size fields.
                missing = [f for f in _LAYOUT_FIELDS if f not in dc]
                if missing:
                    issues.append(
                        f"dashcard[{i}] falta campos de layout: {', '.join(missing)}"
                    )
                    continue

                non_int = [f for f in _LAYOUT_FIELDS if not _is_strict_int(dc[f])]
                for fname in non_int:
                    issues.append(f"dashcard[{i}].{fname} debe ser int")
                if non_int:
                    continue  # bounds cannot be checked on non-integers

                row, col, size_x, size_y = (dc[f] for f in _LAYOUT_FIELDS)

                # Bounds
                if row < 0:
                    issues.append(f"dashcard[{i}].row={row} debe ser >= 0")
                if not 0 <= col <= 23:
                    issues.append(f"dashcard[{i}].col={col} debe estar en [0, 23]")
                if not 1 <= size_x <= 24:
                    issues.append(f"dashcard[{i}].size_x={size_x} debe estar en [1, 24]")
                if not 1 <= size_y <= 100:
                    issues.append(f"dashcard[{i}].size_y={size_y} debe estar en [1, 100]")
                if col + size_x > 24:
                    issues.append(
                        f"dashcard[{i}] excede el ancho del grid: col={col} + size_x={size_x} = {col + size_x} > 24"
                    )

                # A zero/negative size is already reported above; feeding it
                # to the overlap test would only add spurious findings.
                if size_x >= 1 and size_y >= 1:
                    rects.append((dc.get("dashboard_tab_id"), row, col, size_x, size_y))

            # O(n^2) overlap detection; typical dashboards have < 50 cards.
            for rect_a, rect_b in combinations(rects, 2):
                tab_a, ra, ca, sxa, sya = rect_a
                tab_b, rb, cb, sxb, syb = rect_b
                # Each tab has its own grid, so cards on different tabs can
                # legitimately share coordinates.
                if tab_a != tab_b:
                    continue
                # Half-open rectangles:
                # [ca, ca+sxa) x [ra, ra+sya) and [cb, cb+sxb) x [rb, rb+syb)
                overlap_x = ca < cb + sxb and cb < ca + sxa
                overlap_y = ra < rb + syb and rb < ra + sya
                if overlap_x and overlap_y:
                    issues.append(
                        f"dashcards en posiciones (row={ra},col={ca},{sxa}x{sya}) "
                        f"y (row={rb},col={cb},{sxb}x{syb}) solapan"
                    )

    # --- tabs (optional) ---
    tabs = payload.get("tabs")
    if tabs is not None:
        if not isinstance(tabs, list):
            issues.append("'tabs' debe ser una list")
        else:
            for i, tab in enumerate(tabs):
                if not isinstance(tab, dict):
                    issues.append(f"tabs[{i}] debe ser un dict")
                    continue
                if "id" not in tab:
                    issues.append(f"tabs[{i}] falta campo 'id'")
                if "name" not in tab:
                    issues.append(f"tabs[{i}] falta campo 'name'")

    # --- parameters (optional) ---
    params = payload.get("parameters")
    if params is not None and not isinstance(params, list):
        issues.append("'parameters' debe ser una list")

    return issues


# ---------------------------------------------------------------- Documents
# ProseMirror nodes that Metabase's TipTap editor (v0.59) knows how to render.
# If a document contains nodes outside this whitelist, the backend accepts them
# but the frontend silently drops content and the doc shows up empty or
# incomplete.
_VALID_DOC_NODES = { "doc", "paragraph", "text", "heading", "bulletList", "orderedList", "listItem", "blockquote", "codeBlock", "horizontalRule", "hardBreak", "cardEmbed", "flexContainer", "smartLink", "resizeNode", "mention", } _VALID_DOC_MARKS = {"bold", "italic", "strike", "code", "link"} # Rangos de attrs.level en headings _HEADING_LEVELS = {1, 2, 3, 4, 5, 6} def metabase_validate_document_payload( payload: dict, known_card_slugs: set[str] | None = None, ) -> list[str]: """Valida un payload de document antes de pusharlo a Metabase. El editor TipTap de Metabase solo renderiza un subconjunto concreto de nodos y marks ProseMirror. La API *acepta* cualquier arbol, pero el frontend silenciosamente descarta lo que no conoce. Este validador rechaza (como warning) cualquier nodo o mark fuera de la whitelist. Comprueba tambien restricciones estructurales: - `heading.attrs.level` en [1, 6]. - `flexContainer` solo contiene `cardEmbed` o `supportingText`, y como maximo 3 hijos. - `cardEmbed.attrs` debe resolverse a un card real (por `id` o por `card` slug si el caller pasa `known_card_slugs`). Args: payload: dict del document listo para POST/PUT (con `name` y `document`). known_card_slugs: set de slugs conocidos en el index (para validar `cardEmbed.attrs.card`). None = skip check. Returns: Lista de warnings. Lista vacia = payload renderizable. 
""" issues: list[str] = [] # --- name --- name = payload.get("name") if name is None: issues.append("campo 'name' ausente") elif not isinstance(name, str) or not name.strip(): issues.append("campo 'name' debe ser un string no vacio") elif len(name) > 254: issues.append(f"'name' excede 254 chars ({len(name)})") # --- archived --- archived = payload.get("archived") if archived is not None and not isinstance(archived, bool): issues.append("'archived' debe ser bool") # --- document (arbol ProseMirror) --- tree = payload.get("document") if tree is None: issues.append("campo 'document' ausente") return issues if tree == "": # Document vacio es valido (Metabase lo acepta) return issues if not isinstance(tree, dict): issues.append(f"'document' debe ser dict o string vacia, no {type(tree).__name__}") return issues if tree.get("type") != "doc": issues.append(f"'document.type' debe ser 'doc', no '{tree.get('type')}'") # Walk recursivo acumulando issues con path _walk_doc_node(tree, "document", issues, known_card_slugs or set()) return issues def _walk_doc_node( node: dict, path: str, issues: list[str], known_card_slugs: set[str], ) -> None: """Valida un nodo ProseMirror y desciende por sus hijos.""" if not isinstance(node, dict): issues.append(f"{path}: nodo no es dict ({type(node).__name__})") return ntype = node.get("type") if not isinstance(ntype, str): issues.append(f"{path}: campo 'type' ausente o no string") return if ntype not in _VALID_DOC_NODES: issues.append( f"{path}: nodo '{ntype}' no soportado por el editor de Metabase. 
" f"Validos: {sorted(_VALID_DOC_NODES)}" ) # Seguir igualmente — puede haber issues mas adentro # --- Validaciones especificas por tipo --- attrs = node.get("attrs") or {} if ntype == "heading": level = attrs.get("level") if level not in _HEADING_LEVELS: issues.append(f"{path}: heading.level={level!r} debe estar en {sorted(_HEADING_LEVELS)}") if ntype == "cardEmbed": # cardEmbed requiere o bien attrs.id (int) o attrs.card (slug del index) cid = attrs.get("id") cslug = attrs.get("card") if cid is None and cslug is None: issues.append(f"{path}: cardEmbed sin 'attrs.id' ni 'attrs.card'") elif cid is not None and not isinstance(cid, int): issues.append(f"{path}: cardEmbed.attrs.id debe ser int, no {type(cid).__name__}") elif cslug is not None: if not isinstance(cslug, str): issues.append(f"{path}: cardEmbed.attrs.card debe ser string slug") elif known_card_slugs and cslug not in known_card_slugs: issues.append( f"{path}: cardEmbed.attrs.card='{cslug}' no existe en el index " f"(conocidos: {sorted(known_card_slugs)[:10]}...)" ) if ntype == "flexContainer": children = node.get("content") or [] if not isinstance(children, list): issues.append(f"{path}: flexContainer.content debe ser lista") else: if not 1 <= len(children) <= 3: issues.append( f"{path}: flexContainer debe tener 1-3 hijos (tiene {len(children)})" ) for i, ch in enumerate(children): ct = ch.get("type") if isinstance(ch, dict) else None if ct not in ("cardEmbed", "supportingText"): issues.append( f"{path}.content[{i}]: flexContainer solo acepta 'cardEmbed' " f"o 'supportingText' como hijos (tiene '{ct}')" ) cw = attrs.get("columnWidths") if cw is not None: if not isinstance(cw, list) or not all(isinstance(x, (int, float)) for x in cw): issues.append(f"{path}: flexContainer.attrs.columnWidths debe ser lista de numeros") elif isinstance(children, list) and len(cw) != len(children): issues.append( f"{path}: columnWidths tiene {len(cw)} valores pero hay {len(children)} hijos" ) if ntype == "smartLink": # 
smartLink necesita entityId (id numerico del card en Metabase) if attrs.get("entityId") is None: issues.append(f"{path}: smartLink sin 'attrs.entityId'") if ntype == "text": if not isinstance(node.get("text"), str): issues.append(f"{path}: text sin campo 'text' string") # Validar marks marks = node.get("marks") or [] if not isinstance(marks, list): issues.append(f"{path}: 'marks' debe ser lista") else: for i, m in enumerate(marks): if not isinstance(m, dict): issues.append(f"{path}.marks[{i}]: mark no es dict") continue mt = m.get("type") if mt not in _VALID_DOC_MARKS: issues.append( f"{path}.marks[{i}]: mark '{mt}' no soportado. " f"Validos: {sorted(_VALID_DOC_MARKS)}" ) # --- Recursion sobre content --- content = node.get("content") if isinstance(content, list): for i, child in enumerate(content): _walk_doc_node(child, f"{path}.content[{i}]", issues, known_card_slugs) def metabase_validate_sql( client: MetabaseClient, database_id: int, sql: str, max_rows: int = 0, ) -> dict: """Valida sintaxis y referencias de SQL ejecutandolo contra Metabase. Ejecuta la query via POST /api/dataset con LIMIT implicito (max_rows=1 si el caller no especifica nada, para minimizar carga). Captura tanto errores HTTP como errores embebidos en el body (Metabase a veces devuelve 200 + status failed). Args: client: instancia autenticada de MetabaseClient. database_id: ID de la base de datos donde ejecutar el SQL. sql: sentencia SQL a validar (SELECT, WITH, etc.). max_rows: limite de filas a retornar para la validacion (0 = default de Metabase). Returns: dict con: ok (bool): True si la query se ejecuto sin errores. error (str|None): mensaje de error si ok=False, None si ok=True. rows_returned (int): numero de filas devueltas si ok=True. Example: >>> result = metabase_validate_sql(client, 1, "SELECT id FROM orders LIMIT 1") >>> if not result["ok"]: ... 
print("SQL invalido:", result["error"]) """ try: response = metabase_execute_query(client, database_id, sql, max_rows) except httpx.HTTPStatusError as exc: # Intentar extraer mensaje del body JSON de Metabase error_msg = _extract_metabase_error(exc) return {"ok": False, "error": error_msg, "rows_returned": 0} except Exception as exc: return {"ok": False, "error": str(exc), "rows_returned": 0} # Metabase puede devolver 200 con status: "failed" en el body status = response.get("status") if isinstance(response, dict) else None if status == "failed": error_msg = response.get("error") or "query fallida (sin mensaje)" return {"ok": False, "error": error_msg, "rows_returned": 0} # Contar filas retornadas rows_returned = 0 if isinstance(response, dict): data = response.get("data", {}) if isinstance(data, dict): rows = data.get("rows", []) if isinstance(rows, list): rows_returned = len(rows) return {"ok": True, "error": None, "rows_returned": rows_returned} def _extract_metabase_error(exc: httpx.HTTPStatusError) -> str: """Extrae el mensaje de error legible del response de Metabase.""" try: body = exc.response.json() if isinstance(body, dict): return body.get("error") or body.get("message") or str(exc) except Exception: pass return str(exc)