"""Exploration commands: describe + sql.

  - describe          List tables, columns, types and row counts.
  - sql "SELECT ..."  Run ad-hoc SQL with safety limits.

Both resolve the database slug through the active project's
state/index.json.  They do not touch disk or create cards: they are pure
inspection tools.
"""
from __future__ import annotations

import sys
from typing import Any

import httpx

from metabase.cards import metabase_execute_query


# ---------------------------------------------------------------- Limits
# Hard ceiling: even a very high --limit never exceeds these rows/cells.
HARD_MAX_ROWS = 10_000
DEFAULT_MAX_ROWS = 100
MAX_CELL_CHARS = 60  # truncate long cells on stdout
MAX_TOTAL_BYTES = 5_000_000  # 5 MB of response payload; rows are cut beyond it

# Leading keywords that make cmd_sql demand an explicit --allow-write.
_DESTRUCTIVE_KEYWORDS = (
    "INSERT", "UPDATE", "DELETE", "DROP", "TRUNCATE", "ALTER", "CREATE",
)


# ---------------------------------------------------------------- Pretty-print
def _truncate(s: str, n: int = MAX_CELL_CHARS) -> str:
    """Return *s* cut to at most *n* characters, ending in an ellipsis if cut.

    A non-positive *n* yields an empty string (the plain slice arithmetic
    would otherwise return something longer than requested).
    """
    if n <= 0:
        return ""
    if len(s) <= n:
        return s
    return s[: n - 1] + "…"


def _format_cell(v: Any) -> str:
    """Render a single result value as a short display string.

    ``None`` becomes an empty string.  Floats are rounded to hide binary
    noise such as ``1.5000000001``; everything else goes through ``str`` and
    is truncated to ``MAX_CELL_CHARS``.
    """
    if v is None:
        return ""
    if isinstance(v, float):
        # 4 significant digits only below 1e3: from there on ``.4g`` either
        # drops integer digits or switches to scientific notation
        # (12345.678 -> '1.235e+04'), so larger magnitudes use fixed-point.
        # NaN/inf fail the comparison and print as 'nan'/'inf' via ``.2f``.
        return f"{v:.4g}" if abs(v) < 1e3 else f"{v:.2f}"
    return _truncate(str(v))


def _print_table(headers: list[str], rows: list[list[Any]], total_rows: int | None = None) -> None:
    """Print a simple table to stdout, sizing each column to its content.

    Args:
        headers: Column titles; ``None`` entries are shown as empty titles.
        rows: Result rows.  Rows may be shorter or longer than ``headers``;
            extra cells get an untitled column instead of raising.
        total_rows: Optional overall row count, reported in the footer when
            it exceeds the number of rows actually printed.
    """
    if not rows:
        print(" (sin filas)")
        if total_rows:
            print(f" total en BD: {total_rows}")
        return

    formatted = [[_format_cell(cell) for cell in row] for row in rows]
    titles = ["" if h is None else str(h) for h in headers]

    # Pad the header list so every cell of the widest row has a width slot.
    n_cols = max(len(titles), max(len(row) for row in formatted))
    titles.extend([""] * (n_cols - len(titles)))

    widths = [len(title) for title in titles]
    for row in formatted:
        for i, cell in enumerate(row):
            widths[i] = max(widths[i], len(cell))

    print(" " + " ".join(title.ljust(w) for title, w in zip(titles, widths)))
    print(" " + " ".join("-" * w for w in widths))
    for row in formatted:
        print(" " + " ".join(cell.ljust(w) for cell, w in zip(row, widths)))
    print()

    shown = len(rows)
    if total_rows is not None and total_rows > shown:
        print(f" ({shown} filas mostradas, {total_rows} en BD)")
    else:
        print(f" ({shown} filas)")


# ---------------------------------------------------------------- describe
def _resolve_db_id(project, db_slug: str) -> int:
    """Map a database slug to its id using the project's index.

    The slug is looked up under the ``"databases"`` key of
    ``project.load_index()``.  A slug that is not in the index is also
    accepted when it parses as an integer id.

    Raises:
        SystemExit: If the slug is neither indexed nor numeric.
    """
    dbs = project.load_index().get("databases", {})
    if db_slug in dbs:
        return dbs[db_slug]
    # Also accept a numeric id.
    try:
        return int(db_slug)
    except ValueError:
        # ``from None``: the ValueError is an implementation detail and would
        # only add noise to the user-facing exit message.
        raise SystemExit(
            f"database slug '{db_slug}' no esta en index. "
            f"Conocidos: {sorted(dbs.keys())}"
        ) from None


def _quote_identifier(identifier: str, engine: str | None) -> str:
    """Quote one SQL identifier, doubling any embedded quote character.

    MySQL uses backticks; every other engine gets double quotes.
    """
    quote = "`" if engine == "mysql" else '"'
    return quote + identifier.replace(quote, quote * 2) + quote


def _qualified_table(schema: str | None, name: str, engine: str | None) -> str:
    """Return the quoted table reference, schema-qualified when a schema is set."""
    parts = [schema, name] if schema else [name]
    return ".".join(_quote_identifier(part, engine) for part in parts)


def _print_fields(fields: list[dict[str, Any]]) -> None:
    """Print one aligned line per field: name, base type and extras (semantic type, FK)."""
    name_width = max((len(fld.get("name") or "") for fld in fields), default=0)
    for fld in fields:
        fname = (fld.get("name") or "").ljust(name_width)
        # ``or ""`` also covers a base_type that is present but None.
        ftype = (fld.get("base_type") or "").replace("type/", "")
        extras = []
        if fld.get("semantic_type"):
            extras.append(fld["semantic_type"].replace("type/", ""))
        if fld.get("fk_target_field_id"):
            extras.append("FK")
        extra_str = f" [{', '.join(extras)}]" if extras else ""
        print(f" {fname} {ftype}{extra_str}")


def _print_samples(client, db_id: int, engine: str | None, schema: str | None, name: str) -> None:
    """Print up to three sample rows of a table; best-effort, never raises.

    Each row shows at most six ``column=value`` pairs.
    """
    try:
        # Schema-qualify the table so tables outside the connection's default
        # schema resolve; quoting adapts to the engine (backticks for MySQL).
        sql = f"SELECT * FROM {_qualified_table(schema, name, engine)} LIMIT 3"
        result = metabase_execute_query(client, db_id, sql, max_results=3)
        data = result["data"]
        cols = [c.get("display_name") or c.get("name") or "" for c in data["cols"]]
        print(" sample (3 rows):")
        for row in data["rows"][:3]:
            pairs = [f"{col}={_format_cell(v)}" for col, v in zip(cols, row)]
            print(f" - {', '.join(pairs[:6])}{'...' if len(pairs) > 6 else ''}")
    except Exception as e:
        # Deliberately broad: sampling is optional and must not abort the
        # describe output (unsupported dialect, permissions, odd payloads...).
        print(f" (sample fallo: {type(e).__name__})")


def cmd_describe(args, project, client) -> None:
    """Describe a database: tables, columns and types.

    Reads ``args.db`` (slug or numeric id), ``args.filter`` (case-insensitive
    substring matched against table names), ``args.tables_only`` (skip fields
    and samples) and ``args.samples`` (print sample rows per table).  Metadata
    is fetched with ``client.request("GET", "/api/database/<id>/metadata")``.
    """
    db_id = _resolve_db_id(project, args.db)
    meta = client.request("GET", f"/api/database/{db_id}/metadata")
    engine = meta.get("engine")

    print(f"\ndatabase: {meta.get('name')} (id={db_id}, engine={engine})")
    if meta.get("description"):
        print(f" {meta['description']}")

    tables = meta.get("tables", []) or []
    if args.filter:
        needle = args.filter.lower()
        tables = [t for t in tables if needle in (t.get("name") or "").lower()]

    print(f"\ntablas: {len(tables)}")
    for t in tables:
        name = t.get("name")
        schema = t.get("schema") or ""
        rows = t.get("rows")
        # Omit the parenthetical entirely when the row count is unknown
        # instead of printing an empty "()".
        rows_suffix = f" (~{rows} filas)" if rows is not None else ""
        # The default schema is left out of the displayed name.
        prefix = f"{schema}." if schema and schema not in ("public", "PUBLIC") else ""
        print(f"\n {prefix}{name}{rows_suffix}")
        if t.get("description"):
            print(f" {t['description']}")

        if args.tables_only:
            continue

        _print_fields(t.get("fields", []) or [])
        if args.samples:
            _print_samples(client, db_id, engine, schema, name)


# ---------------------------------------------------------------- sql
def _http_error_message(response) -> str:
    """Extract a readable message from a failed Metabase HTTP response.

    The JSON body's ``error`` / ``message`` keys are preferred; otherwise the
    first 500 characters of the raw body are returned.
    """
    try:
        body = response.json()
        return body.get("error") or body.get("message") or response.text[:500]
    except Exception:
        # Non-JSON or non-object body: fall back to the raw text.
        return response.text[:500]


def cmd_sql(args, project, client) -> None:
    """Run ad-hoc SQL against a database with a mandatory row limit.

    Reads ``args.db``, ``args.query``, ``args.limit`` and ``args.allow_write``.
    The limit falls back to ``DEFAULT_MAX_ROWS`` when it is ``None`` and is
    clamped to ``[1, HARD_MAX_ROWS]``.  Result rows are printed as a table,
    cut down when their text exceeds ``MAX_TOTAL_BYTES``.

    Raises:
        SystemExit: On an empty query, on a query starting with a destructive
            keyword without ``--allow-write``, or (exit code 1) when the
            request fails or the query does not complete.
    """
    db_id = _resolve_db_id(project, args.db)
    sql = args.query.strip().rstrip(";")
    if not sql:
        raise SystemExit("query vacia")

    # Guard against clearly destructive statements; this tool is meant to be
    # read-only through /api/dataset.
    if sql.upper().startswith(_DESTRUCTIVE_KEYWORDS) and not args.allow_write:
        raise SystemExit(
            "query empieza con keyword destructiva. "
            "/api/dataset suele bloquearlas, pero si quieres seguir: --allow-write"
        )

    requested = DEFAULT_MAX_ROWS if args.limit is None else args.limit
    limit = min(max(1, requested), HARD_MAX_ROWS)
    if requested > HARD_MAX_ROWS:
        print(f" (--limit {requested} capado al hard ceiling {HARD_MAX_ROWS})")

    print(f"\nsql: {sql[:200]}{'...' if len(sql) > 200 else ''}")
    print(f"db: {args.db} (id={db_id}) limit: {limit}")

    try:
        result = metabase_execute_query(client, db_id, sql, max_results=limit)
    except httpx.HTTPStatusError as e:
        # Metabase puts the error in the JSON body even on 4xx responses.
        err = _http_error_message(e.response)
        print(f"\nERROR ({e.response.status_code}): {err}", file=sys.stderr)
        sys.exit(1)

    if result.get("status") != "completed":
        err = result.get("error") or result.get("message") or "(sin mensaje)"
        print(f"\nERROR de Metabase: {err}", file=sys.stderr)
        sys.exit(1)

    cols_meta = result["data"]["cols"]
    rows = result["data"]["rows"]
    headers = [c.get("display_name") or c.get("name") or "" for c in cols_meta]

    rt = result.get("running_time", 0)
    rc = result.get("row_count", len(rows))
    print(f"running_time: {rt}ms row_count: {rc}\n")

    # Safety cap on payload size (terminal display).  The size is measured in
    # characters of each cell's ``str`` form, and the number of rows kept is
    # scaled proportionally.
    payload_size = sum(sum(len(str(c)) for c in row) for row in rows)
    if payload_size > MAX_TOTAL_BYTES:
        keep = max(1, len(rows) * MAX_TOTAL_BYTES // max(1, payload_size))
        print(f" ! payload {payload_size} bytes > {MAX_TOTAL_BYTES} — recortando a {keep} filas")
        rows = rows[:keep]

    _print_table(headers, rows, total_rows=rc)