310b409ae0
- push_all(): pushea todos los YAMLs de un proyecto (cards primero,
dashboards despues), solo CREATE/UPDATE, resiliente a fallos por item
- explore.py: comandos describe (schema de DB) y sql (query ad-hoc con
limite, cap 5MB, bloqueo de escrituras destructivas)
- payload.py: auto-inyecta id:-N, visualization_settings:{} y
parameter_mappings:[] en dashcards nuevas para evitar 500 en push
- test_local: 11 cards + 3 dashboards sobre Sample Database de Metabase
- registry.db regenerado con auto_metabase_py_analytics indexada
Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
213 lines
7.7 KiB
Python
213 lines
7.7 KiB
Python
"""Comandos de exploracion: describe + sql.
|
|
|
|
- describe <db_slug> Lista tablas, columnas, tipos y conteo de filas.
|
|
- sql <db_slug> "SELECT ..." Ejecuta SQL ad-hoc con limites de seguridad.
|
|
|
|
Ambos resuelven el slug de database via state/index.json del proyecto activo.
|
|
No tocan disco ni crean cards — son herramientas de inspeccion pura.
|
|
"""
|
|
|
|
from __future__ import annotations
|
|
|
|
import sys
|
|
from typing import Any
|
|
|
|
import httpx
|
|
|
|
from metabase.cards import metabase_execute_query
|
|
|
|
|
|
# ---------------------------------------------------------------- Limites
|
|
|
|
# Hard ceiling: even a very large --limit is clamped to this many rows
# (cmd_sql applies min(..., HARD_MAX_ROWS) to the requested limit).
HARD_MAX_ROWS = 10_000
# NOTE(review): not referenced in this part of the file — presumably the
# default value of --limit; confirm against the argument parser.
DEFAULT_MAX_ROWS = 100
MAX_CELL_CHARS = 60  # default width at which _truncate cuts long cell text
# Threshold (5_000_000) on the summed text length of all result cells; above
# it, cmd_sql trims the row list before printing.
MAX_TOTAL_BYTES = 5_000_000
|
|
|
|
|
|
# ---------------------------------------------------------------- Pretty-print
|
|
|
|
|
|
def _truncate(s: str, n: int = MAX_CELL_CHARS) -> str:
    """Return *s* unchanged if it has at most *n* characters.

    Otherwise return its first ``n - 1`` characters followed by a single
    ellipsis character, so the result is *n* characters long.
    """
    fits = len(s) <= n
    return s if fits else s[: n - 1] + "…"
|
|
|
|
|
|
def _format_cell(v: Any) -> str:
    """Render one result cell as display text.

    ``None`` becomes an empty string. Floats are formatted compactly: four
    significant digits below one million in magnitude, two decimals
    otherwise. Every other value is stringified and truncated.
    """
    if isinstance(v, float):
        # Compact formatting hides binary noise such as 1.5000000001.
        spec = ".4g" if abs(v) < 1e6 else ".2f"
        return format(v, spec)
    return "" if v is None else _truncate(str(v))
|
|
|
|
|
|
def _print_table(headers: list[str], rows: list[list[Any]], total_rows: int | None = None) -> None:
    """Print an aligned text table to stdout, followed by a row-count footer.

    Each column is as wide as the longer of its header and its widest
    formatted cell. When *rows* is empty a placeholder line is printed
    instead, plus the total when *total_rows* is truthy.

    Args:
        headers: Column titles, one per column.
        rows: Row values; each cell is rendered with ``_format_cell``.
        total_rows: Optional overall row count; when it exceeds the number
            of rows given, the footer reports both numbers.
    """
    if not rows:
        print(" (sin filas)")
        if total_rows:
            print(f" total en BD: {total_rows}")
        return

    cells = [[_format_cell(value) for value in record] for record in rows]

    # Start from the header widths and grow each column to fit its cells.
    col_widths = [len(title) for title in headers]
    for record in cells:
        for col, text in enumerate(record):
            if len(text) > col_widths[col]:
                col_widths[col] = len(text)

    header_line = " ".join(title.ljust(col_widths[col]) for col, title in enumerate(headers))
    rule = " ".join("-" * width for width in col_widths)
    print(" " + header_line)
    print(" " + rule)
    for record in cells:
        print(" " + " ".join(text.ljust(col_widths[col]) for col, text in enumerate(record)))

    print()
    shown = len(rows)
    if total_rows is None or total_rows <= shown:
        print(f" ({shown} filas)")
    else:
        print(f" ({shown} filas mostradas, {total_rows} en BD)")
|
|
|
|
|
|
# ---------------------------------------------------------------- describe
|
|
|
|
|
|
def _resolve_db_id(project, db_slug: str) -> int:
    """Resolve a database slug (or a numeric id given as text) to a database id.

    The slug is looked up in the ``"databases"`` mapping of the index returned
    by ``project.load_index()``. A slug that is not indexed is still accepted
    when it parses as an integer.

    Args:
        project: Object exposing ``load_index()``, which returns a dict.
        db_slug: Slug from the index, or a numeric database id as a string.

    Returns:
        The value stored for the slug in the index, or ``int(db_slug)``.

    Raises:
        SystemExit: If the slug is neither indexed nor numeric; the message
            lists the known slugs.
    """
    # A missing *or null* "databases" entry counts as an empty mapping, so a
    # numeric id still resolves instead of failing on the membership test.
    known = project.load_index().get("databases") or {}
    if db_slug in known:
        return known[db_slug]

    try:
        return int(db_slug)
    except ValueError:
        # "from None": the ValueError adds nothing to the user-facing exit.
        raise SystemExit(
            f"database slug '{db_slug}' no esta en index. "
            f"Conocidos: {sorted(known)}"
        ) from None
|
|
|
|
|
|
def _sample_query(engine, schema, table, limit=3):
    """Build the ``SELECT * ... LIMIT n`` statement used to sample one table.

    MySQL identifiers are quoted with backticks; every other engine gets
    double quotes. The table is qualified with its schema when one is given,
    and quote characters embedded in an identifier are doubled.
    """
    quote = "`" if engine == "mysql" else '"'

    def quoted(ident):
        return quote + str(ident).replace(quote, quote * 2) + quote

    target = quoted(table)
    if schema:
        target = f"{quoted(schema)}.{target}"
    return f"SELECT * FROM {target} LIMIT {limit}"


def cmd_describe(args, project, client) -> None:
    """Print a database's tables, columns and types, optionally with sample rows.

    Args:
        args: Parsed CLI arguments. Reads ``db`` (slug or numeric id),
            ``filter`` (case-insensitive substring matched against table
            names), ``tables_only`` (skip columns and samples) and
            ``samples`` (print up to three rows per table).
        project: Passed to ``_resolve_db_id`` to resolve ``args.db``.
        client: Object whose ``request("GET", path)`` returns the database
            metadata as a dict.

    Raises:
        SystemExit: Propagated from ``_resolve_db_id`` for an unknown slug.
    """
    db_id = _resolve_db_id(project, args.db)
    meta = client.request("GET", f"/api/database/{db_id}/metadata")
    engine = meta.get("engine")

    print(f"\ndatabase: {meta.get('name')} (id={db_id}, engine={engine})")
    if meta.get("description"):
        print(f" {meta['description']}")

    tables = meta.get("tables") or []
    if args.filter:
        needle = args.filter.lower()
        tables = [t for t in tables if needle in (t.get("name") or "").lower()]

    print(f"\ntablas: {len(tables)}")
    for table in tables:
        name = table.get("name")
        schema = table.get("schema") or ""
        rows = table.get("rows")
        rows_str = f"~{rows} filas" if rows is not None else ""
        # The default schema is left out of the displayed name.
        prefix = f"{schema}." if schema and schema not in ("public", "PUBLIC") else ""
        print(f"\n {prefix}{name} ({rows_str})")
        if table.get("description"):
            print(f" {table['description']}")

        if args.tables_only:
            continue

        fields = table.get("fields") or []
        max_name_len = max((len(fld.get("name") or "") for fld in fields), default=0)
        for fld in fields:
            fname = (fld.get("name") or "").ljust(max_name_len)
            # "or" also covers a base_type key that is present but null.
            ftype = (fld.get("base_type") or "").replace("type/", "")
            extras = []
            semantic = fld.get("semantic_type")
            if semantic:
                extras.append(semantic.replace("type/", ""))
            if fld.get("fk_target_field_id"):
                extras.append("FK")
            extra_str = f" [{', '.join(extras)}]" if extras else ""
            print(f" {fname} {ftype}{extra_str}")

        if args.samples:
            # Sampling is best-effort: any failure is reported in one line
            # and the listing carries on with the next table.
            try:
                sql = _sample_query(engine, schema, name)
                result = metabase_execute_query(client, db_id, sql, max_results=3)
                cols = [c["display_name"] for c in result["data"]["cols"]]
                rows_data = result["data"]["rows"][:3]
                print(" sample (3 rows):")
                for row in rows_data:
                    pairs = [f"{cols[i]}={_format_cell(v)}" for i, v in enumerate(row)]
                    # Show at most six columns per sampled row.
                    print(f" - {', '.join(pairs[:6])}{'...' if len(pairs) > 6 else ''}")
            except Exception as e:
                print(f" (sample fallo: {type(e).__name__})")
|
|
|
|
|
|
# ---------------------------------------------------------------- sql
|
|
|
|
|
|
def cmd_sql(args, project, client) -> None:
    """Run an ad-hoc SQL statement against a database and print the result.

    The row limit is always enforced: ``args.limit`` is clamped to the range
    ``1..HARD_MAX_ROWS``. Statements that open with a write/DDL keyword are
    refused unless ``args.allow_write`` is set. Errors reported by the server
    go to stderr and end the process with exit status 1.

    Args:
        args: Parsed CLI arguments. Reads ``db``, ``query``, ``limit`` and
            ``allow_write``.
        project: Passed to ``_resolve_db_id`` to resolve ``args.db``.
        client: Passed through to ``metabase_execute_query``.

    Raises:
        SystemExit: For an empty query, a refused destructive statement, an
            unknown database slug, or a failed query.
    """
    db_id = _resolve_db_id(project, args.db)

    sql = args.query.strip().rstrip(";")
    if not sql:
        raise SystemExit("query vacia")

    # Heuristic guard only: it looks at the first keyword of the statement.
    destructive = ("INSERT", "UPDATE", "DELETE", "DROP", "TRUNCATE", "ALTER", "CREATE")
    looks_destructive = sql.upper().lstrip().startswith(destructive)
    if looks_destructive and not args.allow_write:
        raise SystemExit(
            "query empieza con keyword destructiva. "
            "/api/dataset suele bloquearlas, pero si quieres seguir: --allow-write"
        )

    requested = args.limit
    limit = min(max(1, requested), HARD_MAX_ROWS)
    if requested > HARD_MAX_ROWS:
        print(f" (--limit {requested} capado al hard ceiling {HARD_MAX_ROWS})")

    preview = sql[:200] + ("..." if len(sql) > 200 else "")
    print(f"\nsql: {preview}")
    print(f"db: {args.db} (id={db_id}) limit: {limit}")

    try:
        result = metabase_execute_query(client, db_id, sql, max_results=limit)
    except httpx.HTTPStatusError as exc:
        response = exc.response
        # Prefer the error text from the JSON body; fall back to the raw
        # response text when the body cannot be read that way.
        try:
            body = response.json()
            detail = body.get("error") or body.get("message") or response.text[:500]
        except Exception:
            detail = response.text[:500]
        print(f"\nERROR ({response.status_code}): {detail}", file=sys.stderr)
        sys.exit(1)

    if result.get("status") != "completed":
        detail = result.get("error") or result.get("message") or "(sin mensaje)"
        print(f"\nERROR de Metabase: {detail}", file=sys.stderr)
        sys.exit(1)

    data = result["data"]
    columns = data["cols"]
    rows = data["rows"]
    headers = [col.get("display_name") or col.get("name") for col in columns]

    elapsed = result.get("running_time", 0)
    row_count = result.get("row_count", len(rows))
    print(f"running_time: {elapsed}ms row_count: {row_count}\n")

    # Safety cap for terminal output: total text length across all cells.
    payload_size = 0
    for row in rows:
        for cell in row:
            payload_size += len(str(cell))
    if payload_size > MAX_TOTAL_BYTES:
        # Keep a share of the rows proportional to the allowed size.
        keep = max(1, len(rows) * MAX_TOTAL_BYTES // max(1, payload_size))
        print(f" ! payload {payload_size} bytes > {MAX_TOTAL_BYTES} — recortando a {keep} filas")
        rows = rows[:keep]

    _print_table(headers, rows, total_rows=row_count)
|