Files
fn_registry/apps/auto_metabase/explore.py
T
egutierrez 310b409ae0 feat(auto_metabase): push-all + describe/sql + auto-inject de dashcards
- push_all(): pushea todos los YAMLs de un proyecto (cards primero,
  dashboards despues), solo CREATE/UPDATE, resiliente a fallos por item
- explore.py: comandos describe (schema de DB) y sql (query ad-hoc con
  limite, cap 5MB, bloqueo de escrituras destructivas)
- payload.py: auto-inyecta id:-N, visualization_settings:{} y
  parameter_mappings:[] en dashcards nuevas para evitar 500 en push
- test_local: 11 cards + 3 dashboards sobre Sample Database de Metabase
- registry.db regenerado con auto_metabase_py_analytics indexada

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-04-13 13:14:05 +02:00

213 lines
7.7 KiB
Python

"""Comandos de exploracion: describe + sql.
- describe <db_slug> Lista tablas, columnas, tipos y conteo de filas.
- sql <db_slug> "SELECT ..." Ejecuta SQL ad-hoc con limites de seguridad.
Ambos resuelven el slug de database via state/index.json del proyecto activo.
No tocan disco ni crean cards — son herramientas de inspeccion pura.
"""
from __future__ import annotations
import sys
from typing import Any
import httpx
from metabase.cards import metabase_execute_query
# ---------------------------------------------------------------- Limits
# Hard ceiling: cmd_sql clamps the requested --limit to this many rows,
# no matter how high the caller asks.
HARD_MAX_ROWS = 10_000
# Not referenced by the code visible in this module; presumably the default
# value of --limit declared by the CLI parser (TODO confirm).
DEFAULT_MAX_ROWS = 100
MAX_CELL_CHARS = 60  # default width _truncate applies to cells printed on stdout
MAX_TOTAL_BYTES = 5_000_000  # ~5 MB: cmd_sql drops trailing rows once the summed str() length of all cells exceeds this
# ---------------------------------------------------------------- Pretty-print
def _truncate(s: str, n: int = MAX_CELL_CHARS) -> str:
    """Shorten *s* to at most *n* characters, marking any cut with an ellipsis.

    Args:
        s: Text to shorten.
        n: Maximum length of the returned string, ellipsis included.

    Returns:
        *s* unchanged when it already fits. Otherwise its first ``n - 1``
        characters followed by a single ellipsis character, so the reader can
        tell the value was cut. An empty string when ``n`` is smaller than 1.
    """
    if len(s) <= n:
        return s
    if n < 1:
        # No room even for the marker; without this guard the negative slice
        # bound below would keep almost the entire string.
        return ""
    # Written as an escape so the marker survives re-encoding of the file.
    return s[: n - 1] + "\u2026"
def _format_cell(v: Any) -> str:
    """Render a single result value as the text shown in a table cell.

    Args:
        v: Raw cell value.

    Returns:
        An empty string for ``None``; a compact fixed-format rendering for
        floats; for anything else, ``str(v)`` passed through ``_truncate``
        with its default width.
    """
    if isinstance(v, float):
        # Fixed formats keep binary-float noise such as 1.5000000001 out of
        # the table: 4 significant digits below one million, 2 decimals
        # otherwise (NaN fails the comparison and takes the 2-decimal form).
        spec = ".4g" if abs(v) < 1e6 else ".2f"
        return format(v, spec)
    return "" if v is None else _truncate(str(v))
def _print_table(headers: list[str], rows: list[list[Any]], total_rows: int | None = None) -> None:
    """Print *rows* to stdout as a column-aligned text table.

    Args:
        headers: Column titles, one per column.
        rows: Row values; every cell is rendered with ``_format_cell``.
        total_rows: Optional overall row count supplied by the caller. It is
            reported in the footer when it exceeds the number of rows
            printed, or on its own line when there are no rows and it is
            non-zero.

    Raises:
        IndexError: If a row holds more cells than there are headers.
    """
    if not rows:
        print(" (sin filas)")
        if total_rows:
            print(f" total en BD: {total_rows}")
        return

    # Render every cell first so widths are measured on the displayed text.
    cells = []
    for record in rows:
        cells.append([_format_cell(value) for value in record])

    # Each column is as wide as its title or its longest rendered cell.
    col_widths = [len(title) for title in headers]
    for line in cells:
        for pos, text in enumerate(line):
            if len(text) > col_widths[pos]:
                col_widths[pos] = len(text)

    header_line = " ".join(
        title.ljust(width) for title, width in zip(headers, col_widths)
    )
    rule = " ".join("-" * width for width in col_widths)
    print(" " + header_line)
    print(" " + rule)
    for line in cells:
        print(" " + " ".join(text.ljust(width) for text, width in zip(line, col_widths)))
    print()

    shown = len(rows)
    if total_rows is None or total_rows <= shown:
        print(f" ({shown} filas)")
    else:
        print(f" ({shown} filas mostradas, {total_rows} en BD)")
# ---------------------------------------------------------------- describe
def _resolve_db_id(project, db_slug: str) -> int:
    """Translate a database slug, or a numeric id given as text, into an id.

    Args:
        project: Object whose ``load_index()`` returns the project index; its
            ``"databases"`` entry maps slug -> database id.
        db_slug: Slug to look up. A value missing from the index is still
            accepted when it parses as an integer.

    Returns:
        The id stored in the index for *db_slug*, or ``int(db_slug)``.

    Raises:
        SystemExit: If *db_slug* is neither a known slug nor an integer. The
            message lists the known slugs.
    """
    # "databases" may be absent or null in the index; treat both as empty.
    known = project.load_index().get("databases") or {}
    if db_slug in known:
        return known[db_slug]
    try:
        # Also accept a raw numeric id.
        return int(db_slug)
    except (TypeError, ValueError):
        # `from None`: the int() failure is an implementation detail, not
        # context the user needs next to the message.
        raise SystemExit(
            f"database slug '{db_slug}' no esta en index. "
            f"Conocidos: {sorted(known.keys())}"
        ) from None
def _quote_ident(ident: Any, engine: str | None) -> str:
    """Quote one SQL identifier for *engine*, doubling embedded quote chars."""
    # MySQL quotes identifiers with backticks; the other engines handled here
    # (H2, Postgres, ...) use double quotes.
    quote = "`" if engine == "mysql" else '"'
    return f"{quote}{str(ident).replace(quote, quote * 2)}{quote}"


def _print_samples(client, db_id: int, engine: str | None, schema: str, table: Any) -> None:
    """Print up to three sample rows of *table*; never raises (best effort)."""
    try:
        target = _quote_ident(table, engine)
        if schema:
            # Qualify the table so it is read from the schema shown in the
            # listing rather than whatever the connection's default resolves.
            target = f"{_quote_ident(schema, engine)}.{target}"
        sql = f"SELECT * FROM {target} LIMIT 3"
        result = metabase_execute_query(client, db_id, sql, max_results=3)
        cols = [c["display_name"] for c in result["data"]["cols"]]
        rows_data = result["data"]["rows"][:3]
        print(" sample (3 rows):")
        for row in rows_data:
            pairs = [f"{cols[i]}={_format_cell(v)}" for i, v in enumerate(row)]
            # Only the first six columns are shown to keep each line short.
            print(f" - {', '.join(pairs[:6])}{'...' if len(pairs) > 6 else ''}")
    except Exception as e:
        # Deliberately broad: a failing sample must not abort the description
        # of the remaining tables.
        print(f" (sample fallo: {type(e).__name__})")


def cmd_describe(args, project, client) -> None:
    """Describe a database: its tables, their columns and column types.

    Args:
        args: Parsed CLI arguments. Reads ``db`` (slug or numeric id),
            ``filter`` (case-insensitive substring matched against table
            names; falsy disables it), ``tables_only`` (skip columns and
            samples) and ``samples`` (print sample rows per table).
        project: Active project, used to resolve ``args.db`` to a database id.
        client: Metabase client; ``client.request("GET", path)`` must return
            the decoded metadata mapping.

    Raises:
        SystemExit: If ``args.db`` cannot be resolved to a database id.
    """
    db_id = _resolve_db_id(project, args.db)
    meta = client.request("GET", f"/api/database/{db_id}/metadata")
    engine = meta.get("engine")
    print(f"\ndatabase: {meta.get('name')} (id={db_id}, engine={engine})")
    if meta.get("description"):
        print(f" {meta['description']}")

    tables = meta.get("tables", []) or []
    if args.filter:
        needle = args.filter.lower()
        tables = [t for t in tables if needle in (t.get("name") or "").lower()]
    print(f"\ntablas: {len(tables)}")

    for t in tables:
        name = t.get("name")
        schema = t.get("schema") or ""
        rows = t.get("rows")
        # The default schema is left implicit, both in the listing and in the
        # sample query.
        qualified = bool(schema) and schema not in ("public", "PUBLIC")
        prefix = f"{schema}." if qualified else ""
        # Omit the parenthesis entirely when no row estimate is available.
        count = f" (~{rows} filas)" if rows is not None else ""
        print(f"\n {prefix}{name}{count}")
        if t.get("description"):
            print(f" {t['description']}")
        if args.tables_only:
            continue

        fields = t.get("fields", []) or []
        max_name_len = max((len(fld.get("name") or "") for fld in fields), default=0)
        for fld in fields:
            fname = (fld.get("name") or "").ljust(max_name_len)
            # base_type may be present but null; treat it like a missing one.
            ftype = (fld.get("base_type") or "").replace("type/", "")
            extras = []
            if fld.get("semantic_type"):
                extras.append(fld["semantic_type"].replace("type/", ""))
            if fld.get("fk_target_field_id"):
                extras.append("FK")
            extra_str = f" [{', '.join(extras)}]" if extras else ""
            print(f" {fname} {ftype}{extra_str}")

        if args.samples:
            _print_samples(client, db_id, engine, schema if qualified else "", name)
# ---------------------------------------------------------------- sql
# Statements that modify data, schema or permissions; blocked unless the
# caller passes --allow-write.
_WRITE_KEYWORDS = frozenset({
    "INSERT", "UPDATE", "DELETE", "DROP", "TRUNCATE", "ALTER", "CREATE",
    "MERGE", "REPLACE", "GRANT", "REVOKE",
})


def _leading_keyword(sql: str) -> str:
    """Return the first word of *sql*, upper-cased, skipping leading comments."""
    rest = sql.lstrip()
    while rest.startswith(("--", "/*")):
        if rest.startswith("--"):
            # Line comment: drop everything up to and including the newline.
            rest = rest.partition("\n")[2]
        else:
            # Block comment: search past the opening "/*" for its terminator.
            close = rest.find("*/", 2)
            rest = "" if close < 0 else rest[close + 2:]
        rest = rest.lstrip()
    end = 0
    while end < len(rest) and (rest[end].isalnum() or rest[end] == "_"):
        end += 1
    return rest[:end].upper()


def cmd_sql(args, project, client) -> None:
    """Run ad-hoc SQL against a database and print the result as a table.

    A row limit is always applied: ``args.limit`` is clamped to the range
    ``1..HARD_MAX_ROWS``. Rows are additionally dropped from the end when the
    summed text length of all cells exceeds ``MAX_TOTAL_BYTES``.

    The write guard is best effort. It inspects only the first keyword of
    the query, after any leading comments, so it does not detect writes
    embedded later in the text.

    Args:
        args: Parsed CLI arguments. Reads ``db`` (slug or numeric id),
            ``query`` (SQL text), ``limit`` (requested row count; ``None``
            falls back to ``DEFAULT_MAX_ROWS``) and ``allow_write`` (bypass
            the write guard).
        project: Active project, used to resolve ``args.db`` to a database id.
        client: Metabase client handed to ``metabase_execute_query``.

    Raises:
        SystemExit: On an empty query, a write statement without
            ``allow_write``, an unresolvable database, an HTTP error from
            Metabase, or a result whose status is not ``"completed"``.
    """
    db_id = _resolve_db_id(project, args.db)
    sql = args.query.strip().rstrip(";")
    if not sql:
        raise SystemExit("query vacia")

    if _leading_keyword(sql) in _WRITE_KEYWORDS and not args.allow_write:
        raise SystemExit(
            "query empieza con keyword destructiva. "
            "/api/dataset suele bloquearlas, pero si quieres seguir: --allow-write"
        )

    requested = DEFAULT_MAX_ROWS if args.limit is None else args.limit
    limit = min(max(1, requested), HARD_MAX_ROWS)
    if requested > HARD_MAX_ROWS:
        print(f" (--limit {requested} capado al hard ceiling {HARD_MAX_ROWS})")
    print(f"\nsql: {sql[:200]}{'...' if len(sql) > 200 else ''}")
    print(f"db: {args.db} (id={db_id}) limit: {limit}")

    try:
        result = metabase_execute_query(client, db_id, sql, max_results=limit)
    except httpx.HTTPStatusError as e:
        # Metabase puts the error in the JSON body even on 4xx responses.
        try:
            body = e.response.json()
            err = body.get("error") or body.get("message") or e.response.text[:500]
        except Exception:
            # Body is not JSON, or not a mapping: fall back to the raw text.
            err = e.response.text[:500]
        print(f"\nERROR ({e.response.status_code}): {err}", file=sys.stderr)
        sys.exit(1)

    if result.get("status") != "completed":
        err = result.get("error") or result.get("message") or "(sin mensaje)"
        print(f"\nERROR de Metabase: {err}", file=sys.stderr)
        sys.exit(1)

    cols_meta = result["data"]["cols"]
    rows = result["data"]["rows"]
    # Always hand strings to the table printer, even for a column that
    # carries neither a display name nor a name.
    headers = [str(c.get("display_name") or c.get("name") or "") for c in cols_meta]
    rt = result.get("running_time", 0)
    rc = result.get("row_count", len(rows))
    print(f"running_time: {rt}ms row_count: {rc}\n")

    # Safety cap for terminal output; size is approximated by the text
    # length of every cell rather than by encoded bytes.
    payload_size = sum(sum(len(str(c)) for c in row) for row in rows)
    if payload_size > MAX_TOTAL_BYTES:
        # Keep a share of the rows proportional to the allowed size.
        keep = max(1, len(rows) * MAX_TOTAL_BYTES // max(1, payload_size))
        print(f" ! payload {payload_size} bytes > {MAX_TOTAL_BYTES} — recortando a {keep} filas")
        rows = rows[:keep]

    _print_table(headers, rows, total_rows=rc)