"""Mantenimiento y reparacion de cards MBQL de Metabase.""" import copy import time import uuid from .client import MetabaseClient # --------------------------------------------------------------------------- # Helpers internos compartidos # --------------------------------------------------------------------------- def _new_uuid() -> str: return str(uuid.uuid4()) def _field_name_of(node) -> tuple[str | None, str | None]: """Extrae (name, kind) de un node ['field'|'expression', meta, 'name'].""" if isinstance(node, list) and len(node) >= 3 and node[0] in ("field", "expression"): nm = node[-1] if isinstance(nm, str): return nm, node[0] return None, None # --------------------------------------------------------------------------- # metabase_fix_null_ratio # --------------------------------------------------------------------------- def _analyze_stage_null_ratio(stage: dict) -> tuple[dict, dict]: """Detecta slots vulnerables al patron SUM(a-b)/SUM(b) en una stage MBQL. Devuelve: vulnerable: {slot_diff: (slot_a, slot_b, expr_name)} subtractions: {expr_name: (op_a_name, op_b_name)} """ subtractions = {} for e in stage.get("expressions", []) or []: if not (isinstance(e, list) and len(e) == 4 and e[0] == "-"): continue meta = e[1] if isinstance(e[1], dict) else {} name = meta.get("lib/expression-name") if not name: continue a_name, a_kind = _field_name_of(e[2]) b_name, b_kind = _field_name_of(e[3]) if a_name and b_name and a_kind == "field" and b_kind == "field": subtractions[name] = (a_name, b_name) aggs = stage.get("aggregation", []) or [] func_counts: dict[str, int] = {} sum_field_to_slot: dict[str, str] = {} sum_expr_to_slot: dict[str, str] = {} for agg in aggs: if not isinstance(agg, list) or not agg: continue func = agg[0] func_counts[func] = func_counts.get(func, 0) + 1 slot = func if func_counts[func] == 1 else f"{func}_{func_counts[func]}" if func == "sum" and len(agg) >= 3: operand = agg[2] nm, kind = _field_name_of(operand) if kind == "field" and nm: sum_field_to_slot[nm] = slot elif kind == "expression" and nm: sum_expr_to_slot[nm] = slot vulnerable = {} for expr_name, slot_diff in sum_expr_to_slot.items(): if expr_name not in subtractions: continue op_a, op_b = subtractions[expr_name] slot_a = sum_field_to_slot.get(op_a) slot_b = sum_field_to_slot.get(op_b) if slot_a and slot_b: vulnerable[slot_diff] = (slot_a, slot_b, expr_name) return vulnerable, subtractions def _rewrite_field_refs(node, slot_map: dict): """Reemplaza recursivamente [field, meta, slot] donde slot in slot_map por [-, new_meta, [field, ..., slot_a], [field, ..., slot_b]].""" if isinstance(node, list): if ( len(node) >= 3 and node[0] == "field" and isinstance(node[-1], str) and node[-1] in slot_map ): slot_a, slot_b, _ = slot_map[node[-1]] base_type = ( node[1].get("base-type", "type/Decimal") if isinstance(node[1], dict) else "type/Decimal" ) return [ "-", {"lib/uuid": _new_uuid()}, ["field", {"base-type": base_type, "lib/uuid": _new_uuid()}, slot_a], ["field", {"base-type": base_type, "lib/uuid": _new_uuid()}, slot_b], ] return [_rewrite_field_refs(x, slot_map) for x in node] return node def _fix_card_query(dq: dict) -> list[str]: """Aplica el fix in-place al dataset_query. Devuelve lista de cambios.""" stages = dq.get("stages", []) changes = [] for si, stage in enumerate(stages): vulnerable, subtractions = _analyze_stage_null_ratio(stage) if not vulnerable: continue preagg_names = set(subtractions.keys()) new_exprs = [] for e in stage.get("expressions", []) or []: if ( isinstance(e, list) and len(e) == 4 and e[0] == "-" and isinstance(e[1], dict) and e[1].get("lib/expression-name") in preagg_names ): new_exprs.append(e) else: new_exprs.append(_rewrite_field_refs(e, vulnerable)) stage["expressions"] = new_exprs for sj in range(si + 1, len(stages)): for key in ("expressions", "aggregation", "breakout", "filters", "order-by", "fields"): if key in stages[sj]: stages[sj][key] = [ _rewrite_field_refs(x, vulnerable) for x in stages[sj][key] ] for slot, (sa, sb, ename) in vulnerable.items(): changes.append(f"stage[{si}] {slot}=sum({ename!r}) -> ({sa} - {sb})") return changes def metabase_fix_null_ratio( client: MetabaseClient, *, dry_run: bool = True, card_ids: list[int] | None = None, ) -> dict: """Detecta y repara el patron SUM(a-b)/SUM(b) en cards MBQL de Metabase. El patron vulnerable ocurre cuando una agregacion computa SUM(expr_resta) donde expr_resta es una resta pre-agg de dos campos. Si alguna fila tiene NULL, SUM(A-B) != SUM(A) - SUM(B). El fix reescribe las referencias post-agg al slot diferencia para usar (SUM(A) - SUM(B)) en su lugar. Solo procesa cards MBQL activas (type='query', no archivadas). Las cards SQL nativas o modelos se omiten silenciosamente. Args: client: Cliente Metabase autenticado. dry_run: Si True (default), escanea y reporta sin modificar nada. Si False, aplica el fix via PUT /api/card/:id. card_ids: Lista de IDs a procesar. None = todas las cards MBQL activas. Returns: dict con campos: scanned (int): cards MBQL evaluadas. affected (int): cards donde se detecto el patron vulnerable. fixed (int): cards efectivamente actualizadas (0 si dry_run=True). errors (list[dict]): lista de {card_id, error} para fallos en PUT. Example: >>> from metabase import MetabaseClient, metabase_fix_null_ratio >>> c = MetabaseClient("https://metabase.example.com", "mb_apikey") >>> report = metabase_fix_null_ratio(c, dry_run=True) >>> print(report) {'scanned': 312, 'affected': 4, 'fixed': 0, 'errors': []} >>> # Para aplicar: >>> report = metabase_fix_null_ratio(c, dry_run=False) """ all_cards = client.request("GET", "/api/card") if card_ids is not None: card_id_set = set(card_ids) all_cards = [c for c in all_cards if c.get("id") in card_id_set] mbql_cards = [ c for c in all_cards if not c.get("archived", False) and isinstance(c.get("dataset_query"), dict) and c["dataset_query"].get("type") == "query" and isinstance(c["dataset_query"].get("query", {}).get("stages") if "query" in c["dataset_query"] else c["dataset_query"].get("stages"), list) ] # Metabase MBQL puede tener stages en dataset_query.query.stages (legacy) # o en dataset_query.stages (v2). Normalizar: def _get_stages(dq: dict) -> list | None: if isinstance(dq.get("stages"), list): return dq["stages"] q = dq.get("query", {}) if isinstance(q.get("stages"), list): return q["stages"] return None affected_cards = [] scanned = 0 for card in all_cards: if card_ids is not None and card.get("id") not in set(card_ids): continue if card.get("archived", False): continue dq = card.get("dataset_query") if not isinstance(dq, dict): continue stages = _get_stages(dq) if stages is None: continue scanned += 1 dq_copy = copy.deepcopy(dq) # Operar sobre el stages del objeto correcto target = dq_copy if isinstance(dq_copy.get("stages"), list) else dq_copy.get("query", {}) changes = _fix_card_query(target) if changes: affected_cards.append((card, dq_copy, changes)) fixed = 0 errors: list[dict] = [] if not dry_run: for card, dq_fixed, _changes in affected_cards: try: client.request("PUT", f"/api/card/{card['id']}", json={"dataset_query": dq_fixed}) fixed += 1 except Exception as exc: errors.append({"card_id": card["id"], "error": str(exc)[:200]}) time.sleep(0.05) return { "scanned": scanned, "affected": len(affected_cards), "fixed": fixed, "errors": errors, } # --------------------------------------------------------------------------- # metabase_pair_n_n1_columns # --------------------------------------------------------------------------- def _slot_for_sum_field(stage: dict, target_field_name: str) -> str | None: """Devuelve el slot MBQL (ej: 'sum', 'sum_4') de sum(target_field) en la stage.""" aggs = stage.get("aggregation", []) or [] func_counts: dict[str, int] = {} for agg in aggs: if not isinstance(agg, list) or not agg: continue func = agg[0] func_counts[func] = func_counts.get(func, 0) + 1 slot = func if func_counts[func] == 1 else f"{func}_{func_counts[func]}" if func == "sum" and len(agg) >= 3: nm, kind = _field_name_of(agg[2]) if kind == "field" and nm == target_field_name: return slot return None def _find_paired_slots(dq: dict, base_name: str) -> tuple[str | None, str | None]: """Busca (slot_base, slot_n1) para sum(base_name) y sum(base_name_1) en el MBQL.""" for stage in (dq.get("stages") or []): if not stage.get("aggregation"): continue slot_n = _slot_for_sum_field(stage, base_name) slot_n1 = _slot_for_sum_field(stage, f"{base_name}_1") if slot_n and slot_n1: return slot_n, slot_n1 return None, None def _reorder_table_columns( cols: list[dict], slot_n: str, slot_n1: str, ) -> tuple[list[dict], bool, str]: """Habilita slot_n1 y lo reubica inmediatamente despues de slot_n. Returns: (new_cols, changed, reason) """ cols = [dict(c) for c in cols] idx_n = next((i for i, c in enumerate(cols) if c.get("name") == slot_n), -1) if idx_n < 0: return cols, False, "slot_n no presente en table.columns" idx_n1 = next((i for i, c in enumerate(cols) if c.get("name") == slot_n1), -1) # Ya en posicion correcta y habilitada: no hay cambio if idx_n1 == idx_n + 1 and cols[idx_n1].get("enabled") is True: return cols, False, "ya en la posicion correcta y habilitado" if idx_n1 < 0: entry: dict = {"name": slot_n1, "enabled": True} else: entry = cols.pop(idx_n1) entry["enabled"] = True if idx_n1 < idx_n: idx_n -= 1 insert_at = idx_n + 1 cols.insert(insert_at, entry) return cols, True, "reubicado y habilitado" def metabase_pair_n_n1_columns( client: MetabaseClient, *, dry_run: bool = True, card_ids: list[int] | None = None, base_field: str = "Valor_vendido", ) -> dict: """Habilita y posiciona la columna _1 junto a su par en cards tabla/pivot de Metabase. Para cards con display 'table' o 'pivot' que contienen agregaciones SUM(base_field) y SUM(base_field_1), busca la columna base_field_1 en visualization_settings.table.columns, la habilita (enabled=True) y la reubica inmediatamente despues de base_field para comparacion visual. Solo procesa cards con display 'table' o 'pivot' que tengan ambos slots y tengan table.columns definido en visualization_settings. Args: client: Cliente Metabase autenticado. dry_run: Si True (default), escanea y reporta sin modificar nada. Si False, aplica el cambio via PUT /api/card/:id. card_ids: Lista de IDs a procesar. None = todas las cards activas. base_field: Nombre del campo base MBQL (sin sufijo _1). Por defecto 'Valor_vendido'. La funcion buscara sum(base_field) y sum(base_field_1) en las agregaciones. Returns: dict con campos: scanned (int): cards con display table/pivot evaluadas. affected (int): cards donde se encontro el par y habia que mover. fixed (int): cards efectivamente actualizadas (0 si dry_run=True). skipped (int): cards ya correctas o sin table.columns. errors (list[dict]): lista de {card_id, error} para fallos en PUT. Example: >>> from metabase import MetabaseClient, metabase_pair_n_n1_columns >>> c = MetabaseClient("https://metabase.example.com", "mb_apikey") >>> report = metabase_pair_n_n1_columns(c, dry_run=True) >>> print(report) {'scanned': 45, 'affected': 3, 'fixed': 0, 'skipped': 42, 'errors': []} >>> # Con campo personalizado: >>> report = metabase_pair_n_n1_columns(c, dry_run=False, base_field="Importe") """ all_cards = client.request("GET", "/api/card") tabular_displays = {"table", "pivot"} scanned = 0 skipped = 0 to_update: list[tuple[dict, list[dict], str, str]] = [] for card in all_cards: if card_ids is not None and card.get("id") not in set(card_ids): continue if card.get("archived", False): continue if card.get("display") not in tabular_displays: continue dq = card.get("dataset_query") if not isinstance(dq, dict): continue slot_n, slot_n1 = _find_paired_slots(dq, base_field) if not (slot_n and slot_n1): continue scanned += 1 vs = card.get("visualization_settings") or {} cols = vs.get("table.columns") if not isinstance(cols, list): skipped += 1 continue new_cols, changed, _reason = _reorder_table_columns(cols, slot_n, slot_n1) if not changed: skipped += 1 continue to_update.append((card, new_cols, slot_n, slot_n1)) fixed = 0 errors: list[dict] = [] if not dry_run: for card, new_cols, _slot_n, _slot_n1 in to_update: new_vs = copy.deepcopy(card.get("visualization_settings") or {}) new_vs["table.columns"] = new_cols try: client.request( "PUT", f"/api/card/{card['id']}", json={"visualization_settings": new_vs}, ) fixed += 1 except Exception as exc: errors.append({"card_id": card["id"], "error": str(exc)[:200]}) time.sleep(0.05) return { "scanned": scanned, "affected": len(to_update), "fixed": fixed, "skipped": skipped, "errors": errors, }