chore: auto-commit (95 archivos)

- cmd/fn/doctor.go - cmd/fn/main.go - cpp/apps/primitives_gallery/playground/tables/CMakeLists.txt - cpp/apps/primitives_gallery/playground/tables/data_table.cpp - cpp/apps/primitives_gallery/playground/tables/data_table_logic.cpp - cpp/apps/primitives_gallery/playground/tables/data_table_logic.h - cpp/apps/primitives_gallery/playground/tables/self_test.cpp - cpp/apps/primitives_gallery/playground/tables/tql.cpp - cpp/apps/primitives_gallery/playground/tables/viz.cpp - cpp/apps/primitives_gallery/playground/tables/viz.h - ... Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-13 00:50:34 +02:00
parent a2bbf23374
commit e3c8979e8d
189 changed files with 18964 additions and 330 deletions
@@ -0,0 +1,73 @@
 ---
 name: cuda_toolkit_check
 kind: function
 lang: bash
 domain: infra
 version: "1.0.0"
 purity: impure
 signature: "cuda_toolkit_check() -> void"
 description: "Detecta componentes CUDA instalados en el sistema y emite pares key=value a stdout: nvcc (version, present si no se puede extraer la version, o missing), nvidia_smi (present/missing), driver_version, cuda_libs (path o missing) y overall (ok|partial|missing). Exit code 0 siempre — funcion informativa, no fatal."
 tags: [cuda, nvidia, gpu, hardware, probe, infra, toolkit]
 uses_functions: []
 uses_types: []
 returns: []
 returns_optional: false
 error_type: "error_go_core"
 imports: []
 params:
  - name: (ninguno)
    desc: "No toma parametros. Lee el estado del sistema via nvcc, nvidia-smi y busqueda en rutas canonicas de CUDA."
 output: "Cinco pares key=value en stdout: nvcc, nvidia_smi, driver_version, cuda_libs, overall. overall=ok si los tres componentes principales estan presentes; partial si algunos; missing si ninguno."
 tested: false
 tests: []
 test_file_path: ""
 file_path: "bash/functions/infra/cuda_toolkit_check.sh"
 ---
 ## Ejemplo
 ```bash
 source bash/functions/infra/cuda_toolkit_check.sh
 cuda_toolkit_check
 ```
 Salida en maquina con CUDA completo:
 ```
 nvcc=12.4
 nvidia_smi=present
 driver_version=550.54.15
 cuda_libs=/usr/local/cuda
 overall=ok
 ```
 Salida en maquina sin CUDA:
 ```
 nvcc=missing
 nvidia_smi=missing
 driver_version=missing
 cuda_libs=missing
 overall=missing
 ```
 Invocar directamente:
 ```bash
 bash bash/functions/infra/cuda_toolkit_check.sh
 ```
 Parsear desde otro script:
 ```bash
 eval "$(cuda_toolkit_check)"
 echo "CUDA overall: $overall"
 if [[ "$overall" == "ok" ]]; then
    echo "CUDA completo: nvcc=$nvcc driver=$driver_version libs=$cuda_libs"
 fi
 ```
 ## Notas
 - Idempotente: no instala, no modifica nada, solo consulta.
 - Exit code 0 siempre — ausencia de CUDA es informacion, no fallo.
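 - `cuda_libs` se resuelve en este orden: directorios `/usr/local/cuda`, `/usr/local/cuda-*`, `/opt/cuda`, `/opt/cuda-*`; despues `libcuda.so*` en `/usr/lib/x86_64-linux-gnu` y `/usr/lib/aarch64-linux-gnu`; y como ultimo recurso `libcuda.so` via `ldconfig -p`.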
 - `driver_version` refleja el driver NVIDIA del kernel, reportado por nvidia-smi.
 - `nvcc` reporta la version del compilador CUDA toolkit (puede diferir de la version soportada por el driver).
 - Para obtener la version CUDA maxima soportada por el driver, usar `get_gpu_info_go_infra` (campo CudaVersion del struct GpuInfo).
@@ -0,0 +1,99 @@
 #!/usr/bin/env bash
 # cuda_toolkit_check — Detects the CUDA components installed on the system.
 #
 # Emits key=value pairs on stdout:
 #   nvcc=<version|present|missing>
 #   nvidia_smi=<present|missing>
 #   driver_version=<version|missing>
 #   cuda_libs=<path|missing>
 #   overall=<ok|partial|missing>
 #
 # nvcc is "present" when the binary exists but no "release X.Y" token can be
 # parsed from `nvcc --version`.
 # overall counts nvcc, nvidia-smi and cuda_libs: ok = all three found,
 # missing = none found, partial = anything in between.
 #
 # Arguments: none.
 # Returns:   0 always (informational probe, never fatal). Every probe is
 #            guarded so the function is also safe to call directly from a
 #            script running under `set -e` / `set -o pipefail`.
 # Side effects: none — it only queries the system and leaves no variables
 #            behind in the caller's scope.
 cuda_toolkit_check() {
    local nvcc_ver="missing"
    local nvidia_smi_status="missing"
    local driver_version="missing"
    local cuda_libs_path="missing"
    # --- nvcc ---
    if command -v nvcc &>/dev/null; then
        # nvcc --version prints a line such as:
        #   Cuda compilation tools, release 12.4, V12.4.131
        local raw
        raw="$(nvcc --version 2>&1)" || true
        # Extract "12.4" from "release 12.4," with a bash regex so that no
        # PCRE-enabled grep (-P) is required.
        local release_re='release ([0-9]+\.[0-9]+)'
        if [[ "$raw" =~ $release_re ]]; then
            nvcc_ver="${BASH_REMATCH[1]}"
        else
            nvcc_ver="present"
        fi
    fi
    # --- nvidia-smi + driver_version ---
    if command -v nvidia-smi &>/dev/null; then
        nvidia_smi_status="present"
        local drv
        # `|| true`: a failing nvidia-smi must not abort callers that use
        # errexit/pipefail.
        drv="$(nvidia-smi --query-gpu=driver_version --format=csv,noheader 2>/dev/null | head -n1 | tr -d ' ')" || true
        # Accept only dotted-numeric output so that a diagnostic message
        # printed on stdout is never reported as the driver version.
        local version_re='^[0-9]+(\.[0-9]+)*$'
        if [[ "$drv" =~ $version_re ]]; then
            driver_version="$drv"
        fi
    fi
    # --- cuda_libs: probe the canonical locations ---
    # Globs are expanded here; a glob without matches stays as the literal
    # pattern and is rejected by the -e test below.
    local -a search_paths=(
        "/usr/local/cuda"
        "/usr/local/cuda-"*
        "/opt/cuda"
        "/opt/cuda-"*
        "/usr/lib/x86_64-linux-gnu/libcuda.so"*
        "/usr/lib/aarch64-linux-gnu/libcuda.so"*
    )
    local candidate
    for candidate in "${search_paths[@]}"; do
        [[ -e "$candidate" ]] || continue
        if [[ -d "$candidate" ]]; then
            # A toolkit directory is reported as-is.
            cuda_libs_path="$candidate"
        else
            # A libcuda.so* file is reported by its containing directory,
            # the same way the ldconfig fallback below reports it.
            cuda_libs_path="$(dirname "$candidate")"
        fi
        break
    done
    # Fallback: ask the dynamic linker cache for libcuda.so.
    if [[ "$cuda_libs_path" == "missing" ]]; then
        local libcuda
        libcuda="$(ldconfig -p 2>/dev/null | grep 'libcuda\.so' | head -n1 | awk '{print $NF}')" || true
        if [[ -n "$libcuda" ]]; then
            cuda_libs_path="$(dirname "$libcuda")"
        fi
    fi
    # --- overall ---
    # Plain assignments instead of ((found_count++)): the post-increment
    # evaluates to 0 on the first hit, which is exit status 1 and would trip
    # errexit in the caller.
    local found_count=0
    if [[ "$nvcc_ver" != "missing" ]]; then found_count=$((found_count + 1)); fi
    if [[ "$nvidia_smi_status" != "missing" ]]; then found_count=$((found_count + 1)); fi
    if [[ "$cuda_libs_path" != "missing" ]]; then found_count=$((found_count + 1)); fi
    local overall
    case "$found_count" in
        0) overall="missing" ;;
        3) overall="ok" ;;
        *) overall="partial" ;;
    esac
    # --- emit results ---
    printf '%s\n' \
        "nvcc=${nvcc_ver}" \
        "nvidia_smi=${nvidia_smi_status}" \
        "driver_version=${driver_version}" \
        "cuda_libs=${cuda_libs_path}" \
        "overall=${overall}"
 }
 # Run the probe only when this file is executed directly; when the file is
 # sourced, BASH_SOURCE[0] differs from $0 and only the function is defined.
 if [[ "${BASH_SOURCE[0]}" == "${0}" ]]; then
    cuda_toolkit_check "$@"
 fi
@@ -0,0 +1,111 @@
 #!/usr/bin/env bash
 # Tests for cuda_toolkit_check
 # Smoke test: checks that stdout contains every required key and that the
 # exit code is 0.
 set -uo pipefail
 # Note: set -e is deliberately NOT used, so that failed asserts accumulate
 # instead of aborting the script at the first failure.
 # Resolve this script's own directory and source the function under test
 # from its parent directory.
 SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
 source "$SCRIPT_DIR/../cuda_toolkit_check.sh"
 # Global pass/fail counters updated by the assert_* helpers.
 PASS=0
 FAIL=0
 # assert_eq NAME EXPECTED GOT
 # Prints "PASS: NAME" and bumps PASS when both strings are identical;
 # otherwise prints a FAIL line showing both values and bumps FAIL.
 assert_eq() {
    local label="$1" want="$2" actual="$3"
    if [[ "$want" != "$actual" ]]; then
        echo "FAIL: $label — expected '$want', got '$actual'"
        FAIL=$((FAIL + 1))
        return 0
    fi
    echo "PASS: $label"
    PASS=$((PASS + 1))
 }
 # assert_contains NAME NEEDLE HAYSTACK
 # Passes when NEEDLE occurs in HAYSTACK as a fixed string (no regex).
 # Prints a PASS/FAIL line and bumps the matching global counter.
 assert_contains() {
    local test_name="$1" needle="$2" haystack="$3"
    # `--` keeps a needle that starts with '-' from being parsed as a grep
    # option, and the here-string feeds the haystack verbatim (echo would
    # swallow values such as "-n").
    if grep -qF -- "$needle" <<<"$haystack"; then
        echo "PASS: $test_name"
        ((PASS++)) || true
    else
        echo "FAIL: $test_name — '$needle' not found in output"
        ((FAIL++)) || true
    fi
 }
 # assert_matches_pattern NAME PATTERN VALUE
 # Passes when VALUE matches the extended regular expression PATTERN.
 # Prints a PASS/FAIL line and bumps the matching global counter.
 assert_matches_pattern() {
    local test_name="$1" pattern="$2" value="$3"
    # The here-string passes VALUE verbatim (echo would treat "-n"/"-e" as
    # its own options) and `--` protects patterns that start with '-'.
    if grep -qE -- "$pattern" <<<"$value"; then
        echo "PASS: $test_name"
        ((PASS++)) || true
    else
        echo "FAIL: $test_name — '$value' does not match pattern '$pattern'"
        ((FAIL++)) || true
    fi
 }
 # assert_nonempty NAME VALUE
 # Passes when VALUE is not the empty string. Prints a PASS/FAIL line and
 # bumps the matching global counter.
 assert_nonempty() {
    local label="$1" candidate="$2"
    if [[ -z "$candidate" ]]; then
        echo "FAIL: $label — valor vacio"
        FAIL=$((FAIL + 1))
        return 0
    fi
    echo "PASS: $label"
    PASS=$((PASS + 1))
 }
 # --- Capture the output and exit status of a single run ---
 OUTPUT="$(cuda_toolkit_check)"
 EXIT_CODE=$?
 # --- Test: exit code is 0 ---
 assert_eq "exit code es 0" "0" "$EXIT_CODE"
 # --- Tests: stdout contains every required key, in emission order ---
 for key in nvcc nvidia_smi driver_version cuda_libs overall; do
    assert_contains "stdout contiene clave ${key}=" "${key}=" "$OUTPUT"
 done
 # --- Test: overall holds a valid value (ok|partial|missing) ---
 OVERALL_VAL="$(grep '^overall=' <<<"$OUTPUT" | cut -d= -f2)"
 assert_matches_pattern "overall tiene valor valido ok|partial|missing" "^(ok|partial|missing)$" "$OVERALL_VAL"
 # --- Test: nvcc has a non-empty value ---
 NVCC_VAL="$(grep '^nvcc=' <<<"$OUTPUT" | cut -d= -f2)"
 assert_nonempty "nvcc tiene valor no vacio" "$NVCC_VAL"
 # --- Test: nvidia_smi holds a valid value (present|missing) ---
 SMI_VAL="$(grep '^nvidia_smi=' <<<"$OUTPUT" | cut -d= -f2)"
 assert_matches_pattern "nvidia_smi tiene valor valido present|missing" "^(present|missing)$" "$SMI_VAL"
 # --- Test: driver_version has a non-empty value ---
 DRV_VAL="$(grep '^driver_version=' <<<"$OUTPUT" | cut -d= -f2)"
 assert_nonempty "driver_version tiene valor no vacio" "$DRV_VAL"
 # --- Test: cuda_libs has a non-empty value ---
 LIBS_VAL="$(grep '^cuda_libs=' <<<"$OUTPUT" | cut -d= -f2)"
 assert_nonempty "cuda_libs tiene valor no vacio" "$LIBS_VAL"
 # --- Test: the output has exactly 5 lines ---
 LINE_COUNT="$(wc -l <<<"$OUTPUT" | tr -d ' ')"
 assert_eq "salida tiene exactamente 5 lineas" "5" "$LINE_COUNT"
 # --- Test: a second invocation yields the same output ---
 OUTPUT2="$(cuda_toolkit_check)"
 assert_eq "segunda invocacion produce mismo resultado (idempotente)" "$OUTPUT" "$OUTPUT2"
 # --- Summary: exit non-zero when any assert failed ---
 echo "---"
 echo "Results: $PASS passed, $FAIL failed"
 if [[ $FAIL -ne 0 ]]; then
    exit 1
 fi
@@ -0,0 +1,90 @@
 ---
 name: vault_audit
 kind: pipeline
 lang: bash
 domain: pipelines
 version: "1.0.0"
 purity: impure
 signature: "vault_audit(<vault_name> | --all) [--skip-profilers] [--dry-run-layout] -> void"
 description: "Pipeline completo de auditoria para uno o todos los vaults declarados: layout-ensure, index, profile (csv/pdf/md), dedupe, aggregate y doctor. Produce tabla resumen con estado por vault y codigo de salida 4 si hay warnings."
 tags: [vault, audit, pipeline, launcher, infra, bash]
 uses_functions:
  - vault_layout_ensure_go_infra
  - vault_inventory_scan_go_infra
  - vault_index_open_go_infra
  - vault_index_write_go_infra
  - vault_csv_profile_py_datascience
  - vault_pdf_extract_py_datascience
  - vault_knowledge_parse_py_infra
  - vault_dedupe_report_py_infra
  - vault_aggregate_index_go_infra
  - vault_doctor_go_infra
 uses_types: []
 returns: []
 returns_optional: false
 error_type: "error_go_core"
 imports: []
 params:
  - name: vault_name
    desc: "Nombre del vault a auditar (como aparece en registry.db tabla vaults). Usar --all para todos."
  - name: --all
    desc: "Audita todos los vaults declarados en registry.db. Mutuamente excluyente con vault_name."
  - name: --skip-profilers
    desc: "Omite el paso de profiling CSV/PDF/MD. Util para auditorias rapidas de inventario."
  - name: --dry-run-layout
    desc: "Pasa --dry-run a vault layout-ensure: calcula cambios sin tocar el disco."
 output: "Tabla de resumen por vault con status ok/warn. Codigo de salida 0=exito, 1=registry.db o binario fn no localizables, argumentos invalidos o ningun vault registrado, 4=uno o mas vaults con warnings."
 tested: false
 tests: []
 test_file_path: ""
 file_path: "bash/functions/pipelines/vault_audit.sh"
 ---
 ## Ejemplo
 ```bash
 # Auditar un vault especifico
 FN_REGISTRY_ROOT=/home/lucas/fn_registry \
  bash bash/functions/pipelines/vault_audit.sh turismo_spain
 # Auditar todos los vaults
 FN_REGISTRY_ROOT=/home/lucas/fn_registry \
  bash bash/functions/pipelines/vault_audit.sh --all
 # Solo layout + index + aggregate (sin profilers, mas rapido)
 bash bash/functions/pipelines/vault_audit.sh turismo_spain --skip-profilers
 # Ver que haria layout-ensure sin tocar disco
 bash bash/functions/pipelines/vault_audit.sh turismo_spain --dry-run-layout
 # Equivalente via fn run (desde la raiz del registry)
 ./fn run vault_audit_bash_pipelines turismo_spain
 ```
 ## Pasos del pipeline
 1. **layout-ensure** — `fn vault layout-ensure <name>` asegura `data/{raw,processed,exports}` y `knowledge/{...}`.
 2. **index** — `fn vault index <name>` escanea archivos y persiste en `vault_index.db`.
 3. **profile** — `fn vault profile <name>` llama `vault_profile_dispatch.py` para CSV/PDF/MD.
 4. **dedupe** — `fn vault dedupe <name>` detecta duplicados por sha256 (informacional, no fatal).
 5. **aggregate** — `fn vault aggregate` copia todo a `registry.db` tabla `vault_files` (una sola vez al final).
 6. **doctor** — `fn vault doctor` muestra estado de salud de todos los vaults.
 ## Codigos de salida
 | Codigo | Significado |
 |--------|-------------|
 | 0 | Todos los vaults procesados sin errores |
 | 1 | FN_REGISTRY_ROOT no localizable o fn binary no encontrado |
 | 4 | Uno o mas vaults con warnings (layout o index fallaron) |
 ## Variables de entorno
 - `FN_REGISTRY_ROOT` — raiz del registry (auto-detectada si no esta seteada).
 - `FN_BIN` — path al binario `fn` (default: `$FN_REGISTRY_ROOT/fn`).
 ## Notas
 Requiere `sqlite3` en PATH para resolver la lista de vaults con `--all`.
 El paso de profile es non-fatal: errores en profilers individuales se reportan como warnings.
 El paso de dedupe es siempre informacional (no borra archivos).
@@ -0,0 +1,172 @@
 #!/usr/bin/env bash
 # vault_audit — Full audit pipeline for one or all declared vaults.
 # Runs: layout-ensure → index → profile → dedupe → aggregate → doctor
 #
 # Usage:
 #   vault_audit.sh <vault_name>
 #   vault_audit.sh --all
 #   vault_audit.sh <vault_name> --skip-profilers
 #   vault_audit.sh <vault_name> --dry-run-layout
 #   vault_audit.sh --all --skip-profilers
 set -euo pipefail
 # --- locate FN_REGISTRY_ROOT ---
 # Walks upward from the current working directory and prints the first
 # directory that contains a registry.db file. The filesystem root "/" itself
 # is never inspected. Returns 0 when a directory is printed, 1 otherwise.
 _find_registry_root() {
    local here
    here="$(pwd)"
    until [[ "$here" == "/" ]]; do
        if [[ -f "$here/registry.db" ]]; then
            echo "$here"
            return 0
        fi
        here="$(dirname "$here")"
    done
    return 1
 }
 # Resolve REGISTRY_ROOT: a FN_REGISTRY_ROOT that really holds a registry.db
 # wins; otherwise search upward from the working directory.
 if [[ -z "${FN_REGISTRY_ROOT:-}" || ! -f "${FN_REGISTRY_ROOT}/registry.db" ]]; then
    if ! REGISTRY_ROOT="$(_find_registry_root 2>/dev/null)"; then
        echo "ERROR: Cannot locate registry.db. Set FN_REGISTRY_ROOT or run from registry root." >&2
        exit 1
    fi
 else
    REGISTRY_ROOT="$FN_REGISTRY_ROOT"
 fi
 # Resolve the fn binary: FN_BIN from the environment, else <root>/fn.
 # It must exist and be executable.
 FN_BIN="${FN_BIN:-${REGISTRY_ROOT}/fn}"
 if ! [[ -x "$FN_BIN" ]]; then
    echo "ERROR: fn binary not found at $FN_BIN. Build with: CGO_ENABLED=1 go build -tags fts5 -o fn ./cmd/fn/" >&2
    exit 1
 fi
 # --- parse args ---
 # Flags may appear in any position; every non-flag word is collected as a
 # vault name. START_TS is recorded here for the elapsed-time summary.
 AUDIT_ALL=0
 SKIP_PROFILERS=0
 DRY_RUN_LAYOUT=0
 VAULT_NAMES=()
 START_TS=$(date +%s)
 until [[ $# -eq 0 ]]; do
    arg="$1"
    shift
    case "$arg" in
        --all)
            AUDIT_ALL=1
            ;;
        --skip-profilers)
            SKIP_PROFILERS=1
            ;;
        --dry-run-layout)
            DRY_RUN_LAYOUT=1
            ;;
        -*)
            echo "ERROR: Unknown flag: $arg" >&2
            echo "Usage: vault_audit.sh <name> | --all [--skip-profilers] [--dry-run-layout]" >&2
            exit 1
            ;;
        *)
            VAULT_NAMES+=("$arg")
            ;;
    esac
 done
 # Neither --all nor a vault name was given: print usage and stop.
 if ! [[ $AUDIT_ALL -ne 0 || ${#VAULT_NAMES[@]} -gt 0 ]]; then
    echo "Usage: vault_audit.sh <vault_name> | --all [--skip-profilers] [--dry-run-layout]" >&2
    exit 1
 fi
 # --- resolve vault list ---
 # With --all the vault names come from the `vaults` table of registry.db and
 # replace any names given on the command line.
 if [[ $AUDIT_ALL -eq 1 ]]; then
    # Report a missing sqlite3 or a failed query explicitly instead of
    # letting it surface as "No vaults registered".
    if ! command -v sqlite3 >/dev/null 2>&1; then
        echo "ERROR: sqlite3 not found in PATH (required by --all)." >&2
        exit 1
    fi
    if ! vault_rows="$(sqlite3 "${REGISTRY_ROOT}/registry.db" "SELECT name FROM vaults ORDER BY name;")"; then
        echo "ERROR: could not read the vaults table from ${REGISTRY_ROOT}/registry.db." >&2
        exit 1
    fi
    VAULT_NAMES=()
    # Guarded because mapfile on an empty here-string yields one empty entry.
    if [[ -n "$vault_rows" ]]; then
        mapfile -t VAULT_NAMES <<<"$vault_rows"
    fi
    if [[ ${#VAULT_NAMES[@]} -eq 0 ]]; then
        echo "No vaults registered in registry.db. Run 'fn index' first." >&2
        exit 1
    fi
    echo "Found ${#VAULT_NAMES[@]} vault(s): ${VAULT_NAMES[*]}"
 fi
 # --- build fn vault flags ---
 # Extra flags passed to `fn vault layout-ensure`; empty unless
 # --dry-run-layout was requested.
 LAYOUT_FLAGS=()
 if [[ $DRY_RUN_LAYOUT -eq 1 ]]; then
    LAYOUT_FLAGS+=(--dry-run)
 fi
 # --- per-vault audit ---
 # Global tallies and per-vault status ("ok" | "warn"), filled by audit_one.
 PASS_COUNT=0
 FAIL_COUNT=0
 declare -A VAULT_STATUS
 # audit_one VAULT_NAME
 # Runs layout-ensure, index, profile (unless SKIP_PROFILERS=1) and dedupe
 # for one vault through "$FN_BIN", indenting each command's combined
 # stdout/stderr by four spaces.
 # Globals read:    FN_BIN, LAYOUT_FLAGS, SKIP_PROFILERS
 # Globals written: VAULT_STATUS[VAULT_NAME], PASS_COUNT, FAIL_COUNT
 # A failing layout-ensure or index marks the vault "warn"; profile and
 # dedupe failures never change the status. Detecting a failed fn command
 # through the `| sed` pipeline relies on the caller's `set -o pipefail`.
 audit_one() {
    local name="$1"
    local vault_ok=1
    echo ""
    echo "=== vault: $name ==="
    # Step 1: layout-ensure
    echo "  [1/5] layout-ensure"
    # ${arr[@]+"${arr[@]}"} expands to nothing for an empty array; a plain
    # "${LAYOUT_FLAGS[@]}" is an unbound-variable error under `set -u` on
    # bash older than 4.4.
    if ! "$FN_BIN" vault layout-ensure "$name" ${LAYOUT_FLAGS[@]+"${LAYOUT_FLAGS[@]}"} 2>&1 | sed 's/^/    /'; then
        echo "    WARN: layout-ensure failed (non-fatal)" >&2
        vault_ok=0
    fi
    # Step 2: index
    echo "  [2/5] index"
    if ! "$FN_BIN" vault index "$name" 2>&1 | sed 's/^/    /'; then
        echo "    ERROR: index failed" >&2
        vault_ok=0
    fi
    # Step 3: profile (errors are reported but do not affect the status)
    if [[ $SKIP_PROFILERS -eq 0 ]]; then
        echo "  [3/5] profile"
        if ! "$FN_BIN" vault profile "$name" 2>&1 | sed 's/^/    /'; then
            echo "    WARN: profile had errors (non-fatal)" >&2
        fi
    else
        echo "  [3/5] profile (skipped)"
    fi
    # Step 4: dedupe (informational, non-fatal)
    echo "  [4/5] dedupe"
    "$FN_BIN" vault dedupe "$name" 2>&1 | sed 's/^/    /' || true
    # Step 5 deferred — aggregate runs once at the end
    echo "  [5/5] aggregate (deferred)"
    if [[ $vault_ok -eq 1 ]]; then
        VAULT_STATUS["$name"]="ok"
        PASS_COUNT=$((PASS_COUNT + 1))
    else
        VAULT_STATUS["$name"]="warn"
        FAIL_COUNT=$((FAIL_COUNT + 1))
    fi
 }
 # --- audit every requested vault ---
 for current_vault in "${VAULT_NAMES[@]}"; do
    audit_one "$current_vault"
 done
 # --- aggregate (once, after all vaults) ---
 # NOTE(review): unlike doctor below, this pipeline has no `|| true`, so with
 # the script's `set -euo pipefail` a failing aggregate stops the run before
 # the summary is printed — confirm that this is intended.
 echo ""
 echo "=== aggregate ==="
 "$FN_BIN" vault aggregate 2>&1 | sed 's/^/  /'
 # --- doctor (read-only health check, never fatal) ---
 echo ""
 echo "=== doctor ==="
 "$FN_BIN" vault doctor 2>&1 | sed 's/^/  /' || true
 # --- summary table ---
 # Prints one two-column row of the summary table.
 _summary_row() {
    printf "%-30s  %s\n" "$1" "$2"
 }
 END_TS=$(date +%s)
 ELAPSED=$(( END_TS - START_TS ))
 echo ""
 echo "=== summary ==="
 _summary_row "VAULT" "STATUS"
 _summary_row "-----" "------"
 for vault_name in "${VAULT_NAMES[@]}"; do
    _summary_row "$vault_name" "${VAULT_STATUS[$vault_name]:-unknown}"
 done
 echo ""
 echo "Done: ${PASS_COUNT} ok, ${FAIL_COUNT} warn  (${ELAPSED}s)"
 # Exit 4 when at least one vault ended in "warn", 0 otherwise.
 if (( FAIL_COUNT > 0 )); then
    exit 4
 fi
 exit 0
@@ -0,0 +1,318 @@
 package main
 import (
 	"encoding/json"
 	"fmt"
 	"os"
 	"os/exec"
 	"path/filepath"
 	"strings"
 	"testing"
 	"time"
 	"fn-registry/functions/infra"
 	"fn-registry/registry"
 )
 // fnBinDir holds the temp directory for the compiled fn binary.
 // It is created by TestMain and cleaned up at test end.
 var fnBinDir string
 var fnBinPath string
 // TestMain compiles the fn binary once before all tests, runs them, and
 // exits with the suite's status.
 func TestMain(m *testing.M) {
 	os.Exit(runTestMain(m))
 }
 // runTestMain does the actual setup, test run and teardown. It exists
 // because os.Exit does not run deferred functions: keeping the deferred
 // os.RemoveAll in a function that returns normally guarantees the temp
 // directory holding the compiled binary is removed on every path.
 // It returns the process exit code (1 on setup failure, m.Run() otherwise).
 func runTestMain(m *testing.M) int {
 	var err error
 	fnBinDir, err = os.MkdirTemp("", "fn-vault-test-*")
 	if err != nil {
 		fmt.Fprintf(os.Stderr, "create temp dir: %v\n", err)
 		return 1
 	}
 	defer os.RemoveAll(fnBinDir)
 	fnBinPath = filepath.Join(fnBinDir, "fn")
 	// Find the module root (directory with go.mod) by walking up from the
 	// current directory.
 	regRoot, err := findRoot()
 	if err != nil {
 		fmt.Fprintf(os.Stderr, "find root: %v\n", err)
 		return 1
 	}
 	// Build ./cmd/fn with the fts5 tag into the temp directory.
 	cmd := exec.Command("go", "build", "-tags", "fts5", "-o", fnBinPath, ".")
 	cmd.Dir = filepath.Join(regRoot, "cmd", "fn")
 	if out, errB := cmd.CombinedOutput(); errB != nil {
 		fmt.Fprintf(os.Stderr, "build fn: %v\n%s\n", errB, out)
 		return 1
 	}
 	return m.Run()
 }
 func findRoot() (string, error) {
 	dir, err := os.Getwd()
 	if err != nil {
 		return "", err
 	}
 	for {
 		if _, err := os.Stat(filepath.Join(dir, "go.mod")); err == nil {
 			return dir, nil
 		}
 		parent := filepath.Dir(dir)
 		if parent == dir {
 			return "", fmt.Errorf("could not find go.mod from %s", dir)
 		}
 		dir = parent
 	}
 }
 // ensureFnBin returns the path of the fn binary stored in the package-level
 // fnBinPath variable. It performs no build or existence check itself.
 func ensureFnBin(t *testing.T) string {
 	t.Helper()
 	return fnBinPath
 }
 // setupTestRegistry creates a minimal registry root with:
 //   - a vault directory (in its own temp dir) holding data/raw/report.csv
 //     and data/raw/notes.md
 //   - projects/test_proj/vaults/vault.yaml declaring "test_vault" with the
 //     absolute path of that vault directory
 //   - projects/test_proj/project.md
 //   - registry.db, opened via registry.Open and populated via registry.Index
 //
 // The registry handle is closed before returning, including when indexing
 // fails. Returns (repoRoot, vaultDir); any setup error fails the test.
 func setupTestRegistry(t *testing.T) (string, string) {
 	t.Helper()
 	repoRoot := t.TempDir()
 	// Create vault directory with files.
 	vaultDir := filepath.Join(t.TempDir(), "test_vault")
 	if err := os.MkdirAll(filepath.Join(vaultDir, "data", "raw"), 0755); err != nil {
 		t.Fatal(err)
 	}
 	if err := os.WriteFile(filepath.Join(vaultDir, "data", "raw", "report.csv"),
 		[]byte("name,value\nfoo,1"), 0644); err != nil {
 		t.Fatal(err)
 	}
 	if err := os.WriteFile(filepath.Join(vaultDir, "data", "raw", "notes.md"),
 		[]byte("# Notes\nsome text"), 0644); err != nil {
 		t.Fatal(err)
 	}
 	// Create project directory structure.
 	projDir := filepath.Join(repoRoot, "projects", "test_proj")
 	vaultsDir := filepath.Join(projDir, "vaults")
 	if err := os.MkdirAll(vaultsDir, 0755); err != nil {
 		t.Fatal(err)
 	}
 	// Create vault.yaml.
 	vaultYAML := "vaults:\n  - name: test_vault\n    description: Test vault for unit tests\n    path: " + vaultDir + "\n    tags: [test]\n"
 	if err := os.WriteFile(filepath.Join(vaultsDir, "vault.yaml"), []byte(vaultYAML), 0644); err != nil {
 		t.Fatal(err)
 	}
 	// Create project.md.
 	projMD := "---\nname: test_proj\ndescription: Test project\ntags: [test]\n---\n"
 	if err := os.WriteFile(filepath.Join(projDir, "project.md"), []byte(projMD), 0644); err != nil {
 		t.Fatal(err)
 	}
 	// Open registry.db.
 	db, err := registry.Open(filepath.Join(repoRoot, "registry.db"))
 	if err != nil {
 		t.Fatalf("registry.Open: %v", err)
 	}
 	// Deferred so the handle is also released when t.Fatalf below unwinds
 	// this function.
 	defer db.Close()
 	// Index so the vault is registered in registry.db.
 	if _, err := registry.Index(db, repoRoot); err != nil {
 		t.Fatalf("registry.Index: %v", err)
 	}
 	return repoRoot, vaultDir
 }
 // runFn runs the fn binary with the given args, using repoRoot as the
 // working directory, and returns (stdout, stderr, exit code).
 //
 // A non-zero exit of fn is reported through the returned code. Any other
 // failure (for example the binary cannot be started) fails the test
 // immediately: reporting it as exit code 0 would let callers that only
 // check the code pass without fn ever having run.
 func runFn(t *testing.T, repoRoot string, args ...string) (string, string, int) {
 	t.Helper()
 	bin := ensureFnBin(t)
 	cmd := exec.Command(bin, args...)
 	cmd.Dir = repoRoot
 	var stdout, stderr strings.Builder
 	cmd.Stdout = &stdout
 	cmd.Stderr = &stderr
 	if err := cmd.Run(); err != nil {
 		exitErr, ok := err.(*exec.ExitError)
 		if !ok {
 			t.Fatalf("run %s %v: %v", bin, args, err)
 		}
 		return stdout.String(), stderr.String(), exitErr.ExitCode()
 	}
 	return stdout.String(), stderr.String(), 0
 }
 // TestVaultList checks that 'fn vault list' exits 0 and mentions the vault
 // registered by setupTestRegistry.
 func TestVaultList(t *testing.T) {
 	root, _ := setupTestRegistry(t)
 	stdout, stderr, exit := runFn(t, root, "vault", "list")
 	if exit != 0 {
 		t.Fatalf("fn vault list exit %d\nstderr: %s", exit, stderr)
 	}
 	if strings.Contains(stdout, "test_vault") {
 		return
 	}
 	t.Errorf("expected 'test_vault' in output, got:\n%s", stdout)
 }
 // TestVaultIndex checks that 'fn vault index <name>' exits 0 and that its
 // stdout contains the word "indexed".
 func TestVaultIndex(t *testing.T) {
 	root, _ := setupTestRegistry(t)
 	stdout, stderr, exit := runFn(t, root, "vault", "index", "test_vault")
 	if exit != 0 {
 		t.Fatalf("fn vault index exit %d\nstderr: %s\nstdout: %s", exit, stderr, stdout)
 	}
 	if strings.Contains(stdout, "indexed") {
 		return
 	}
 	t.Errorf("expected 'indexed' in output, got:\n%s", stdout)
 }
 // TestVaultSearchJSON verifies that 'fn vault search --json' exits 0 and
 // prints output that unmarshals into a JSON array of objects. The number of
 // hits is only logged, not asserted.
 func TestVaultSearchJSON(t *testing.T) {
 	repoRoot, vaultDir := setupTestRegistry(t)
 	// First index the vault so there is something to search.
 	if _, _, code := runFn(t, repoRoot, "vault", "index", "test_vault"); code != 0 {
 		t.Fatal("fn vault index failed")
 	}
 	// Seed some content into the vault index for the search to find.
 	db, err := infra.VaultIndexOpen(vaultDir)
 	if err != nil {
 		t.Fatalf("VaultIndexOpen: %v", err)
 	}
 	// Replace the files_fts row for report.csv with known text.
 	// NOTE(review): the results of both Exec calls are discarded, so a failed
 	// seed goes unnoticed here — consider checking them once the handle's
 	// Exec signature is confirmed.
 	db.Exec(`DELETE FROM files_fts WHERE rel_path = 'data/raw/report.csv'`)
 	db.Exec(`INSERT INTO files_fts(rel_path, content_text) VALUES ('data/raw/report.csv', 'foo report data')`)
 	// Closed before the fn binary is run against the same vault.
 	db.Close()
 	out, stderr, code := runFn(t, repoRoot, "vault", "search", "report", "--json", "--vault", "test_vault")
 	if code != 0 {
 		t.Fatalf("fn vault search exit %d\nstderr: %s", code, stderr)
 	}
 	var result []map[string]interface{}
 	if err := json.Unmarshal([]byte(out), &result); err != nil {
 		t.Fatalf("output is not valid JSON: %v\nraw: %s", err, out)
 	}
 	// Should be a JSON array (possibly empty if search finds nothing, but must be valid).
 	t.Logf("search returned %d hits", len(result))
 }
 // TestVaultInfo indexes the test vault and then checks that
 // 'fn vault info <name>' exits 0 and prints both the vault name and a
 // "Files:" field.
 func TestVaultInfo(t *testing.T) {
 	root, _ := setupTestRegistry(t)
 	// The vault has to be indexed before info is queried.
 	_, _, indexExit := runFn(t, root, "vault", "index", "test_vault")
 	if indexExit != 0 {
 		t.Fatal("fn vault index failed")
 	}
 	stdout, stderr, exit := runFn(t, root, "vault", "info", "test_vault")
 	if exit != 0 {
 		t.Fatalf("fn vault info exit %d\nstderr: %s", exit, stderr)
 	}
 	hasName := strings.Contains(stdout, "test_vault")
 	hasFiles := strings.Contains(stdout, "Files:")
 	if !hasName {
 		t.Errorf("expected vault name in output, got:\n%s", stdout)
 	}
 	if !hasFiles {
 		t.Errorf("expected 'Files:' in output, got:\n%s", stdout)
 	}
 }
 // TestFormatBytes checks formatBytes against a table of byte counts and
 // their expected human-readable strings (B, KB, MB, GB).
 func TestFormatBytes(t *testing.T) {
 	type sample struct {
 		size int64
 		want string
 	}
 	table := []sample{
 		{size: 500, want: "500 B"},
 		{size: 1024, want: "1.0 KB"},
 		{size: 1536, want: "1.5 KB"},
 		{size: 1048576, want: "1.0 MB"},
 		{size: 1073741824, want: "1.0 GB"},
 	}
 	for i := range table {
 		s := table[i]
 		if got := formatBytes(s.size); got != s.want {
 			t.Errorf("formatBytes(%d) = %q, want %q", s.size, got, s.want)
 		}
 	}
 }
 // TestVaultLayoutEnsure checks that 'fn vault layout-ensure <name> --dry-run'
 // exits 0 and mentions the vault name in its stdout.
 func TestVaultLayoutEnsure(t *testing.T) {
 	root, _ := setupTestRegistry(t)
 	stdout, stderr, exit := runFn(t, root, "vault", "layout-ensure", "test_vault", "--dry-run")
 	if exit != 0 {
 		t.Fatalf("fn vault layout-ensure exit %d\nstderr: %s\nstdout: %s", exit, stderr, stdout)
 	}
 	if strings.Contains(stdout, "test_vault") {
 		return
 	}
 	t.Errorf("expected vault name in output, got:\n%s", stdout)
 }
 // TestVaultAggregate indexes the test vault in a fresh registry and checks
 // that 'fn vault aggregate' then exits 0.
 func TestVaultAggregate(t *testing.T) {
 	root, _ := setupTestRegistry(t)
 	// Index first so aggregate has rows to work with.
 	_, _, indexExit := runFn(t, root, "vault", "index", "test_vault")
 	if indexExit != 0 {
 		t.Fatal("fn vault index failed")
 	}
 	if _, stderr, exit := runFn(t, root, "vault", "aggregate"); exit != 0 {
 		t.Fatalf("fn vault aggregate exit %d\nstderr: %s", exit, stderr)
 	}
 }
 // TestVaultDoctor checks that 'fn vault doctor' exits 0 and lists the test
 // vault in its stdout.
 func TestVaultDoctor(t *testing.T) {
 	root, _ := setupTestRegistry(t)
 	stdout, stderr, exit := runFn(t, root, "vault", "doctor")
 	if exit != 0 {
 		t.Fatalf("fn vault doctor exit %d\nstderr: %s", exit, stderr)
 	}
 	if strings.Contains(stdout, "test_vault") {
 		return
 	}
 	t.Errorf("expected 'test_vault' in doctor output, got:\n%s", stdout)
 }
 // TestVaultDedupe indexes the test vault and checks that
 // 'fn vault dedupe <name>' exits 0. The stdout content is not inspected:
 // a "No duplicates" message and a table are both acceptable.
 func TestVaultDedupe(t *testing.T) {
 	root, _ := setupTestRegistry(t)
 	_, _, indexExit := runFn(t, root, "vault", "index", "test_vault")
 	if indexExit != 0 {
 		t.Fatal("fn vault index failed")
 	}
 	if _, stderr, exit := runFn(t, root, "vault", "dedupe", "test_vault"); exit != 0 {
 		t.Fatalf("fn vault dedupe exit %d\nstderr: %s", exit, stderr)
 	}
 }
 // TestVaultAuditDryRun runs 'fn vault audit <name> --dry-run-layout
 // --skip-profilers' and checks that it exits with 0 or 4 and that its stdout
 // contains a "summary" section.
 func TestVaultAuditDryRun(t *testing.T) {
 	root, _ := setupTestRegistry(t)
 	stdout, stderr, exit := runFn(t, root, "vault", "audit", "test_vault",
 		"--dry-run-layout", "--skip-profilers")
 	switch exit {
 	case 0, 4:
 		// 0 = fully ok; 4 = warnings (layout issues). Both are accepted.
 	default:
 		t.Fatalf("fn vault audit exit %d\nstderr: %s\nstdout: %s", exit, stderr, stdout)
 	}
 	if strings.Contains(stdout, "summary") {
 		return
 	}
 	t.Errorf("expected 'summary' section in audit output, got:\n%s", stdout)
 }
 // Suppress unused import for time.
 var _ = time.Now
@@ -44,6 +44,10 @@ func cmdDoctor(args []string) {
 		doctorUnused(r, jsonOut)
 	case "cpp-apps":
 		doctorCppApps(r, jsonOut)
 	case "ml":
 		doctorML(r, jsonOut)
 	case "vaults":
 		doctorVaults(r, jsonOut)
 	default:
 		fmt.Fprintf(os.Stderr, "unknown doctor subcommand: %s\n", sub)
 		doctorUsage()
@@ -65,6 +69,8 @@ Subcommands:
  uses-functions  Audit imports reales vs uses_functions del app.md
  unused          Funciones del registry sin consumidores
  cpp-apps        Conformidad de apps C++ con cpp/PATTERNS.md (cfg.about, dockspace, menubar)
  ml              Entorno ML: GPUs NVIDIA, CUDA toolkit, venv Python, paquetes torch/diffusers, CLIs y vault
  vaults          Salud de vaults: directorio, layout, índice, staleness, drift
 Flags:
  --json          Salida JSON (para scripting/agentes)`)
@@ -103,6 +109,16 @@ func doctorAll(root string, jsonOut bool) {
 		} else {
 			all["cpp_apps_error"] = err.Error()
 		}
 		if v, err := infra.AuditMlEnv(root); err == nil {
 			all["ml"] = v
 		} else {
 			all["ml_error"] = err.Error()
 		}
 		if v, err := infra.VaultDoctor(root); err == nil {
 			all["vaults"] = v
 		} else {
 			all["vaults_error"] = err.Error()
 		}
 		emit(all)
 		return
 	}
@@ -119,6 +135,10 @@ func doctorAll(root string, jsonOut bool) {
 	doctorUnused(root, false)
 	fmt.Println("\n=== C++ apps standard conformance ===")
 	doctorCppApps(root, false)
 	fmt.Println("\n=== ML environment ===")
 	doctorML(root, false)
 	fmt.Println("\n=== Vaults ===")
 	doctorVaults(root, false)
 }
 func doctorCppApps(root string, jsonOut bool) {
@@ -280,6 +300,81 @@ func doctorUnused(root string, jsonOut bool) {
 	fmt.Printf("\n%d unused functions (candidates to remove).\n", len(unused))
 }
 // doctorVaults prints the result of infra.VaultDoctor for root: as JSON via
 // emit when jsonOut is set, otherwise as a tab-aligned table with one row
 // per vault followed by a "<ok>/<total> vaults healthy." line. It exits the
 // process with status 1 when VaultDoctor returns an error.
 func doctorVaults(root string, jsonOut bool) {
 	entries, err := infra.VaultDoctor(root)
 	if err != nil {
 		fmt.Fprintf(os.Stderr, "error: %v\n", err)
 		os.Exit(1)
 	}
 	switch {
 	case jsonOut:
 		emit(entries)
 		return
 	case len(entries) == 0:
 		fmt.Println("No vaults declared (no projects/*/vaults/vault.yaml found).")
 		return
 	}
 	tw := tabwriter.NewWriter(os.Stdout, 0, 0, 2, ' ', 0)
 	fmt.Fprintln(tw, "NAME\tSTATUS\tFILES\tINDEXED\tISSUES")
 	healthy := 0
 	for _, entry := range entries {
 		// A vault without issues shows "-" in the ISSUES column.
 		issueText := strings.Join(entry.Issues, "; ")
 		if len(entry.Issues) == 0 {
 			issueText = "-"
 		}
 		if entry.Status == "ok" {
 			healthy++
 		}
 		fmt.Fprintf(tw, "%s\t%s\t%d\t%d\t%s\n",
 			entry.VaultName, entry.Status, entry.DiskFiles, entry.IndexedFiles, issueText)
 	}
 	tw.Flush()
 	fmt.Printf("\n%d/%d vaults healthy.\n", healthy, len(entries))
 }
 // doctorML prints the report returned by infra.AuditMlEnv for root: as JSON
 // via emit when jsonOut is set, otherwise as a list of detected GPUs, a
 // tab-aligned table of checks and a final overall OK/INCOMPLETE line.
 // It exits the process with status 1 when AuditMlEnv returns an error.
 func doctorML(root string, jsonOut bool) {
 	report, err := infra.AuditMlEnv(root)
 	if err != nil {
 		fmt.Fprintf(os.Stderr, "error: %v\n", err)
 		os.Exit(1)
 	}
 	if jsonOut {
 		emit(report)
 		return
 	}
 	fmt.Printf("GPUs detected: %d\n", len(report.Gpus))
 	for _, g := range report.Gpus {
 		fmt.Printf("  [%d] %s  VRAM: %d/%d MiB  Driver: %s  CUDA: %s\n",
 			g.Index, g.Name, g.VramFreeMb, g.VramTotalMb, g.DriverVersion, g.CudaVersion)
 	}
 	fmt.Println()
 	// Longest detail shown in the table, counted in characters (runes).
 	const maxDetailRunes = 60
 	w := tabwriter.NewWriter(os.Stdout, 0, 0, 2, ' ', 0)
 	fmt.Fprintln(w, "CHECK\tSTATUS\tVERSION\tDETAIL")
 	for _, c := range report.Checks {
 		version := c.Version
 		if version == "" {
 			version = "-"
 		}
 		detail := c.Detail
 		// Truncate on a rune boundary: ranging over a string yields the byte
 		// offset of each rune start, so cutting at the offset of rune number
 		// maxDetailRunes never splits a multi-byte UTF-8 character (a plain
 		// detail[:60] byte slice can).
 		seen := 0
 		for offset := range detail {
 			if seen == maxDetailRunes {
 				detail = detail[:offset] + "..."
 				break
 			}
 			seen++
 		}
 		if detail == "" {
 			detail = "-"
 		}
 		fmt.Fprintf(w, "%s\t%s\t%s\t%s\n", c.Name, c.Status, version, detail)
 	}
 	w.Flush()
 	overall := "OK"
 	if !report.OverallOK {
 		overall = "INCOMPLETE"
 	}
 	fmt.Printf("\nOverall ML environment: %s\n", overall)
 }
 func emit(v any) {
 	b, err := json.MarshalIndent(v, "", "  ")
 	if err != nil {
@@ -45,6 +45,8 @@ func main() {
 		cmdAnalysis(os.Args[2:])
 	case "sync":
 		cmdSync(os.Args[2:])
 	case "vault":
 		cmdVault(os.Args[2:])
 	case "doctor":
 		cmdDoctor(os.Args[2:])
 	case "help", "-h", "--help":
@@ -73,6 +75,7 @@ Usage:
  fn app     <list|clone|pull>             Gestiona apps externas (Gitea)
  fn analysis <list|clone|pull>            Gestiona analyses externas (Gitea)
  fn sync    [status|locations]            Sincroniza con servidor central
  fn vault   <list|search|index|info>      Gestiona y busca en data vaults
  fn doctor  [artefacts|services|sync|uses-functions|unused] [--json]
                                           Diagnostico read-only del registry`)
 }
@@ -3,8 +3,10 @@ add_imgui_app(tables_playground
    main.cpp
    data_table.cpp
    data_table_logic.cpp
    llm_anthropic.cpp
    lua_engine.cpp
    tql.cpp
    tql_to_sql.cpp
    viz.cpp
 )
 target_link_libraries(tables_playground PRIVATE lua54 implot)
@@ -13,10 +15,13 @@ target_link_libraries(tables_playground PRIVATE lua54 implot)
 # Self-test binary for the tables playground. Declared with plain
 # add_executable (not add_imgui_app) and built from self_test.cpp plus the
 # sources listed below; unlike the tables_playground app target it does not
 # list main.cpp, data_table.cpp or viz.cpp.
 add_executable(tables_playground_self_test
    self_test.cpp
    data_table_logic.cpp
    llm_anthropic.cpp
    lua_engine.cpp
    tql.cpp
    tql_to_sql.cpp
 )
 # Headers are looked up in this directory and in <top-level source dir>/functions.
 target_include_directories(tables_playground_self_test PRIVATE
    ${CMAKE_CURRENT_SOURCE_DIR}
    ${CMAKE_SOURCE_DIR}/functions
 )
 # Links lua54 only (the tables_playground app target also links implot).
 target_link_libraries(tables_playground_self_test PRIVATE lua54)
@@ -1,20 +1,33 @@
 #include "data_table.h"
 #include "app_base.h"
 #include "imgui.h"
 #include "llm_anthropic.h"
 #include "lua_engine.h"
 #include "tql.h"
 #include "tql_to_sql.h"
 #include "viz.h"
 #include <algorithm>
 #include <cfloat>
 #include <cstdio>
 #include <cstring>
 #include <ctime>
 #include <fstream>
 #include <string>
 #include <unordered_map>
 namespace data_table {
 // Returns today's date in UTC formatted as ISO "YYYY-MM-DD".
 // Serves as the reference date for the Last 7/30/90 days filter presets.
 static std::string today_iso() {
    const std::time_t now = std::time(nullptr);
    const std::tm utc = *std::gmtime(&now);
    // struct tm stores years since 1900 and zero-based months.
    const int year  = utc.tm_year + 1900;
    const int month = utc.tm_mon + 1;
    char iso[16];
    std::snprintf(iso, sizeof(iso), "%04d-%02d-%02d", year, month, utc.tm_mday);
    return std::string(iso);
 }
 namespace {
 // ---------------------------------------------------------------------------
@@ -122,10 +135,106 @@ struct UiState {
    // Toggle Table <-> View: remember last non-table display.
    ViewMode last_non_table_main = ViewMode::Bar;
    // Drill history (fase 10). Stacks per-app; no persistido en TQL.
    std::vector<DrillStep> drill_back;
    std::vector<DrillStep> drill_forward;
    // Row inspector (fase 10). -1 cerrado, sino row idx en el output del stage activo.
    int  inspect_row     = -1;
    bool inspect_open    = false;
    // Ask AI modal (fase 11 — issue 0080).
    bool         ask_open       = false;
    bool         ask_busy       = false;
    int          ask_mode       = 0;     // 0 = TQL, 1 = SQL
    char         ask_question[2048] = {0};
    std::string  ask_current_tql;        // emit del state actual al abrir modal
    std::string  ask_response_raw;       // texto del modelo
    std::string  ask_response_code;      // bloque extraido (Lua o SQL)
    std::string  ask_error;
    std::string  ask_status;             // "Sent. Waiting..." / "OK" / error
    char         ask_edit_buf[8192] = {0}; // buffer editable de propuesta
 };
 // Accessor for the shared UiState instance: a function-local static that is
 // constructed on the first call and returned by reference afterwards.
 UiState& ui() {
    static UiState instance;
    return instance;
 }
 // Row inspector modal (phase 10). Shows every column and its value for row
 // U.inspect_row of the active stage's output. The view is read-only and offers
 // two actions: "Copy TSV" (row to clipboard) and "Filter prev stage by this
 // row", which adds one drill filter to the previous stage for each non-empty
 // cell whose column can be found in prev_input_headers.
 //
 // Params:
 //   st                  pipeline state; only modified by the filter action.
 //   active              index of the active stage (filters go to active - 1).
 //   cells, rows, cols   cell matrix of the active stage's output, indexed as
 //                       cells[row * cols + col].
 //   headers, types      per-column names / types of that output. Either may be
 //                       shorter than cols: missing entries render as "?" /
 //                       String and are skipped by the filter action.
 //   prev_input_headers  headers of the previous stage's input, used to resolve
 //                       the column index of each new filter.
 //
 // Does nothing unless U.inspect_open is set; an out-of-range U.inspect_row
 // closes the inspector.
 static void draw_row_inspector_modal(State& st, int active,
                                      const char* const* cells, int rows, int cols,
                                      const std::vector<std::string>& headers,
                                      const std::vector<ColumnType>& types,
                                      const std::vector<std::string>& prev_input_headers) {
    auto& U = ui();
    if (!U.inspect_open) return;
    if (U.inspect_row < 0 || U.inspect_row >= rows) {
        U.inspect_open = false;
        return;
    }
    // Issued every frame while the inspector is flagged open.
    ImGui::OpenPopup("##row_inspector");
    ImGui::SetNextWindowSize(ImVec2(560, 400), ImGuiCond_Appearing);
    if (ImGui::BeginPopupModal("##row_inspector", &U.inspect_open,
                                ImGuiWindowFlags_NoSavedSettings)) {
        ImGui::Text("Row %d", U.inspect_row);
        ImGui::SameLine(0, 20);
        if (ImGui::SmallButton("Copy TSV")) {
            std::string tsv = row_to_tsv(cells, rows, cols, U.inspect_row, headers);
            ImGui::SetClipboardText(tsv.c_str());
        }
        ImGui::SameLine();
        // The previous stage must exist inside st.stages: st.stages[target] is
        // indexed below before apply_drill_step gets a chance to validate it.
        bool can_filter = (active > 0 && active - 1 < (int)st.stages.size()
                           && !prev_input_headers.empty());
        ImGui::BeginDisabled(!can_filter);
        if (ImGui::SmallButton("Filter prev stage by this row")) {
            int target = active - 1;
            for (int c = 0; c < cols; ++c) {
                const char* v = cells[U.inspect_row * cols + c];
                if (!v || !*v) continue;
                // headers may be shorter than cols (the table below tolerates
                // that too); a column without a header cannot be matched.
                if (c >= (int)headers.size()) continue;
                const std::string& h = headers[c];
                // Strip any granularity suffix to get the bare column name.
                std::string h_clean;
                parse_breakout_granularity(h, h_clean);
                int ci = -1;
                for (size_t i = 0; i < prev_input_headers.size(); ++i) {
                    if (prev_input_headers[i] == h_clean) { ci = (int)i; break; }
                }
                if (ci < 0) continue;
                DrillStep step;
                step.target_stage      = target;
                step.filter_pos        = (int)st.stages[target].filters.size();
                step.prev_active_stage = st.active_stage;
                step.added             = make_drill_filter(ci, v);
                // Only successfully applied steps are recorded for "drill back".
                if (apply_drill_step(st, step)) {
                    U.drill_back.push_back(step);
                }
            }
            // New filters start a new history branch: drop the redo stack.
            U.drill_forward.clear();
            U.inspect_open = false;
        }
        ImGui::EndDisabled();
        ImGui::Separator();
        ImGuiTableFlags flags = ImGuiTableFlags_Borders | ImGuiTableFlags_RowBg
                              | ImGuiTableFlags_ScrollY | ImGuiTableFlags_Resizable;
        if (ImGui::BeginTable("##inspector_tbl", 2, flags, ImVec2(-1, -1))) {
            ImGui::TableSetupColumn("col");
            ImGui::TableSetupColumn("value");
            ImGui::TableHeadersRow();
            for (int c = 0; c < cols; ++c) {
                ImGui::TableNextRow();
                ImGui::TableSetColumnIndex(0);
                ColumnType t = (c < (int)types.size()) ? types[c] : ColumnType::String;
                ImGui::Text("%s %s", column_type_icon(t),
                            (c < (int)headers.size()) ? headers[c].c_str() : "?");
                ImGui::TableSetColumnIndex(1);
                const char* v = cells[U.inspect_row * cols + c];
                ImGui::TextWrapped("%s", v ? v : "");
            }
            ImGui::EndTable();
        }
        ImGui::EndPopup();
    }
 }
 int autocomplete_cb(ImGuiInputTextCallbackData* data) {
    UiState* U = (UiState*)data->UserData;
    if (data->EventFlag == ImGuiInputTextFlags_CallbackAlways) {
@@ -180,6 +289,47 @@ void ensure_init(State& st, int eff_cols) {
 // ---------------------------------------------------------------------------
 void draw_stage_breadcrumb(State& st) {
    st.ensure_stage0();
    // Drill history back/forward (fase 10). Botones al inicio.
    auto& U = ui();
    {
        bool can_back = !U.drill_back.empty();
        ImGui::BeginDisabled(!can_back);
        if (ImGui::SmallButton("<##drill_back")) {
            DrillStep s = U.drill_back.back();
            U.drill_back.pop_back();
            if (undo_drill_step(st, s)) {
                U.drill_forward.push_back(s);
            }
        }
        ImGui::EndDisabled();
        if (can_back && ImGui::IsItemHovered())
            ImGui::SetTooltip("Drill back (%zu)", U.drill_back.size());
        ImGui::SameLine();
        bool can_fwd = !U.drill_forward.empty();
        ImGui::BeginDisabled(!can_fwd);
        if (ImGui::SmallButton(">##drill_fwd")) {
            DrillStep s = U.drill_forward.back();
            U.drill_forward.pop_back();
            if (apply_drill_step(st, s)) {
                U.drill_back.push_back(s);
            }
        }
        ImGui::EndDisabled();
        if (can_fwd && ImGui::IsItemHovered())
            ImGui::SetTooltip("Drill forward (%zu)", U.drill_forward.size());
        ImGui::SameLine();
        bool can_up = (st.active_stage > 0);
        ImGui::BeginDisabled(!can_up);
        if (ImGui::SmallButton("^##drill_up")) drill_up(st);
        ImGui::EndDisabled();
        if (can_up && ImGui::IsItemHovered())
            ImGui::SetTooltip("Drill up (stage previo, sin perder filters)");
        ImGui::SameLine();
        ImGui::TextDisabled("|");
        ImGui::SameLine();
    }
    for (int si = 0; si < (int)st.stages.size(); ++si) {
        if (si > 0) { ImGui::SameLine(); ImGui::TextDisabled(">"); ImGui::SameLine(); }
@@ -610,6 +760,19 @@ void draw_viz_selector(State& st) {
        ImGui::OpenPopup("##viz_cfg_popup");
    }
    ImGui::SameLine();
    if (ImGui::SmallButton("Ask AI##ask_open")) {
        auto& U2 = ui();
        U2.ask_open = true;
        U2.ask_busy = false;
        U2.ask_error.clear();
        U2.ask_status.clear();
        U2.ask_response_code.clear();
        U2.ask_response_raw.clear();
        U2.ask_current_tql = tql::emit(st,
            std::vector<std::string>(),  // emit headers stage 0 (caller fill si necesario)
            std::vector<ColumnType>());
    }
    ImGui::SameLine();
    if (ImGui::SmallButton("+ Viz##viz_add")) {
        VizPanel p;
        p.display = ViewMode::Bar;
@@ -737,7 +900,8 @@ void draw_joins_chips(State& st, const std::vector<TableInput>& joinables,
 // Filter chips para el stage activo. eff_headers/eff_cols son del INPUT del
 // stage activo (= orig+derived para stage 0; output del stage previo para 1+).
 // ---------------------------------------------------------------------------
-void draw_filter_chips(Stage& stg, const char* const* eff_headers, int eff_cols) {
+void draw_filter_chips(Stage& stg, const char* const* eff_headers, int eff_cols,
                        const std::vector<ColumnType>& eff_types) {
    auto& U = ui();
    ImGui::PushStyleColor(ImGuiCol_Button,        IM_COL32(120,  60, 170, 220));
    ImGui::PushStyleColor(ImGuiCol_ButtonHovered, IM_COL32(150,  85, 200, 240));
@@ -746,6 +910,50 @@ void draw_filter_chips(Stage& stg, const char* const* eff_headers, int eff_cols)
    ImGui::PopStyleColor(3);
    ImGui::SameLine();
    // Presets (fase 10): menu con Last7/30/90d (cols Date), ExcludeNulls (any),
    // NonZero (cols numericas). Apply append a stg.filters via build_preset_filters.
    if (ImGui::SmallButton("Presets##fpresets")) ImGui::OpenPopup("##presets_menu");
    if (ImGui::BeginPopup("##presets_menu")) {
        int first_date = -1, first_num = -1;
        for (int c = 0; c < eff_cols && c < (int)eff_types.size(); ++c) {
            if (first_date < 0 && eff_types[c] == ColumnType::Date) first_date = c;
            if (first_num  < 0 && (eff_types[c] == ColumnType::Int ||
                                    eff_types[c] == ColumnType::Float)) first_num = c;
        }
        auto apply_preset = [&](FilterPreset p, int col) {
            auto fs = build_preset_filters(p, col, today_iso());
            for (auto& f : fs) stg.filters.push_back(f);
        };
        if (first_date >= 0) {
            char l1[96], l2[96], l3[96];
            std::snprintf(l1, sizeof(l1), "Last 7 days on \"%s\"",  eff_headers[first_date]);
            std::snprintf(l2, sizeof(l2), "Last 30 days on \"%s\"", eff_headers[first_date]);
            std::snprintf(l3, sizeof(l3), "Last 90 days on \"%s\"", eff_headers[first_date]);
            if (ImGui::MenuItem(l1)) apply_preset(FilterPreset::Last7d,  first_date);
            if (ImGui::MenuItem(l2)) apply_preset(FilterPreset::Last30d, first_date);
            if (ImGui::MenuItem(l3)) apply_preset(FilterPreset::Last90d, first_date);
            ImGui::Separator();
        }
        if (ImGui::BeginMenu("Exclude nulls in...")) {
            for (int c = 0; c < eff_cols; ++c) {
                if (ImGui::MenuItem(eff_headers[c])) apply_preset(FilterPreset::ExcludeNulls, c);
            }
            ImGui::EndMenu();
        }
        if (first_num >= 0) {
            if (ImGui::BeginMenu("Non-zero in...")) {
                for (int c = 0; c < eff_cols && c < (int)eff_types.size(); ++c) {
                    if (eff_types[c] == ColumnType::Int || eff_types[c] == ColumnType::Float) {
                        if (ImGui::MenuItem(eff_headers[c])) apply_preset(FilterPreset::NonZero, c);
                    }
                }
                ImGui::EndMenu();
            }
        }
        ImGui::EndPopup();
    }
    ImGui::SameLine();
    if (stg.filters.empty()) {
        ImGui::TextDisabled("Sin filtros.");
        return;
@@ -778,7 +986,8 @@ void draw_filter_chips(Stage& stg, const char* const* eff_headers, int eff_cols)
 }
 // Chips de breakout (stage > 0).
-void draw_breakout_chips(Stage& stg, const char* const* in_headers, int in_cols) {
+void draw_breakout_chips(Stage& stg, const char* const* in_headers, int in_cols,
                          const std::vector<ColumnType>& in_types) {
    auto& U = ui();
    ImGui::PushStyleColor(ImGuiCol_Button,        IM_COL32( 60, 160, 170, 220));
    ImGui::PushStyleColor(ImGuiCol_ButtonHovered, IM_COL32( 80, 190, 200, 240));
@@ -792,6 +1001,17 @@ void draw_breakout_chips(Stage& stg, const char* const* in_headers, int in_cols)
        return;
    }
    for (size_t i = 0; i < stg.breakouts.size(); ) {
        std::string col_name;
        DateGranularity g = parse_breakout_granularity(stg.breakouts[i], col_name);
        // Resolve col index para lookup de tipo.
        int col_idx = -1;
        for (int c = 0; c < in_cols; ++c) {
            if (std::strcmp(in_headers[c], col_name.c_str()) == 0) { col_idx = c; break; }
        }
        bool is_date_col = (col_idx >= 0 && col_idx < (int)in_types.size()
                             && in_types[col_idx] == ColumnType::Date);
        char buf[256];
        std::snprintf(buf, sizeof(buf), "%s  x##bk%zu", stg.breakouts[i].c_str(), i);
        ImGui::PushStyleColor(ImGuiCol_Button,        IM_COL32( 60, 160, 170, 220));
@@ -802,20 +1022,42 @@ void draw_breakout_chips(Stage& stg, const char* const* in_headers, int in_cols)
        if (ImGui::IsItemClicked(ImGuiMouseButton_Right)) {
            U.edit_chip_kind = 2;
            U.edit_chip_idx  = (int)i;
-            // resolve current col name to index in in_headers
+            U.edit_col_idx = (col_idx >= 0) ? col_idx : 0;
            U.edit_col_idx = 0;
            for (int c = 0; c < in_cols; ++c) {
                if (std::strcmp(in_headers[c], stg.breakouts[i].c_str()) == 0) {
                    U.edit_col_idx = c; break;
                }
            }
            ImGui::OpenPopup("##edit_breakout");
        }
        if (clicked) { stg.breakouts.erase(stg.breakouts.begin() + i); continue; }
        // Granularity combo inline cuando col Date (fase 10).
        if (is_date_col) {
            ImGui::SameLine();
            const char* preview = (g == DateGranularity::None)
                                  ? "(raw)" : date_granularity_token(g);
            char combo_id[32];
            std::snprintf(combo_id, sizeof(combo_id), "##gran%zu", i);
            ImGui::SetNextItemWidth(72);
            if (ImGui::BeginCombo(combo_id, preview)) {
                DateGranularity opts[] = {
                    DateGranularity::None,
                    DateGranularity::Year,
                    DateGranularity::Month,
                    DateGranularity::Week,
                    DateGranularity::Day,
                    DateGranularity::Hour,
                };
                for (auto o : opts) {
                    const char* lbl = (o == DateGranularity::None)
                                      ? "(raw)" : date_granularity_token(o);
                    if (ImGui::Selectable(lbl, o == g)) {
                        stg.breakouts[i] = compose_breakout(col_name, o);
                    }
                }
                ImGui::EndCombo();
            }
        }
        ImGui::SameLine();
        ++i;
    }
    (void)in_headers; (void)in_cols;
    ImGui::NewLine();
 }
@@ -1220,7 +1462,8 @@ void draw_add_filter_popup(Stage& stg, const char* const* eff_headers_arr, int e
 }
 void draw_add_breakout_popup(Stage& stg, const char* const* in_headers, int in_cols,
-                              const std::vector<ColumnType>& in_types) {
+                              const std::vector<ColumnType>& in_types,
                              const char* const* in_cells, int in_rows) {
    auto& U = ui();
    if (!ImGui::BeginPopup("##addbreakout")) return;
    if (U.brk_picker_col < 0 || U.brk_picker_col >= in_cols) U.brk_picker_col = 0;
@@ -1236,7 +1479,18 @@ void draw_add_breakout_popup(Stage& stg, const char* const* in_headers, int in_c
        ImGui::EndCombo();
    }
    if (ImGui::Button("Add##bk")) {
-        stg.breakouts.emplace_back(in_headers[U.brk_picker_col]);
+        int c = U.brk_picker_col;
        std::string col = in_headers[c];
        // Fase 10: si col es Date, auto-detect granularidad via rango lexical
        // (ISO YYYY-MM-DD ordena bien). Default Day si rango invalido.
        if (c >= 0 && c < (int)in_types.size() && in_types[c] == ColumnType::Date) {
            std::string lo, hi;
            column_min_max(in_cells, in_rows, in_cols, c, lo, hi);
            DateGranularity g = auto_date_granularity(lo, hi);
            stg.breakouts.emplace_back(compose_breakout(col, g));
        } else {
            stg.breakouts.emplace_back(col);
        }
        ImGui::CloseCurrentPopup();
    }
    ImGui::EndPopup();
@@ -1441,8 +1695,17 @@ void drill_into(State& st, int from_stage,
        if (prev_input_headers[i] == col_name) { ci = (int)i; break; }
    }
    if (ci < 0) return;
-    st.stages[target].filters.push_back(make_drill_filter(ci, value));
+
-    st.active_stage = target;
+    // Fase 10: graba step en drill_back, limpia forward (rama nueva).
    DrillStep step;
    step.target_stage      = target;
    step.filter_pos        = (int)st.stages[target].filters.size();
    step.prev_active_stage = st.active_stage;
    step.added             = make_drill_filter(ci, value);
    apply_drill_step(st, step);
    auto& U = ui();
    U.drill_back.push_back(step);
    U.drill_forward.clear();
 }
 } // anon namespace
@@ -1659,7 +1922,7 @@ void render(const char* id,
            draw_joins_chips(st, *joinables, mh);
        }
-        draw_filter_chips(act, eff_headers.data(), eff_cols);
+        draw_filter_chips(act, eff_headers.data(), eff_cols, eff_types);
        draw_add_filter_popup(act, eff_headers.data(), eff_cols, eff_types);
        draw_edit_filter_popup(act, eff_headers.data(), eff_cols, eff_types);
@@ -2290,12 +2553,13 @@ void render(const char* id,
        if (chrome_visible) {
        ImGui::PushStyleVar(ImGuiStyleVar_ItemSpacing, ImVec2(8, 2));
-        draw_filter_chips(act, ih_ptrs.data(), in_cols_n);
+        draw_filter_chips(act, ih_ptrs.data(), in_cols_n, input_types_active);
        draw_add_filter_popup(act, ih_ptrs.data(), in_cols_n, input_types_active);
        draw_edit_filter_popup(act, ih_ptrs.data(), in_cols_n, input_types_active);
-        draw_breakout_chips(act, ih_ptrs.data(), in_cols_n);
+        draw_breakout_chips(act, ih_ptrs.data(), in_cols_n, input_types_active);
-        draw_add_breakout_popup(act, ih_ptrs.data(), in_cols_n, input_types_active);
+        draw_add_breakout_popup(act, ih_ptrs.data(), in_cols_n, input_types_active,
                                 cur_cells, cur_rows);
        draw_edit_breakout_popup(act, ih_ptrs.data(), in_cols_n);
        draw_aggregation_chips(act, ih_ptrs.data(), in_cols_n);
@@ -2524,7 +2788,22 @@ void render(const char* id,
                    so_local.cells.push_back(cur_cells[i]);
                so_ptr = &so_local;
            }
-            viz::render(*so_ptr, st.display, st.viz_config, ImVec2(-1, -1));
+            int clicked_row = -1;
            viz::render(*so_ptr, st.display, st.viz_config, ImVec2(-1, -1), &clicked_row);
            // Fase 10: click sobre chart -> drill al stage previo usando
            // breakout col[0] como filtro Op::Eq sobre cells[clicked_row].
            if (clicked_row >= 0 && active > 0 &&
                so_ptr->cols > 0 && clicked_row < so_ptr->rows) {
                int n_brk = (int)st.stages[active].breakouts.size();
                if (n_brk > 0) {
                    const char* v = so_ptr->cells[clicked_row * so_ptr->cols + 0];
                    std::string col_clean;
                    parse_breakout_granularity(so_ptr->headers[0], col_clean);
                    drill_into(st, active, col_clean,
                                v ? std::string(v) : "",
                                input_headers_active);
                }
            }
            goto stage_n_table_end;
        }
@@ -2613,12 +2892,10 @@ void render(const char* id,
                    ImGui::PushID(r * cur_cols_n + c);
                    ImGui::Selectable(cell ? cell : "");
                    if (ImGui::IsItemHovered() && ImGui::IsMouseClicked(ImGuiMouseButton_Right)) {
-                        // Drill-down solo si c es col de breakout (c < n_brk).
+                        U.pending_col   = c;
-                        if (c < n_brk) {
+                        U.pending_value = cell ? cell : "";
-                            U.pending_col   = c;
+                        U.inspect_row   = r;
-                            U.pending_value = cell ? cell : "";
+                        ImGui::OpenPopup("##drill_popup");
                            ImGui::OpenPopup("##drill_popup");
                        }
                    }
                    if (ImGui::BeginPopup("##drill_popup")) {
                        if (c < n_brk) {
@@ -2631,6 +2908,12 @@ void render(const char* id,
                                           input_headers_active);
                                ImGui::CloseCurrentPopup();
                            }
                            ImGui::Separator();
                        }
                        if (ImGui::MenuItem("Inspect row...")) {
                            U.inspect_row  = r;
                            U.inspect_open = true;
                            ImGui::CloseCurrentPopup();
                        }
                        ImGui::EndPopup();
                    }
@@ -2642,6 +2925,11 @@ void render(const char* id,
        }
        stage_n_table_end:;
        // Row inspector modal (fase 10). Activado via right-click "Inspect row..."
        // sobre celdas del table del stage activo. `cur_cells` ya es row-major.
        draw_row_inspector_modal(st, active, cur_cells, cur_rows, cur_cols_n,
                                  cur_headers, cur_types, input_headers_active);
        // Render extras (stage>0 path)
        if (!st.extra_panels.empty() && cur_cols_n > 0) {
            StageOutput so_local;
@@ -2958,6 +3246,118 @@ void render(const char* id,
        ImGui::EndPopup();
    }
    // Ask AI modal (fase 11 — issue 0080).
    if (U.ask_open) ImGui::OpenPopup("Ask AI");
    ImGui::SetNextWindowSize(ImVec2(820, 560), ImGuiCond_Appearing);
    if (ImGui::BeginPopupModal("Ask AI", &U.ask_open,
                                ImGuiWindowFlags_NoSavedSettings)) {
        ImGui::TextDisabled("Ask en lenguaje natural. Default TQL. SQL solo si DuckDB linkado.");
        const char* modes[] = {"TQL", "SQL (DuckDB)"};
 #ifndef FN_TQL_DUCKDB
        // SQL mode disabled visually pero el toggle existe (informativo)
        if (U.ask_mode == 1) U.ask_mode = 0;
 #endif
        ImGui::Combo("Output##askmode", &U.ask_mode, modes, IM_ARRAYSIZE(modes));
 #ifndef FN_TQL_DUCKDB
        if (U.ask_mode == 1) {
            ImGui::TextColored(ImVec4(1, 0.5f, 0.3f, 1),
                "SQL mode requires FN_TQL_DUCKDB=1 build flag.");
        }
 #endif
        ImGui::InputTextMultiline("##ask_q", U.ask_question, sizeof(U.ask_question),
                                   ImVec2(-1, 80));
        ImGui::BeginDisabled(U.ask_busy);
        if (ImGui::Button("Send")) {
            U.ask_busy = true;
            U.ask_status = "Sending...";
            U.ask_error.clear();
            U.ask_response_code.clear();
            U.ask_response_raw.clear();
            // Build AskInput desde el state actual.
            llm_anthropic::AskInput in;
            in.question = U.ask_question;
            in.tql_current = U.ask_current_tql;
            in.col_names = U.active_headers;
            in.col_types = U.active_types;
            in.mode = (U.ask_mode == 1)
                ? llm_anthropic::OutputMode::SQL
                : llm_anthropic::OutputMode::TQL;
            // Llamada blocking (UI freeze breve durante red).
            auto r = llm_anthropic::ask(in);
            U.ask_busy = false;
            if (!r.error.empty()) {
                U.ask_error = r.error;
                U.ask_status = "Error";
            } else {
                U.ask_response_raw = r.raw;
                U.ask_response_code = r.code;
                U.ask_status = "Got response.";
                // Llenar edit buffer
                std::snprintf(U.ask_edit_buf, sizeof(U.ask_edit_buf),
                              "%s", r.code.c_str());
            }
        }
        ImGui::EndDisabled();
        ImGui::SameLine();
        if (!U.ask_status.empty()) {
            ImGui::TextDisabled("%s", U.ask_status.c_str());
        }
        if (!U.ask_error.empty()) {
            ImGui::TextColored(ImVec4(1, 0.4f, 0.4f, 1), "%s", U.ask_error.c_str());
        }
        ImGui::Separator();
        ImGui::Columns(2, "ask_cols", true);
        ImGui::TextUnformatted("Current");
        ImGui::InputTextMultiline("##ask_cur",
            const_cast<char*>(U.ask_current_tql.c_str()),
            U.ask_current_tql.size() + 1,
            ImVec2(-1, 240),
            ImGuiInputTextFlags_ReadOnly);
        ImGui::NextColumn();
        ImGui::TextUnformatted("Proposed (editable before apply)");
        ImGui::InputTextMultiline("##ask_new", U.ask_edit_buf, sizeof(U.ask_edit_buf),
                                   ImVec2(-1, 240));
        ImGui::Columns(1);
        bool can_apply = !U.ask_busy && U.ask_edit_buf[0] != '\0';
        ImGui::BeginDisabled(!can_apply);
        if (ImGui::Button("Apply")) {
            std::string err;
            if (U.ask_mode == 0) {
                // TQL apply
                bool ok = tql::apply(U.ask_edit_buf, st,
                                      U.active_headers,
                                      U.active_types,
                                      nullptr, 0,
                                      (int)U.active_headers.size(),
                                      &err);
                if (ok) {
                    U.ask_status = "Applied OK.";
                    U.ask_open = false;
                } else {
                    U.ask_error = "tql::apply error: " + err;
                    U.ask_status = "Apply failed.";
                }
            } else {
                // SQL apply: requires DuckDB adapter (no v1).
                U.ask_status = "SQL execute requires FN_TQL_DUCKDB build flag.";
            }
        }
        ImGui::EndDisabled();
        ImGui::SameLine();
        if (ImGui::Button("Reject")) {
            U.ask_response_code.clear();
            U.ask_edit_buf[0] = '\0';
        }
        ImGui::SameLine();
        if (ImGui::Button("Close")) {
            U.ask_open = false;
        }
        ImGui::EndPopup();
    }
    if (U.open_cell_popup) { ImGui::OpenPopup("##cell_op"); U.open_cell_popup = false; }
    if (ImGui::BeginPopup("##cell_op")) {
        ColumnType t = (U.pending_col >= 0 && U.pending_col < eff_cols)
@@ -567,6 +567,69 @@ Filter make_drill_filter(int col_idx, const std::string& value) {
    return f;
 }
 // Applies a drill step: inserts step.added into the filter list of stage
 // step.target_stage at index step.filter_pos (an index equal to the current
 // filter count appends) and makes that stage the active one.
 // Returns false, leaving st untouched, when the stage index or the insert
 // position is out of range.
 bool apply_drill_step(State& st, const DrillStep& step) {
    const int stage_idx   = step.target_stage;
    const int stage_count = (int)st.stages.size();
    const bool stage_ok   = (stage_idx >= 0) && (stage_idx < stage_count);
    if (!stage_ok) return false;
    auto& filters = st.stages[stage_idx].filters;
    const int insert_at = step.filter_pos;
    const bool pos_ok   = (insert_at >= 0) && (insert_at <= (int)filters.size());
    if (!pos_ok) return false;
    filters.insert(filters.begin() + insert_at, step.added);
    st.active_stage = stage_idx;
    return true;
 }
 // Moves the active stage one step towards stage 0.
 // Returns true when the active stage changed; false when there are no stages
 // or the active stage is already 0 (or negative).
 bool drill_up(State& st) {
    const bool can_move = !st.stages.empty() && st.active_stage > 0;
    if (can_move) {
        --st.active_stage;
    }
    return can_move;
 }
 // Serialises one row as two tab-separated lines, each terminated by CRLF: the
 // header line followed by the row's values.
 //   cells    cell matrix indexed as cells[row * cols + col]; null cells are
 //            emitted as empty fields.
 //   headers  column names; columns beyond headers.size() get an empty name.
 // Returns an empty string when row_idx is outside [0, rows) or cols <= 0.
 std::string row_to_tsv(const char* const* cells, int rows, int cols,
                        int row_idx, const std::vector<std::string>& headers) {
    const bool row_valid = (row_idx >= 0) && (row_idx < rows);
    if (!row_valid || cols <= 0) return std::string();
    static const char kEol[] = "\r\n";
    const int header_count = (int)headers.size();
    const int base = row_idx * cols;
    std::string tsv;
    // Line 1: column names.
    for (int col = 0; col != cols; ++col) {
        if (col != 0) tsv.push_back('\t');
        if (col < header_count) tsv.append(headers[col]);
    }
    tsv.append(kEol);
    // Line 2: cell values of the requested row.
    for (int col = 0; col != cols; ++col) {
        if (col != 0) tsv.push_back('\t');
        const char* text = cells[base + col];
        if (text != nullptr) tsv.append(text);
    }
    tsv.append(kEol);
    return tsv;
 }
 std::vector<Filter> build_filters_from_row(const char* const* cells, int rows,
                                            int cols, int row_idx) {
    std::vector<Filter> out;
    if (row_idx < 0 || row_idx >= rows || cols <= 0) return out;
    for (int c = 0; c < cols; ++c) {
        const char* v = cells[row_idx * cols + c];
        if (!v || !*v) continue;
        Filter f;
        f.col = c;
        f.op  = Op::Eq;
        f.value = v;
        out.push_back(f);
    }
    return out;
 }
 // Reverts a drill step: erases the filter at index step.filter_pos of stage
 // step.target_stage and, when step.prev_active_stage is a valid stage index,
 // makes that stage active again (otherwise the active stage is left as is).
 // Returns false, leaving st untouched, when the stage index or the filter
 // position is out of range.
 bool undo_drill_step(State& st, const DrillStep& step) {
    const int stage_count = (int)st.stages.size();
    const int stage_idx   = step.target_stage;
    const bool stage_ok   = (stage_idx >= 0) && (stage_idx < stage_count);
    if (!stage_ok) return false;
    auto& filters = st.stages[stage_idx].filters;
    const int remove_at = step.filter_pos;
    const bool pos_ok   = (remove_at >= 0) && (remove_at < (int)filters.size());
    if (!pos_ok) return false;
    filters.erase(filters.begin() + remove_at);
    const int restore_to = step.prev_active_stage;
    const bool restore_ok = (restore_to >= 0) && (restore_to < stage_count);
    if (restore_ok) {
        st.active_stage = restore_to;
    }
    return true;
 }
 std::vector<int> apply_filters(const char* const* cells, int rows, int cols,
                               const std::vector<Filter>& filters)
 {
@@ -696,19 +759,57 @@ StageOutput compute_stage(const char* const* in_cells, int in_rows, int in_cols,
    }
    // Grouped: agrupa visible por valores de breakout, calcula aggregations.
-    std::vector<int> break_cols(stage.breakouts.size());
+    // Breakouts pueden llevar sufijo `:granularity` para cols Date (fase 10).
-    for (size_t i = 0; i < stage.breakouts.size(); ++i) {
+    int nbreaks = (int)stage.breakouts.size();
-        break_cols[i] = find_col(in_headers, stage.breakouts[i]);
+    std::vector<int> break_cols(nbreaks);
    std::vector<DateGranularity> break_grans(nbreaks);
    bool any_trunc = false;
    for (int i = 0; i < nbreaks; ++i) {
        std::string col_name;
        break_grans[i] = parse_breakout_granularity(stage.breakouts[i], col_name);
        if (break_grans[i] != DateGranularity::None) any_trunc = true;
        break_cols[i] = find_col(in_headers, col_name);
    }
    // Pre-truncate solo cuando hay granularity activa. Strings persistidos en
    // out.cell_backing para que los punteros sobrevivan al return de la funcion.
    // Reservamos upfront para que push_back no invalide punteros anteriores.
    // Tamaño = trunc cells + aggregation cells (peor caso n_groups <= in_rows).
    out.cell_backing.reserve(
        (size_t)in_rows * (size_t)nbreaks +
        (size_t)in_rows * stage.aggregations.size() + 16);
    std::vector<const char*> trunc_ptrs;
    if (any_trunc) {
        trunc_ptrs.assign((size_t)in_rows * (size_t)nbreaks, nullptr);
        for (int r = 0; r < in_rows; ++r) {
            for (int i = 0; i < nbreaks; ++i) {
                if (break_grans[i] == DateGranularity::None) continue;
                int bc = break_cols[i];
                if (bc < 0) continue;
                const char* v = in_cells[r * in_cols + bc];
                out.cell_backing.emplace_back(
                    truncate_date(v ? v : "", break_grans[i]));
                trunc_ptrs[(size_t)r * nbreaks + i] = out.cell_backing.back().c_str();
            }
        }
    }
    auto cell_for = [&](int r, int i) -> const char* {
        int bc = break_cols[i];
        if (bc < 0) return "";
        if (break_grans[i] != DateGranularity::None) {
            return trunc_ptrs[(size_t)r * nbreaks + i];
        }
        const char* v = in_cells[r * in_cols + bc];
        return v ? v : "";
    };
    auto make_key = [&](int r) -> std::string {
        std::string k;
-        for (size_t i = 0; i < break_cols.size(); ++i) {
+        for (int i = 0; i < nbreaks; ++i) {
            if (i > 0) k += '\x1f'; // separador unit-separator (no aparece en datos)
-            int bc = break_cols[i];
+            k += cell_for(r, i);
            if (bc < 0) continue;
            const char* v = in_cells[r * in_cols + bc];
            k += (v ? v : "");
        }
        return k;
    };
@@ -727,10 +828,9 @@ StageOutput compute_stage(const char* const* in_cells, int in_rows, int in_cols,
            key_to_group.emplace(k, gi);
            group_keys.push_back(k);
            group_rows.emplace_back();
-            std::vector<const char*> bv(break_cols.size(), "");
+            std::vector<const char*> bv((size_t)nbreaks, "");
-            for (size_t i = 0; i < break_cols.size(); ++i) {
+            for (int i = 0; i < nbreaks; ++i) {
-                int bc = break_cols[i];
+                bv[i] = cell_for(r, i);
                bv[i] = (bc >= 0) ? in_cells[r * in_cols + bc] : "";
            }
            group_breakvals.push_back(std::move(bv));
        } else gi = it->second;
@@ -742,11 +842,17 @@ StageOutput compute_stage(const char* const* in_cells, int in_rows, int in_cols,
    out.cols = out_cols;
    out.headers.reserve(out_cols);
    out.types.reserve(out_cols);
-    for (size_t i = 0; i < stage.breakouts.size(); ++i) {
+    for (int i = 0; i < nbreaks; ++i) {
        out.headers.push_back(stage.breakouts[i]);
        int bc = break_cols[i];
-        out.types.push_back((bc >= 0 && bc < (int)in_types.size())
+        // Si hay granularity activa, el output es String (formato ymd o similar),
-                            ? in_types[bc] : ColumnType::String);
+        // no la fecha original.
        ColumnType ot = ColumnType::String;
        if (break_grans[i] == DateGranularity::None
            && bc >= 0 && bc < (int)in_types.size()) {
            ot = in_types[bc];
        }
        out.types.push_back(ot);
    }
    for (const auto& a : stage.aggregations) {
        out.headers.push_back(aggregation_alias(a));
@@ -1102,4 +1208,288 @@ StageOutput join_tables(const char* const* left_cells, int left_rows, int left_c
    return out;
 }
 // ----------------------------------------------------------------------------
 // Fase 10: drill extendido — granularity + presets.
 // ----------------------------------------------------------------------------
 // Maps a granularity to its lowercase token ("year", "month", "week",
 // "day", "hour"). Any other value, including None, maps to "".
 const char* date_granularity_token(DateGranularity g) {
    const char* token = "";
    switch (g) {
        case DateGranularity::Year:  token = "year";  break;
        case DateGranularity::Month: token = "month"; break;
        case DateGranularity::Week:  token = "week";  break;
        case DateGranularity::Day:   token = "day";   break;
        case DateGranularity::Hour:  token = "hour";  break;
        default: break;
    }
    return token;
 }
 // Inverse of date_granularity_token: parses an exact, case-sensitive token.
 // A null pointer or an unrecognized token yields DateGranularity::None.
 DateGranularity date_granularity_from_token(const char* s) {
    struct Entry {
        const char*     token;
        DateGranularity value;
    };
    static const Entry kTable[] = {
        {"year",  DateGranularity::Year},
        {"month", DateGranularity::Month},
        {"week",  DateGranularity::Week},
        {"day",   DateGranularity::Day},
        {"hour",  DateGranularity::Hour},
    };
    if (s == nullptr) return DateGranularity::None;
    const std::string text(s);
    for (const Entry& entry : kTable) {
        if (text == entry.token) return entry.value;
    }
    return DateGranularity::None;
 }
 // Splits a breakout spec of the form "<column>[:<granularity>]".
 // The text after the LAST ':' is treated as a granularity only if it is a
 // known token; in that case `col_out` receives the part before the ':' and
 // the parsed granularity is returned. Otherwise (no ':' or unknown suffix)
 // `col_out` receives the whole string and None is returned.
 DateGranularity parse_breakout_granularity(const std::string& breakout,
                                            std::string& col_out) {
    DateGranularity gran = DateGranularity::None;
    const std::string::size_type colon = breakout.rfind(':');
    if (colon != std::string::npos) {
        gran = date_granularity_from_token(breakout.substr(colon + 1).c_str());
    }
    if (gran == DateGranularity::None) {
        col_out = breakout;
    } else {
        col_out = breakout.substr(0, colon);
    }
    return gran;
 }
 // Builds a breakout spec: "<col>" for None, otherwise "<col>:<token>" where
 // <token> comes from date_granularity_token(g).
 std::string compose_breakout(const std::string& col, DateGranularity g) {
    std::string composed = col;
    if (g != DateGranularity::None) {
        composed += ":";
        composed += date_granularity_token(g);
    }
    return composed;
 }
 // Returns the index of the element of xs[0..n) closest to `target` by
 // absolute difference. NaN elements are skipped and ties keep the earliest
 // index. Returns -1 when xs is null, n <= 0, or every element is NaN.
 int nearest_index_1d(double target, const double* xs, int n) {
    if (!xs || n <= 0) return -1;
    int winner = -1;
    double winner_dist = 0.0;
    for (int idx = 0; idx < n; ++idx) {
        const double x = xs[idx];
        if (std::isnan(x)) continue;
        const double dist = std::fabs(x - target);
        // The first usable element always wins; later ones must be strictly closer.
        const bool improves = (winner < 0) || (dist < winner_dist);
        if (improves) {
            winner = idx;
            winner_dist = dist;
        }
    }
    return winner;
 }
 // Returns the index of the point (xs[i], ys[i]) closest to (tx, ty) by
 // squared Euclidean distance. Points with a NaN coordinate are skipped and
 // ties keep the earliest index. Returns -1 when either array is null,
 // n <= 0, or no point is usable.
 int nearest_index_2d(double tx, double ty,
                      const double* xs, const double* ys, int n) {
    if (!xs || !ys || n <= 0) return -1;
    int winner = -1;
    double winner_sq = 0.0;
    for (int idx = 0; idx < n; ++idx) {
        const double px = xs[idx];
        const double py = ys[idx];
        if (std::isnan(px) || std::isnan(py)) continue;
        const double off_x = px - tx;
        const double off_y = py - ty;
        const double sq = off_x*off_x + off_y*off_y;
        const bool improves = (winner < 0) || (sq < winner_sq);
        if (improves) {
            winner = idx;
            winner_sq = sq;
        }
    }
    return winner;
 }
 // Angle of the point (mx, my) around the center (cx, cy), in radians within
 // [0, 2*PI). The angle is 0 when the point is straight "up" from the center
 // (dx == 0, dy < 0) and grows towards +X, i.e. atan2(dx, -dy) shifted into
 // the non-negative range. Per the original author's note this matches the
 // ImPlot pie convention (0 = top, clockwise), whereas plain atan2(dy, dx)
 // has 0 = +X and runs counter-clockwise.
 double pie_angle(double cx, double cy, double mx, double my) {
    const double two_pi = 6.283185307179586;
    const double dx = mx - cx;
    const double dy = my - cy;
    double a = std::atan2(dx, -dy); // 0 when (dx == 0, dy < 0) = top
    if (a < 0) a += two_pi;
    // A tiny negative angle (e.g. -1e-20) rounds to exactly two_pi after the
    // shift, which would break the half-open [0, 2*PI) contract and make
    // pie_slice_at_angle reject the click. Clamp it to the largest double
    // below two_pi so it still lands in the last slice.
    if (a >= two_pi) a = std::nextafter(two_pi, 0.0);
    return a;
 }
 // Returns the index of the pie slice that contains `angle`.
 //   angle : radians in [0, 2*PI), measured the same way the slices are laid
 //           out (slice 0 starts at angle 0).
 //   sums  : n non-negative slice weights; slice i spans a share of the
 //           circle proportional to sums[i] / total.
 // Returns -1 when sums is null, n <= 0, the angle is out of range or NaN,
 // any weight is negative or NaN, or the total weight is not positive.
 int pie_slice_at_angle(double angle, const double* sums, int n) {
    if (n <= 0 || !sums) return -1;
    const double two_pi = 6.283185307179586;
    // Written as a negated range test so that a NaN angle is rejected too;
    // a plain "angle < 0 || angle >= two_pi" lets NaN through.
    if (!(angle >= 0.0 && angle < two_pi)) return -1;

    double total = 0.0;
    for (int i = 0; i < n; ++i) {
        // Rejects negative weights and NaN (NaN fails every comparison).
        if (!(sums[i] >= 0.0)) return -1;
        total += sums[i];
    }
    if (!(total > 0.0)) return -1;

    double cum = 0.0;
    for (int i = 0; i < n; ++i) {
        cum += (sums[i] / total) * two_pi;
        if (angle < cum) return i;
    }
    // Accumulated rounding can leave cum marginally below two_pi.
    return n - 1;
 }
 // Maps a point (px, py) to the heatmap cell that contains it, treating the
 // grid as `cols` x `rows` unit cells starting at the origin.
 //   row_out / col_out : receive floor(py) / floor(px), or -1 / -1 when the
 //                       grid is empty or the point lies outside
 //                       [0, cols) x [0, rows) (including NaN coordinates).
 // Note from the original author: ImPlot paints heatmap row 0 at the top and
 // plot Y is usually inverted; this function returns the row in plot
 // coordinates and leaves any flipping to the caller.
 void heatmap_cell_at(double px, double py, int rows, int cols,
                      int& row_out, int& col_out) {
    row_out = -1;
    col_out = -1;
    if (rows <= 0 || cols <= 0) return;
    // Negated range tests so NaN is rejected: NaN fails "px < 0" and
    // "px >= cols" alike, and converting NaN to int is undefined behaviour.
    if (!(px >= 0.0 && px < (double)cols)) return;
    if (!(py >= 0.0 && py < (double)rows)) return;
    col_out = (int)px;
    row_out = (int)py;
 }
 // Finds the lexicographically smallest and largest non-empty cell of column
 // `col_idx` in a row-major `rows` x `cols` matrix.
 // Both outputs are cleared first and stay empty when the column index is
 // out of range or every cell in the column is null/empty. Comparison is
 // plain std::string ordering (byte-wise), not numeric.
 void column_min_max(const char* const* cells, int rows, int cols, int col_idx,
                     std::string& min_out, std::string& max_out) {
    min_out.clear();
    max_out.clear();
    const bool col_ok = col_idx >= 0 && col_idx < cols;
    if (!col_ok) return;

    bool seeded = false;
    for (int row = 0; row < rows; ++row) {
        const char* raw = cells[row * cols + col_idx];
        if (raw == nullptr || *raw == '\0') continue;
        const std::string value(raw);
        if (!seeded) {
            // The first usable cell initializes both extremes.
            min_out = value;
            max_out = value;
            seeded = true;
        } else if (value < min_out) {
            min_out = value;
        } else if (value > max_out) {
            max_out = value;
        }
    }
 }
 namespace {
 // Parses the leading "YYYY-MM-DD" of an ISO-like string into (y, m, d);
 // anything after the tenth character is ignored.
 // Returns false when the string is shorter than 10 characters, the shape is
 // not digits/dashes, the month is outside 1..12 or the day outside 1..31.
 // Days are not validated against the month length. Once the shape check has
 // passed, y/m/d are written even if the range check then fails.
 bool parse_ymd(const std::string& s, int& y, int& m, int& d) {
    static const char kShape[] = "dddd-dd-dd"; // 'd' = decimal digit
    const size_t kLen = sizeof(kShape) - 1;
    if (s.size() < kLen) return false;
    for (size_t i = 0; i < kLen; ++i) {
        const char ch = s[i];
        const bool matches = (kShape[i] == 'd') ? (ch >= '0' && ch <= '9')
                                                : (ch == '-');
        if (!matches) return false;
    }
    auto two_digits = [&s](size_t at) {
        return (s[at]-'0')*10 + (s[at + 1]-'0');
    };
    y = two_digits(0) * 100 + two_digits(2);
    m = two_digits(5);
    d = two_digits(8);
    return m >= 1 && m <= 12 && d >= 1 && d <= 31;
 }
 // Day number of a proleptic Gregorian date, counted from 0000-03-01
 // (0000-03-01 -> 0, 1970-01-01 -> 719468). January and February are treated
 // as months 13 and 14 of the previous year.
 long ymd_to_days(int y, int m, int d) {
    if (m <= 2) {
        y -= 1;
        m += 12;
    }
    const long cycle = (y >= 0 ? y : y - 399) / 400;         // 400-year cycle index
    const unsigned year_in_cycle = (unsigned)(y - cycle * 400);
    const unsigned day_in_year = (unsigned)((153 * (m - 3) + 2) / 5 + d - 1);
    const unsigned day_in_cycle = year_in_cycle * 365 + year_in_cycle/4
                                  - year_in_cycle/100 + day_in_year;
    return cycle * 146097 + (long)day_in_cycle;
 }
 // Inverse of ymd_to_days: converts a day number counted from 0000-03-01
 // back to a calendar (y, m, d).
 void days_to_ymd(long days, int& y, int& m, int& d) {
    const long cycle = (days >= 0 ? days : days - 146096) / 146097;
    const unsigned day_in_cycle = (unsigned)(days - cycle * 146097);
    const unsigned year_in_cycle =
        (day_in_cycle - day_in_cycle/1460 + day_in_cycle/36524
         - day_in_cycle/146096) / 365;
    const unsigned day_in_year =
        day_in_cycle - (365*year_in_cycle + year_in_cycle/4 - year_in_cycle/100);
    const unsigned march_month = (5*day_in_year + 2)/153;    // 0 = March
    const unsigned day = day_in_year - (153*march_month + 2)/5 + 1;
    const unsigned month = march_month < 10 ? march_month + 3 : march_month - 9;
    int year = (int)year_in_cycle + (int)cycle * 400;
    // January and February belong to the following calendar year.
    if (month <= 2) year += 1;
    y = year;
    m = (int)month;
    d = (int)day;
 }
 } // anon
 // Truncates an ISO-like date string to the given granularity:
 //   Year  -> "YYYY"
 //   Month -> "YYYY-MM"
 //   Day   -> "YYYY-MM-DD"
 //   Hour  -> "YYYY-MM-DDTHH" (HH is 00 unless the input has 'T' at index 10
 //            followed by two digits forming a value <= 23)
 //   Week  -> "YYYY-MM-DD" of the Monday on or before the date
 // The input is returned unchanged for None, for any other granularity
 // value, and whenever parse_ymd rejects the string.
 std::string truncate_date(const std::string& date, DateGranularity g) {
    int year = 0, month = 0, dom = 0;
    if (g == DateGranularity::None || !parse_ymd(date, year, month, dom)) {
        return date;
    }

    char text[32];
    if (g == DateGranularity::Year) {
        std::snprintf(text, sizeof(text), "%04d", year);
    } else if (g == DateGranularity::Month) {
        std::snprintf(text, sizeof(text), "%04d-%02d", year, month);
    } else if (g == DateGranularity::Day) {
        std::snprintf(text, sizeof(text), "%04d-%02d-%02d", year, month, dom);
    } else if (g == DateGranularity::Hour) {
        int hour = 0;
        const bool has_hour = date.size() >= 13 && date[10] == 'T'
            && date[11] >= '0' && date[11] <= '9'
            && date[12] >= '0' && date[12] <= '9';
        if (has_hour) {
            hour = (date[11]-'0')*10 + (date[12]-'0');
            if (hour > 23) hour = 0; // out-of-range hours fall back to 00
        }
        std::snprintf(text, sizeof(text), "%04d-%02d-%02dT%02d",
                      year, month, dom, hour);
    } else if (g == DateGranularity::Week) {
        // ymd_to_days counts from 0000-03-01, a Wednesday, so day_no % 7 is
        // 0=Wed, 1=Thu, 2=Fri, 3=Sat, 4=Sun, 5=Mon, 6=Tue. The number of days
        // elapsed since Monday is therefore (weekday + 2) % 7.
        const long day_no = ymd_to_days(year, month, dom);
        const int weekday = (int)(((day_no % 7) + 7) % 7);
        const long monday = day_no - (weekday + 2) % 7;
        int wy, wm, wd;
        days_to_ymd(monday, wy, wm, wd);
        std::snprintf(text, sizeof(text), "%04d-%02d-%02d", wy, wm, wd);
    } else {
        return date;
    }
    return text;
 }
 // Picks a granularity from the span, in days, between two dates:
 //   > 730 days -> Year, > 60 -> Month, > 14 -> Week, otherwise Day.
 // The order of the two dates does not matter. Falls back to Day when
 // either string is rejected by parse_ymd.
 DateGranularity auto_date_granularity(const std::string& min_ymd,
                                       const std::string& max_ymd) {
    int lo_y = 0, lo_m = 0, lo_d = 0;
    int hi_y = 0, hi_m = 0, hi_d = 0;
    const bool parsed = parse_ymd(min_ymd, lo_y, lo_m, lo_d)
                     && parse_ymd(max_ymd, hi_y, hi_m, hi_d);
    if (!parsed) return DateGranularity::Day;

    const long lo = ymd_to_days(lo_y, lo_m, lo_d);
    const long hi = ymd_to_days(hi_y, hi_m, hi_d);
    const long span = (hi >= lo) ? (hi - lo) : (lo - hi);

    if (span <= 14)  return DateGranularity::Day;
    if (span <= 60)  return DateGranularity::Week;
    if (span <= 730) return DateGranularity::Month;
    return DateGranularity::Year;   // more than ~2 years
 }
 // Human-readable label for a filter preset; "?" for a value that is not one
 // of the enumerators handled below.
 const char* filter_preset_label(FilterPreset p) {
    const char* label = "?";
    switch (p) {
        case FilterPreset::Last7d:       label = "Last 7 days";   break;
        case FilterPreset::Last30d:      label = "Last 30 days";  break;
        case FilterPreset::Last90d:      label = "Last 90 days";  break;
        case FilterPreset::ExcludeNulls: label = "Exclude nulls"; break;
        case FilterPreset::NonZero:      label = "Non-zero only"; break;
    }
    return label;
 }
 // Expands a preset into concrete filters on column `col`:
 //   Last7d / Last30d / Last90d -> one Op::Gte filter whose value is
 //       `today_ymd` minus 7/30/90 days, formatted "YYYY-MM-DD". No filter is
 //       produced when `today_ymd` is rejected by parse_ymd.
 //   ExcludeNulls -> one Op::Neq "" filter.
 //   NonZero      -> Op::Neq "" followed by Op::Neq "0".
 std::vector<Filter> build_preset_filters(FilterPreset preset, int col,
                                          const std::string& today_ymd) {
    std::vector<Filter> filters;
    auto add = [&filters, col](Op op, const std::string& value) {
        Filter f;
        f.col = col;
        f.op = op;
        f.value = value;
        filters.push_back(f);
    };

    int window_days = 0; // > 0 only for the "last N days" presets
    switch (preset) {
        case FilterPreset::Last7d:  window_days = 7;  break;
        case FilterPreset::Last30d: window_days = 30; break;
        case FilterPreset::Last90d: window_days = 90; break;
        case FilterPreset::ExcludeNulls:
            add(Op::Neq, "");
            break;
        case FilterPreset::NonZero:
            add(Op::Neq, "");
            add(Op::Neq, "0");
            break;
    }

    if (window_days > 0) {
        int y, m, d;
        if (parse_ymd(today_ymd, y, m, d)) {
            int cy, cm, cd;
            days_to_ymd(ymd_to_days(y, m, d) - window_days, cy, cm, cd);
            char cutoff[16];
            std::snprintf(cutoff, sizeof(cutoff), "%04d-%02d-%02d", cy, cm, cd);
            add(Op::Gte, cutoff);
        }
    }
    return filters;
 }
 } // namespace data_table
@@ -1,26 +1,20 @@
 // Logica pura del playground data_table. Sin ImGui — testable headless.
-// Cuando se promueva al registry, esto sera la base de data_table_cpp_viz.
+// TIPOS promovidos al registry (issue 0081). Este header solo declara
 // funciones; los types vienen de cpp/functions/core/data_table_types.h.
 #pragma once
 #include "core/data_table_types.h"
 #include <string>
 #include <utility>
 #include <vector>
 namespace data_table {
-enum class Op {
+// ----------------------------------------------------------------------------
-    Eq, Neq, Gt, Gte, Lt, Lte,
+// Helpers para Op y ColumnType.
-    Contains, NotContains, StartsWith, EndsWith
+// ----------------------------------------------------------------------------
 };
 const char* op_label(Op o);
-bool op_is_string_only(Op o);
+bool        op_is_string_only(Op o);
 // ----------------------------------------------------------------------------
 // Column types - declared by the caller, with fallback to auto-detection.
 // ----------------------------------------------------------------------------
 enum class ColumnType {
    Auto, String, Int, Float, Bool, Date, Json
 };
 const char* column_type_name(ColumnType t);
 const char* column_type_icon(ColumnType t); // UTF-8 Tabler icon
@@ -36,63 +30,11 @@ ColumnType auto_detect_type(const char* const* cells, int rows, int cols,
 ColumnType effective_type(ColumnType declared,
                          const char* const* cells, int rows, int cols, int col);
 // Derived column: immutable. Two modes:
 //   1) Pure retype: source_col >= 0, formula == "". Cells come from the source.
 //   2) Formula:     source_col == -1, formula non-empty. Evaluated by Lua.
 struct DerivedColumn {
    int         source_col = -1;
    ColumnType  type       = ColumnType::String;
    std::string name;
    std::string formula;        // "" = pure retype; anything else = Lua body
    int         lua_id  = -1;   // reference in lua_engine; -1 if not compiled
    std::string compile_error;
 };
 // Filter is declared here (it used to come after State) because the TQL
 // Stage needs it. A filter compares the cells of column `col` against
 // `value` using operator `op`.
 struct Filter {
    int         col;
    Op          op;
    std::string value;
 };
 // Color rule attached to a column.
 // NOTE(review): presumably `color` is applied to cells of column `col`
 // whose text equals `equals`; the encoding of `color` is not visible
 // here - confirm against the renderer.
 struct ColorRule {
    int          col;
    std::string  equals;
    unsigned int color;
 };
 // ----------------------------------------------------------------------------
-// TQL (Table Query Language) — stage model. Ver docs/TQL.md.
+// Aggregation helpers.
 // ----------------------------------------------------------------------------
 // Aggregation functions that an Aggregation can apply.
 enum class AggFn {
    Count, Sum, Avg, Min, Max, Distinct, Stddev,
    Median, P25, P75, P90, P99, Percentile
 };
 const char* agg_fn_name(AggFn f);
 // One aggregation of a stage: function, source column and output alias.
 struct Aggregation {
    AggFn       fn = AggFn::Count;
    std::string col;         // ignored for Count
    double      arg = 0.0;   // for Percentile (0..1)
    std::string alias;       // empty -> auto-generated via aggregation_alias()
 };
 // One sort key: column name plus direction (ascending unless `desc`).
 struct SortClause {
    std::string col;
    bool        desc = false;
 };
 // Stage: one TQL layer. Stage 0 = Raw (no breakouts/aggregations).
 // Stages 1+ may group. Each stage consumes the output of the previous one.
 struct Stage {
    std::vector<Filter>          filters;
    std::vector<DerivedColumn>   derived;       // expressions of this stage
    std::vector<std::string>     breakouts;     // column names of this stage's INPUT
    std::vector<Aggregation>     aggregations;
    std::vector<SortClause>      sorts;
 };
 // Pure: alias por defecto cuando agg.alias esta vacio.
 //   count          -> "count"
 //   distinct col   -> "distinct_<col>"
@@ -101,224 +43,125 @@ struct Stage {
 std::string aggregation_alias(const Aggregation& a);
 // Pure: tipo del output de la aggregation.
 //   count, distinct   -> Int
 //   sum, avg, stddev,
 //   median, p*, percentile -> Float
 //   min, max          -> mismo tipo que la col origen
 ColumnType  aggregation_type(const Aggregation& a,
                              const std::vector<std::string>& in_headers,
                              const std::vector<ColumnType>&  in_types);
-// Output de compute_stage. Posee `cell_backing` (strings nuevos para
+// ----------------------------------------------------------------------------
-// resultados agregados) y `cells` (punteros row-major a backing o a
+// Compute pipeline.
-// `in_cells` original para passthrough).
+// ----------------------------------------------------------------------------
 // Result of a stage computation (returned by compute_stage and join_tables).
 struct StageOutput {
    std::vector<std::string>  cell_backing;  // owns strings created for the output
    std::vector<const char*>  cells;         // NOTE(review): presumably row-major, rows x cols; may point into cell_backing - confirm
    int                       rows = 0;
    int                       cols = 0;
    std::vector<std::string>  headers;
    std::vector<ColumnType>   types;
 };
 // Pure: ejecuta un Stage sobre los cells de entrada. Aplica filter -> (group+agg|passthrough) -> sort.
 StageOutput compute_stage(const char* const* in_cells, int in_rows, int in_cols,
                          const std::vector<std::string>& in_headers,
                          const std::vector<ColumnType>&  in_types,
                          const Stage& stage);
-// Pure: aplica filtros usando headers para resolver f.col (que ahora es
+// Pure: aplica filtros usando headers para resolver f.col.
 // indice en el array de in_headers, no del dataset original). Devuelve
 // indices de filas que pasan.
 std::vector<int> apply_filters(const char* const* cells, int rows, int cols,
                               const std::vector<Filter>& filters);
 // Pure: helper para drill-down. Devuelve un Filter Op::Eq sobre col_idx con
-// el value indicado. col_idx es indice en los headers del INPUT del stage
+// el value indicado.
 // previo (donde se va a aplicar el filtro).
 Filter make_drill_filter(int col_idx, const std::string& value);
 // ----------------------------------------------------------------------------
-// ViewMode: tipo de visualizacion a renderizar sobre el output del stage activo.
+// ViewMode helpers.
 // "Table" siempre disponible. Resto requiere ciertos tipos de columnas.
 // ----------------------------------------------------------------------------
-enum class ViewMode {
+const char* view_mode_token(ViewMode m);
-    Table,
+const char* view_mode_label(ViewMode m);
    // Bars
    Bar,           // horizontal bars: 1 cat + 1 num
    Column,        // vertical bars: 1 cat + 1 num
    GroupedBar,    // 1 cat + N num (side-by-side)
    StackedBar,    // 1 cat + N num (stacked)
    // Lines / area
    Line,          // X + 1..N Y series
    Area,          // shaded to y=0
    Stairs,        // step plot
    // Points
    Scatter,       // X + Y
    Bubble,        // X + Y + size
    // Distribution
    Histogram,     // 1 num
    Histogram2D,   // 2 num
    Heatmap,       // matrix from breakouts
    BoxPlot,       // 1 cat + 1 num (min/p25/p50/p75/max per group)
    // Stems / signals
    Stem,
    ErrorBars,
    // Composition
    Pie,
    Donut,
    Funnel,        // ordered descending bars
    Waterfall,     // running sum
    // Single values
    KPI,           // big text + label
    KPIGrid,       // all aggregations as cards
    // Specialized
    Candlestick,   // OHLC: time + open + high + low + close
    Radar,         // multi-axis (1 cat + N num)
 };
 const char* view_mode_token(ViewMode m);          // "table", "bar", ...
 const char* view_mode_label(ViewMode m);          // "Table", "Bar (horizontal)", ...
 ViewMode    view_mode_from_token(const char* s);
 int         view_mode_min_cols(ViewMode m);
 bool        view_mode_needs_numeric(ViewMode m);
 bool        view_mode_needs_category(ViewMode m);
 // Requiere stage agrupado (breakout+aggregation). Si user esta en stage 0 con
 // uno de estos, conviene auto-promote a stage 1.
 bool        view_mode_needs_aggregation(ViewMode m);
-// Lista completa de modos para el selector UI (orden de display).
+// Lista completa de modos para el selector UI.
 const ViewMode* all_view_modes(int* n_out);
 // ----------------------------------------------------------------------------
 // Joins (MBQL-style). Ver issue 0078.
 // ----------------------------------------------------------------------------
 enum class JoinStrategy { Left, Inner, Right, Full };
 const char*  join_strategy_token(JoinStrategy s);
 JoinStrategy join_strategy_from_token(const char* s);
 const char*  join_strategy_label(JoinStrategy s);
 // Extra table passed to render() for joins. Owned externally (by the caller).
 struct TableInput {
    std::string                 name;       // stable identifier (matches Join.source)
    std::vector<std::string>    headers;
    std::vector<ColumnType>     types;
    const char* const*          cells = nullptr;  // row-major, headers.size() cols x rows
    int                         rows  = 0;
    int                         cols  = 0;
 };
 // Join clause: joins the current table with `source` on the `on` pairs,
 // prefixing the right-hand columns with `alias.`.
 struct Join {
    std::string                                          alias;
    std::string                                          source;
    std::vector<std::pair<std::string, std::string>>     on;        // {left_col, right_col}
    JoinStrategy                                         strategy = JoinStrategy::Left;
    std::vector<std::string>                             fields;    // empty = all right-hand columns
 };
 // Pure: resuelve indice del main entre `tables` segun `main_source`.
 // Vacio -> 0. Nombre desconocido -> 0. tables vacio -> -1.
 int resolve_main_idx(const std::vector<TableInput>& tables, const std::string& main_source);
-// Pure: aplica un join sobre dos tablas. Resultado: StageOutput con
+// Pure: aplica un join sobre dos tablas.
 // `headers` = left + `<alias>.<right_col>` (filtrado por fields si no vacio).
 StageOutput join_tables(const char* const* left_cells, int left_rows, int left_cols,
                        const std::vector<std::string>& left_headers,
                        const std::vector<ColumnType>&  left_types,
                        const TableInput& right,
                        const Join& jn);
-// ViewConfig: overrides manuales de auto-detect para la vista activa.
+// ----------------------------------------------------------------------------
-// Campos vacios -> auto. Si col name no existe en output, viz cae a auto.
+// Drill apply/undo (fase 10).
-struct ViewConfig {
+// ----------------------------------------------------------------------------
-    std::string                 x_col;        // single: scatter, line, hist2d
+bool apply_drill_step(State& st, const DrillStep& step);
-    std::vector<std::string>    y_cols;       // 1..N: line/area/bar/etc
+bool undo_drill_step(State& st, const DrillStep& step);
    std::string                 size_col;     // bubble
    std::string                 cat_col;      // bar/pie/funnel/box override
    unsigned int                primary_color = 0;     // 0 = ImPlot auto
    int                         hist_bins     = 0;     // 0 = Sturges
    float                       pie_radius    = 0.0f;  // 0 = default
    bool                        show_legend   = true;
    bool                        show_markers  = false; // line/area markers
    bool                        locked        = false; // disable pan/zoom
    mutable bool                fit_request   = false; // consumed by viz::render
 };
-// VizPanel: viz adicional sobre el mismo StageOutput. State.display + viz_config
+// Pure (fase 10): drill-up. Decrementa active_stage si > 0.
-// es el panel 0 (siempre visible); extra_panels son los aniadidos por el user.
+bool drill_up(State& st);
 // One visualization panel: the view mode to draw and its configuration.
 struct VizPanel {
    ViewMode    display = ViewMode::Bar;
    ViewConfig  config;
    // Remembers the last non-Table display, for the Table<->View toggle.
    mutable ViewMode last_non_table = ViewMode::Bar;
 };
-// State: stage pipeline + viz globales.
+// Pure (fase 10): serializa una fila a TSV.
-//
+std::string row_to_tsv(const char* const* cells, int rows, int cols,
-// `stages` siempre tiene tamaño >= 1 (auto-init en compute_visible_rows / render
+                        int row_idx, const std::vector<std::string>& headers);
 // si esta vacio: se crea stages[0] vacio). Stage 0 es Raw (filters + derived +
 // sorts; SIN breakouts/aggregations). Stages 1+ pueden agrupar.
 //
 // `active_stage` = indice del stage cuyo output se renderiza.
 // `col_visible/col_order/color_rules` aplican al output del stage activo.
 struct State {
    std::vector<Stage>          stages;
    int                         active_stage = 0;
    ViewMode                    display = ViewMode::Table;
    ViewConfig                  viz_config;
    std::vector<VizPanel>       extra_panels;
    std::vector<Join>           joins;     // aplicado antes de stages[0]
    std::string                 main_source;  // name de TableInput a usar como main; vacio -> tables[0]
-    std::vector<ColorRule>      color_rules;
+// Pure (fase 10): construye filters Op::Eq desde una fila.
-    std::vector<bool>           col_visible;       // size = effective_cols del stage activo
+std::vector<Filter> build_filters_from_row(const char* const* cells, int rows,
-    std::vector<int>            col_order;         // permutacion [0..effective_cols)
+                                            int cols, int row_idx);
-    // --- Compat helpers: shortcuts a stages[0] (Raw) ---
+// ----------------------------------------------------------------------------
-    // Util tras refactor para tests / accesos puntuales. Garantizan stages[0]
+// Date granularity helpers (fase 10).
-    // existe (lo crean vacio si no).
+// ----------------------------------------------------------------------------
-    Stage&       raw();
+const char*      date_granularity_token(DateGranularity g);
-    const Stage& raw() const;
+DateGranularity  date_granularity_from_token(const char* s);
    Stage&       active();
    const Stage& active_const() const;
    void         ensure_stage0();
 };
-// Parse "1.23" -> 1.23, true. False si la celda no es numero completo.
+DateGranularity parse_breakout_granularity(const std::string& breakout,
                                           std::string& col_out);
 std::string compose_breakout(const std::string& col, DateGranularity g);
 void column_min_max(const char* const* cells, int rows, int cols, int col_idx,
                    std::string& min_out, std::string& max_out);
 // Hit-tests para click-to-drill sobre charts (fase 10).
 int nearest_index_1d(double target, const double* xs, int n);
 int nearest_index_2d(double tx, double ty,
                      const double* xs, const double* ys, int n);
 double pie_angle(double cx, double cy, double mx, double my);
 int pie_slice_at_angle(double angle, const double* sums, int n);
 void heatmap_cell_at(double px, double py, int rows, int cols,
                      int& row_out, int& col_out);
 // Date trunc + auto + presets.
 std::string truncate_date(const std::string& date, DateGranularity g);
 DateGranularity auto_date_granularity(const std::string& min_ymd,
                                      const std::string& max_ymd);
 const char* filter_preset_label(FilterPreset p);
 std::vector<Filter> build_preset_filters(FilterPreset preset, int col,
                                         const std::string& today_ymd);
 // ----------------------------------------------------------------------------
 // Misc helpers.
 // ----------------------------------------------------------------------------
 bool parse_number(const char* s, double& out);
 // Compara dos celdas con operador. Numerico si ambas parseables; lexical si no.
 bool compare(const char* a, const char* b, Op op);
 // Aplica filtros y ordena. Devuelve indices de filas visibles.
 std::vector<int> compute_visible_rows(const char* const* cells,
                                      int rows, int cols,
                                      const State& st);
 // Pure: muta col_order de st para colocar `src` en la posicion (en orden visual)
 // donde estaba `dst`. No-op si src == dst o cualquiera fuera del array.
 void reorder_column(State& st, int src, int dst);
 // Pure: dado un buffer y posicion de cursor, busca el `[` abierto sin cerrar
 // mas reciente. Devuelve su indice (o -1 si ninguno). Rellena `filter_text`
 // con los caracteres entre `[` y cursor.
 // Para autocomplete de formulas: cuando el usuario teclea `[` el ImGui callback
 // detecta esto y muestra un popup con cols disponibles.
 int find_open_bracket(const char* buf, int len, int cursor, std::string& filter_text);
 // Pure: reemplaza src[start..cursor) por "[name]". Devuelve nuevo string y
 // actualiza `new_cursor` a la posicion despues del `]`.
 std::string insert_column_ref(const std::string& src, int start, int cursor,
                              const std::string& name, int& new_cursor);
 // CSV: escapa una celda segun RFC 4180 (wrap en " si contiene , " o newline).
 std::string csv_escape(const char* s);
 // Construye TSV de un rect de seleccion. Headers SIEMPRE incluidos.
 // view_row_lo/hi: indices en visible_rows.
 // view_col_lo/hi: indices en col_order. Cols ocultas se omiten.
 std::string build_tsv(const char* const* cells, int rows, int cols,
                      const char* const* headers,
                      const std::vector<int>&  col_order,
@@ -327,19 +170,21 @@ std::string build_tsv(const char* const* cells, int rows, int cols,
                      int view_row_lo, int view_row_hi,
                      int view_col_lo, int view_col_hi);
 // Builds the CSV of the full visible view. Headers are included and cells are
 // escaped.
 std::string build_csv(const char* const* cells, int rows, int cols,
                      const char* const* headers,
                      const std::vector<int>&  col_order,
                      const std::vector<bool>& col_visible,
                      const std::vector<int>&  visible_rows);
 // ----------------------------------------------------------------------------
 // Column statistics (no movido todavia al registry).
 // ----------------------------------------------------------------------------
 struct ColStats {
-    int    total          = 0;     // filas escaneadas
+    int    total          = 0;
-    int    empty_count    = 0;     // cells == "" o null
+    int    empty_count    = 0;
-    int    unique_count   = 0;     // distintas (cap configurable)
+    int    unique_count   = 0;
-    bool   unique_capped  = false; // true si se alcanzo el cap
+    bool   unique_capped  = false;
-    bool   numeric        = false; // true si todas las cells no-vacias parsean como numero
+    bool   numeric        = false;
    int    numeric_count  = 0;
    double min            = 0;
    double max            = 0;
@@ -348,16 +193,12 @@ struct ColStats {
    double p25            = 0;
    double p50            = 0;
    double p75            = 0;
-    std::vector<float> hist;                                   // bins (HIST_BINS) si numeric
+    std::vector<float> hist;
-    std::vector<std::pair<std::string,int>> top_categories;    // top 8 por count desc
+    std::vector<std::pair<std::string,int>> top_categories;
 };
 // Number of histogram bins.
 // NOTE(review): presumably the size of ColStats::hist for numeric columns — confirm.
 constexpr int HIST_BINS = 24;
 // Pure: scans one column and returns its statistics. `unique_cap` stops the
 // distinct-value count once it is exceeded (for datasets with millions of
 // rows); 0 means no cap.
 // If `indices != nullptr` and `n_indices > 0`, only the listed rows are
 // visited (typical use: stats over the rows left visible after filtering).
 ColStats compute_column_stats(const char* const* cells, int rows, int cols,
                              int col, int unique_cap = 100000,
                              const int* indices = nullptr, int n_indices = 0);
@@ -0,0 +1,295 @@
 // llm_anthropic.cpp — minimal Anthropic client that shells out to the cURL binary.
 // See issue 0080.
 #include "llm_anthropic.h"
 #include <cctype>
 #include <cstdio>
 #include <cstdlib>
 #include <cstring>
 #include <sstream>
 #include <string>
 namespace llm_anthropic {
 using namespace data_table;
 namespace {
 // Minimal JSON string escaper. Quotes, backslashes and the common control
 // characters get their short escape; any other byte below 0x20 is written as
 // \u00XX. Every remaining byte is copied through unchanged.
 std::string json_escape(const std::string& s) {
    std::string escaped;
    escaped.reserve(s.size() + 8);
    for (size_t idx = 0; idx < s.size(); ++idx) {
        const unsigned char byte = (unsigned char)s[idx];
        if (byte == '"')  { escaped += "\\\""; continue; }
        if (byte == '\\') { escaped += "\\\\"; continue; }
        if (byte == '\n') { escaped += "\\n";  continue; }
        if (byte == '\r') { escaped += "\\r";  continue; }
        if (byte == '\t') { escaped += "\\t";  continue; }
        if (byte == '\b') { escaped += "\\b";  continue; }
        if (byte == '\f') { escaped += "\\f";  continue; }
        if (byte >= 0x20) {
            // Printable or non-ASCII byte: emit as-is.
            escaped += s[idx];
            continue;
        }
        // Remaining control characters have no short form.
        char hex[8];
        std::snprintf(hex, sizeof(hex), "\\u%04x", (int)byte);
        escaped += hex;
    }
    return escaped;
 }
 // Maps a ColumnType to the lowercase label written into the schema block.
 // A value not handled by the switch yields "?".
 const char* col_type_doc(ColumnType t) {
    const char* label = "?";
    switch (t) {
        case ColumnType::String: label = "string"; break;
        case ColumnType::Int:    label = "int";    break;
        case ColumnType::Float:  label = "float";  break;
        case ColumnType::Bool:   label = "bool";   break;
        case ColumnType::Date:   label = "date";   break;
        case ColumnType::Json:   label = "json";   break;
        case ColumnType::Auto:   label = "auto";   break;
    }
    return label;
 }
 std::string build_schema_block(const AskInput& in) {
    std::ostringstream os;
    os << "Available columns (stage 0 input):\n";
    for (size_t i = 0; i < in.col_names.size(); ++i) {
        os << "  - " << in.col_names[i] << ": "
           << col_type_doc(i < in.col_types.size() ? in.col_types[i] : ColumnType::String)
           << "\n";
    }
    if (!in.joinable_names.empty()) {
        os << "Joinable tables (for join clause):\n";
        for (const auto& n : in.joinable_names) os << "  - " << n << "\n";
    }
    return os.str();
 }
 // Returns the system prompt for the requested output mode: the TQL (Lua)
 // instructions for OutputMode::TQL, the DuckDB SQL instructions for any other
 // mode.
 std::string build_system_prompt(OutputMode mode) {
    const char* sql_prompt =
        "You are a DuckDB SQL expert. Output ONLY a SQL code block compatible with DuckDB.\n"
        "Use CTEs to chain stages. Use date_trunc('month', col) for granularity.\n"
        "Use quantile_cont(col, p) for percentiles. Use ? for bound params.\n"
        "Joins: LEFT/INNER/RIGHT/FULL OUTER JOIN. String concat: ||. Aggregations: standard SQL.\n"
        "Output format: ```sql\\n...\\n```";
    const char* tql_prompt =
        "You are a TQL (Table Query Language) expert. Output ONLY a Lua code block. "
        "TQL is a Lua table with shape:\n"
        "  return { version=1, display=\"table\"|\"bar\"|\"line\"|...,\n"
        "    main_source=\"name\", joins={ {alias,source,on,strategy,fields},... },\n"
        "    stages={ {filter={{op,col,value},...}, breakout={...}, aggregation={...}, sort={...} },... },\n"
        "    columns={ name = {type=\"int|float|...\", formula=\"[col]+1\"},... }\n"
        "  }\n"
        "Stage 0 = Raw (filters + derived + sort, NO breakouts/aggs).\n"
        "Stage 1+ groups (breakouts + aggregations).\n"
        "Breakout granularity: append :year|:month|:week|:day|:hour to col name.\n"
        "Aggregation functions: count|sum|avg|min|max|distinct|stddev|median|p25|p75|p90|p99|percentile.\n"
        "Filter ops: '='|'!='|'<'|'<='|'>'|'>='|'contains'|'!contains'|'starts'|'ends'.\n"
        "Sort: {{dir, col}, ...} where dir = 'asc'|'desc'.\n"
        "Join strategies: 'left'|'inner'|'right'|'full'.\n"
        "Formulas use Lua expression syntax with [col] for column refs.\n"
        "Output format: ```lua\\n...\\n```";
    return mode == OutputMode::TQL ? tql_prompt : sql_prompt;
 }
 } // anon
 // Assembles the JSON body for a POST to /v1/messages: model, max_tokens, the
 // system prompt for in.mode, and a single user message holding the question,
 // the schema block and (when non-empty) the current TQL in a ```lua fence.
 // An empty in.model falls back to "claude-sonnet-4-6".
 std::string build_request_body(const AskInput& in) {
    const std::string system_text = build_system_prompt(in.mode);

    // User message: question, blank line, schema, optional current TQL.
    std::string user_text = "Question: ";
    user_text += in.question;
    user_text += "\n\n";
    user_text += build_schema_block(in);
    user_text += "\n";
    if (!in.tql_current.empty()) {
        user_text += "Current TQL:\n```lua\n";
        user_text += in.tql_current;
        user_text += "\n```\n";
    }

    std::string model_name = in.model;
    if (model_name.empty()) model_name = "claude-sonnet-4-6";

    std::ostringstream json;
    json << "{";
    json << "\"model\":\"" << json_escape(model_name) << "\",";
    json << "\"max_tokens\":" << in.max_tokens << ",";
    json << "\"system\":\"" << json_escape(system_text) << "\",";
    json << "\"messages\":[{";
    json << "\"role\":\"user\",";
    json << "\"content\":\"" << json_escape(user_text) << "\"";
    json << "}]";
    json << "}";
    return json.str();
 }
 // Extracts the contents of the first fenced code block in `raw`.
 //
 // Lookup order: a fence tagged with `lang` ("```lua", "```sql") first, then any
 // plain "```" fence, whose alphanumeric language tag (if any) is skipped.
 // When `raw` contains no fence at all, `raw` is returned with leading and
 // trailing whitespace removed.
 //
 // The rest of the opening fence line (trailing blanks and an LF or CRLF line
 // ending) is dropped, so a CRLF-formatted reply does not leave a stray "\r\n"
 // at the start of the code. A missing closing fence means "up to the end of
 // `raw`". Trailing '\n' / '\r' characters are trimmed from the result.
 std::string extract_code_block(const std::string& raw, const std::string& lang) {
    const std::string tagged_fence = "```" + lang;
    size_t code_start = std::string::npos;

    size_t pos = raw.find(tagged_fence);
    if (pos != std::string::npos) {
        code_start = pos + tagged_fence.size();
    } else {
        pos = raw.find("```");
        if (pos != std::string::npos) {
            code_start = pos + 3;
            // Skip an optional language tag such as "py".
            while (code_start < raw.size() &&
                   std::isalnum((unsigned char)raw[code_start])) {
                ++code_start;
            }
        }
    }

    if (code_start == std::string::npos) {
        // No fence: return the whole text, trimmed.
        size_t first = 0;
        while (first < raw.size() && std::isspace((unsigned char)raw[first])) ++first;
        size_t last = raw.size();
        while (last > first && std::isspace((unsigned char)raw[last - 1])) --last;
        return raw.substr(first, last - first);
    }

    // Consume the remainder of the fence line, but only when it really ends in
    // a newline; inline content after the fence is left untouched.
    size_t probe = code_start;
    while (probe < raw.size() && (raw[probe] == ' ' || raw[probe] == '\t')) ++probe;
    if (probe < raw.size() && raw[probe] == '\r') ++probe;
    if (probe < raw.size() && raw[probe] == '\n') code_start = probe + 1;

    size_t code_end = raw.find("```", code_start);
    if (code_end == std::string::npos) code_end = raw.size();
    std::string code = raw.substr(code_start, code_end - code_start);

    // Trim trailing line terminators.
    while (!code.empty() && (code.back() == '\n' || code.back() == '\r')) code.pop_back();
    return code;
 }
 // Reads four hex digits starting at json[pos] into `value`.
 // Returns false when fewer than four characters remain or any is not hex.
 static bool read_hex4(const std::string& json, size_t pos, unsigned& value) {
    if (pos + 4 > json.size()) return false;
    unsigned acc = 0;
    for (size_t k = 0; k < 4; ++k) {
        const char c = json[pos + k];
        unsigned digit;
        if      (c >= '0' && c <= '9') digit = (unsigned)(c - '0');
        else if (c >= 'a' && c <= 'f') digit = (unsigned)(c - 'a') + 10;
        else if (c >= 'A' && c <= 'F') digit = (unsigned)(c - 'A') + 10;
        else return false;
        acc = acc * 16 + digit;
    }
    value = acc;
    return true;
 }
 // Appends code point `cp` to `out` encoded as UTF-8.
 static void append_utf8(std::string& out, unsigned cp) {
    if (cp < 0x80) {
        out += (char)cp;
    } else if (cp < 0x800) {
        out += (char)(0xC0 | (cp >> 6));
        out += (char)(0x80 | (cp & 0x3F));
    } else if (cp < 0x10000) {
        out += (char)(0xE0 | (cp >> 12));
        out += (char)(0x80 | ((cp >> 6) & 0x3F));
        out += (char)(0x80 | (cp & 0x3F));
    } else {
        out += (char)(0xF0 | (cp >> 18));
        out += (char)(0x80 | ((cp >> 12) & 0x3F));
        out += (char)(0x80 | ((cp >> 6) & 0x3F));
        out += (char)(0x80 | (cp & 0x3F));
    }
 }
 // Extracts the first text payload from an Anthropic response body.
 //
 // Scans for the first `"text"` token that is followed (after blanks and ':')
 // by a string value, and returns that string with its JSON escapes decoded.
 // A `"text"` token that is itself a value (as in "type":"text") is skipped.
 // Returns "" when no such key is found.
 //
 // Escapes handled: \n \t \r \b \f \" \\ \/ and \uXXXX (encoded as UTF-8,
 // surrogate pairs combined; a lone surrogate becomes '?'). An unknown escape
 // yields the escaped character itself.
 std::string parse_response_text(const std::string& json) {
    const std::string key = "\"text\"";
    for (size_t t = json.find(key); t != std::string::npos; t = json.find(key, t + 1)) {
        size_t i = t + key.size();
        // Skip the separator between the key and its value.
        while (i < json.size() && (json[i] == ' ' || json[i] == ':' || json[i] == '\t' ||
                                   json[i] == '\n' || json[i] == '\r')) {
            ++i;
        }
        if (i >= json.size() || json[i] != '"') continue;  // not a string value
        ++i;

        std::string out;
        while (i < json.size() && json[i] != '"') {
            if (json[i] != '\\' || i + 1 >= json.size()) {
                out += json[i++];
                continue;
            }
            const char esc = json[i + 1];
            i += 2;  // consume the backslash and the escape letter
            switch (esc) {
                case 'n':  out += '\n'; break;
                case 't':  out += '\t'; break;
                case 'r':  out += '\r'; break;
                case 'b':  out += '\b'; break;
                case 'f':  out += '\f'; break;
                case '"':  out += '"';  break;
                case '\\': out += '\\'; break;
                case '/':  out += '/';  break;
                case 'u': {
                    unsigned cp = 0;
                    if (!read_hex4(json, i, cp)) {
                        out += esc;  // malformed escape: keep the letter
                        break;
                    }
                    i += 4;  // exactly the four hex digits, nothing more
                    if (cp >= 0xD800 && cp <= 0xDBFF) {
                        // High surrogate: needs a following \uDC00..\uDFFF.
                        unsigned low = 0;
                        if (i + 1 < json.size() && json[i] == '\\' && json[i + 1] == 'u' &&
                            read_hex4(json, i + 2, low) && low >= 0xDC00 && low <= 0xDFFF) {
                            cp = 0x10000 + ((cp - 0xD800) << 10) + (low - 0xDC00);
                            i += 6;
                            append_utf8(out, cp);
                        } else {
                            out += '?';
                        }
                    } else if (cp >= 0xDC00 && cp <= 0xDFFF) {
                        out += '?';  // unpaired low surrogate
                    } else {
                        append_utf8(out, cp);
                    }
                    break;
                }
                default:
                    out += esc;
                    break;
            }
        }
        return out;
    }
    return "";
 }
 namespace {
 // Resolves the API key by priority: the `provided` argument, then the
 // FN_LLM_API_KEY environment variable, then the first line printed by
 // `pass anthropic/api-key`. Returns "" when none of them yields a key.
 std::string resolve_api_key(const std::string& provided) {
    if (provided.empty() == false) return provided;

    if (const char* from_env = std::getenv("FN_LLM_API_KEY")) {
        if (from_env[0] != '\0') return from_env;
    }

    // Last resort: ask the `pass` password manager.
    std::string key;
    if (FILE* pipe = popen("pass anthropic/api-key 2>/dev/null | head -n1", "r")) {
        char chunk[256];
        while (fgets(chunk, sizeof(chunk), pipe) != nullptr) key.append(chunk);
        pclose(pipe);
        // Drop the trailing line terminator(s).
        const size_t last = key.find_last_not_of("\r\n");
        key.erase(last == std::string::npos ? 0 : last + 1);
    }
    return key;
 }
 } // anon
 // True when `v` may be placed between double quotes in the command line built
 // by call_api without being interpreted by the shell: no control characters
 // and none of the characters that stay special inside double quotes.
 static bool safe_inside_double_quotes(const std::string& v) {
    for (size_t k = 0; k < v.size(); ++k) {
        const unsigned char u = (unsigned char)v[k];
        if (u < 0x20 || u == 0x7f) return false;
        if (u == '"' || u == '\\' || u == '$' || u == '`') return false;
    }
    return true;
 }
 // POSTs `body` to the messages endpoint by running the `curl` binary through
 // std::system and returns the raw response body.
 //
 //   body      - request payload, written verbatim to a temporary file.
 //   api_key   - explicit key; when empty, resolve_api_key() supplies one.
 //   error_out - cleared on entry; set to a message on failure.
 //
 // Returns the response text, or "" with `error_out` set when no key is
 // available, a value cannot be embedded safely in the command, a temporary
 // file cannot be created/written, or the command exits non-zero.
 //
 // Environment: FN_LLM_MOCK_RESPONSE (non-empty) short-circuits the call and is
 // returned as the response (test injection); FN_LLM_ENDPOINT overrides the
 // default URL.
 //
 // The key and the endpoint end up inside a shell command, so values containing
 // quotes, backslashes, `$`, backticks or control characters are rejected
 // instead of being passed to the shell.
 // NOTE(review): the key still travels in curl's argv, and std::tmpnam names
 // are guessable; a follow-up could move to `curl --config` plus mkstemp.
 std::string call_api(const std::string& body, const std::string& api_key,
                      std::string& error_out) {
    error_out.clear();
    // Test injection
    const char* mock = std::getenv("FN_LLM_MOCK_RESPONSE");
    if (mock && *mock) return mock;
    std::string key = resolve_api_key(api_key);
    if (key.empty()) {
        error_out = "no API key (set FN_LLM_API_KEY env, pass param, or `pass anthropic/api-key`)";
        return "";
    }
    const char* endpoint_env = std::getenv("FN_LLM_ENDPOINT");
    std::string endpoint = endpoint_env && *endpoint_env
        ? endpoint_env
        : "https://api.anthropic.com/v1/messages";
    if (!safe_inside_double_quotes(key)) {
        error_out = "API key contains characters that are unsafe in a shell command";
        return "";
    }
    if (!safe_inside_double_quotes(endpoint)) {
        error_out = "endpoint contains characters that are unsafe in a shell command";
        return "";
    }
    // popen "w+" is not portable, so the body goes through a temporary file and
    // curl's output is redirected into a second one.
    // std::tmpnam may return null; building a std::string from it would be UB.
    const char* name_in = std::tmpnam(nullptr);
    if (!name_in) { error_out = "tmp file name fail"; return ""; }
    std::string tmp_in = name_in;
    const char* name_out = std::tmpnam(nullptr);
    if (!name_out) { error_out = "tmp file name fail"; return ""; }
    std::string tmp_out = name_out;
    {
        FILE* f = std::fopen(tmp_in.c_str(), "w");
        if (!f) { error_out = "tmp file write fail"; return ""; }
        const bool wrote  = std::fwrite(body.data(), 1, body.size(), f) == body.size();
        const bool closed = std::fclose(f) == 0;
        if (!wrote || !closed) {
            // A truncated body must not be sent.
            std::remove(tmp_in.c_str());
            error_out = "tmp file write fail";
            return "";
        }
    }
    // The endpoint is quoted so that `&` or `?` in a query string is not
    // interpreted by the shell.
    std::string cmd2 = "curl -sS -X POST "
        "-H \"content-type: application/json\" "
        "-H \"anthropic-version: 2023-06-01\" "
        "-H \"x-api-key: " + key + "\" "
        "--data-binary @" + tmp_in + " \"" + endpoint + "\""
        + " > " + tmp_out + " 2>&1";
    int rc = std::system(cmd2.c_str());
    std::string resp;
    {
        FILE* f = std::fopen(tmp_out.c_str(), "r");
        if (f) {
            char buf[4096];
            size_t n;
            while ((n = std::fread(buf, 1, sizeof(buf), f)) > 0) resp.append(buf, n);
            std::fclose(f);
        }
    }
    std::remove(tmp_in.c_str());
    std::remove(tmp_out.c_str());
    if (rc != 0) {
        // stderr was merged into the output file, so `resp` carries curl's message.
        error_out = "curl exit " + std::to_string(rc) + ": " + resp;
        return "";
    }
    return resp;
 }
 // Orchestrates one question: builds the request body, POSTs it through
 // call_api, decodes the response text and extracts the fenced code block
 // ("lua" for OutputMode::TQL, "sql" otherwise).
 //
 // On failure `error` is set and `code` is left empty. This covers transport
 // errors reported by call_api and responses that carry no text at all (for
 // example an API error payload), which would otherwise look like a successful
 // but empty answer.
 AskResult ask(const AskInput& in, const std::string& api_key) {
    AskResult r;
    const std::string body = build_request_body(in);
    const std::string raw_json = call_api(body, api_key, r.error);
    if (!r.error.empty()) return r;
    r.raw = parse_response_text(raw_json);
    if (r.raw.empty()) {
        // Keep the raw body in the message so the caller can see what came back.
        r.error = "no text content in API response: " + raw_json;
        return r;
    }
    const std::string lang = (in.mode == OutputMode::TQL) ? "lua" : "sql";
    r.code = extract_code_block(r.raw, lang);
    return r;
 }
 } // namespace llm_anthropic
@@ -0,0 +1,58 @@
 // llm_anthropic: minimal HTTP client for the Anthropic Claude API.
 // No external library dependencies (it shells out to the cURL binary).
 // See issue 0080.
 #pragma once
 #include "data_table_logic.h"
 #include "tql_to_sql.h"
 #include <string>
 #include <vector>
 namespace llm_anthropic {
 // Kind of code the model is asked to produce: a TQL (Lua) block or a SQL block.
 enum class OutputMode { TQL, SQL };
 // Everything needed to build one request to the model.
 struct AskInput {
    std::string                                question;       // natural-language question
    std::string                                tql_current;    // current TQL (as emitted)
    std::vector<std::string>                   col_names;      // input schema: column names
    std::vector<data_table::ColumnType>        col_types;      // input schema: column types
    std::vector<std::string>                   joinable_names; // tables available for a join
    OutputMode                                 mode = OutputMode::TQL;
    std::string                                model;          // empty -> default model
    int                                        max_tokens = 8192;
 };
 // Outcome of one ask() call.
 struct AskResult {
    std::string code;     // extracted ```lua or ```sql block (without the fences)
    std::string raw;      // full text of the response
    std::string error;    // non-empty on failure
    // NOTE(review): token counters; confirm where (or whether) they get filled in.
    int         tokens_in  = 0;
    int         tokens_out = 0;
 };
 // Pure: builds the system prompt and the JSON-escaped user message.
 // Returns the complete JSON body to POST to the /v1/messages endpoint.
 std::string build_request_body(const AskInput& in);
 // Pure: extracts the first ```<lang>\n ... \n``` block from `raw`.
 // lang = "lua"|"sql". If no fence is found, returns `raw` stripped of
 // surrounding whitespace.
 std::string extract_code_block(const std::string& raw, const std::string& lang);
 // Pure: extracts the text from an Anthropic response JSON.
 // Looks for `"content":[{"type":"text","text":"..."}]` and returns the text
 // (the first "text" key that holds a string value); "" when there is none.
 std::string parse_response_text(const std::string& json_body);
 // Impure: runs the cURL binary (via std::system, with temporary files for the
 // request and the response), POSTs `body` to the Anthropic /v1/messages
 // endpoint and returns the response body (raw JSON). On failure returns ""
 // and sets `error_out`. The API key is read from:
 //   1. the `api_key` parameter, if non-empty
 //   2. the FN_LLM_API_KEY environment variable
 //   3. `pass anthropic/api-key | head -n1`
 // If the FN_LLM_MOCK_RESPONSE environment variable is set, its value is
 // returned instead (test injection).
 std::string call_api(const std::string& body, const std::string& api_key,
                      std::string& error_out);
 // Orchestrator: build prompt + POST + parse. Convenience wrapper around
 // build_request_body, call_api, parse_response_text and extract_code_block.
 // An empty `api_key` lets call_api resolve the key on its own.
 AskResult ask(const AskInput& in, const std::string& api_key = "");
 } // namespace llm_anthropic
@@ -7,9 +7,12 @@
 // Exit 0 = todos los checks pasan, 1 = falla.
 #include "data_table_logic.h"
 #include "llm_anthropic.h"
 #include "lua_engine.h"
 #include "tql.h"
 #include "tql_to_sql.h"
 #include <cmath>
 #include <cstdio>
 #include <cstdlib>
 #include <cstring>
@@ -2051,6 +2054,782 @@ return {
        check(join_strategy_from_token("nope")  == JoinStrategy::Left,  "phase9: parse fallback left");
    }
    // === phase10: drill extendido ===
    {
        // truncate_date — granularities sobre 2026-05-12 (martes).
        std::string d = "2026-05-12";
        check(truncate_date(d, DateGranularity::Year)  == "2026",       "phase10: trunc year");
        check(truncate_date(d, DateGranularity::Month) == "2026-05",    "phase10: trunc month");
        check(truncate_date(d, DateGranularity::Day)   == "2026-05-12", "phase10: trunc day");
        check(truncate_date(d, DateGranularity::Week)  == "2026-05-11", "phase10: trunc week (Mon)");
        check(truncate_date("2026-05-12T14:33:01", DateGranularity::Hour) == "2026-05-12T14",
              "phase10: trunc hour");
        check(truncate_date("not-a-date", DateGranularity::Month) == "not-a-date",
              "phase10: trunc passthrough invalido");
        check(truncate_date(d, DateGranularity::None) == d, "phase10: trunc None == identidad");
    }
    {
        // auto_date_granularity
        check(auto_date_granularity("2024-01-01", "2026-05-12") == DateGranularity::Year,
              "phase10: auto year >2y");
        check(auto_date_granularity("2026-01-01", "2026-05-12") == DateGranularity::Month,
              "phase10: auto month >60d");
        check(auto_date_granularity("2026-04-15", "2026-05-12") == DateGranularity::Week,
              "phase10: auto week >14d");
        check(auto_date_granularity("2026-05-05", "2026-05-12") == DateGranularity::Day,
              "phase10: auto day <=14d");
        check(auto_date_granularity("bad", "2026-05-12") == DateGranularity::Day,
              "phase10: auto fallback day");
    }
    {
        // parse_breakout_granularity
        std::string col;
        check(parse_breakout_granularity("ts:month", col) == DateGranularity::Month,
              "phase10: parse breakout month");
        check(col == "ts", "phase10: parse breakout col stripped");
        check(parse_breakout_granularity("ts", col) == DateGranularity::None,
              "phase10: parse breakout sin sufijo None");
        check(col == "ts", "phase10: col sin sufijo intacto");
        check(parse_breakout_granularity("ts:wat", col) == DateGranularity::None,
              "phase10: sufijo desconocido None");
        check(col == "ts:wat", "phase10: col preserva sufijo desconocido");
    }
    {
        // compose_breakout
        check(compose_breakout("ts", DateGranularity::None)  == "ts",        "phase10: compose None");
        check(compose_breakout("ts", DateGranularity::Month) == "ts:month",  "phase10: compose month");
        check(compose_breakout("ts", DateGranularity::Year)  == "ts:year",   "phase10: compose year");
        // round-trip parse(compose)
        std::string col;
        auto g = parse_breakout_granularity(compose_breakout("foo", DateGranularity::Week), col);
        check(g == DateGranularity::Week && col == "foo", "phase10: compose+parse round-trip");
    }
    {
        // column_min_max
        const char* cells[] = {
            "2026-03-01",
            "2026-01-15",
            "",
            "2026-05-12",
            "2026-02-22",
        };
        std::string lo, hi;
        column_min_max(cells, 5, 1, 0, lo, hi);
        check(lo == "2026-01-15" && hi == "2026-05-12", "phase10: column_min_max ISO ordena lexical");
        const char* empty_cells[] = {"", "", ""};
        column_min_max(empty_cells, 3, 1, 0, lo, hi);
        check(lo.empty() && hi.empty(), "phase10: column_min_max sin datos -> vacio");
        column_min_max(cells, 5, 1, 5, lo, hi);  // col fuera de rango
        check(lo.empty() && hi.empty(), "phase10: column_min_max col fuera de rango -> vacio");
    }
    {
        // tokens round-trip granularity
        check(date_granularity_from_token("year")  == DateGranularity::Year,  "phase10: token year");
        check(date_granularity_from_token("month") == DateGranularity::Month, "phase10: token month");
        check(date_granularity_from_token("week")  == DateGranularity::Week,  "phase10: token week");
        check(date_granularity_from_token("day")   == DateGranularity::Day,   "phase10: token day");
        check(date_granularity_from_token("hour")  == DateGranularity::Hour,  "phase10: token hour");
        check(date_granularity_from_token("nope")  == DateGranularity::None,  "phase10: token fallback None");
        check(std::string(date_granularity_token(DateGranularity::Month)) == "month",
              "phase10: emit month");
        check(std::string(date_granularity_token(DateGranularity::None)) == "",
              "phase10: emit None empty");
    }
    {
        // build_preset_filters
        auto f7 = build_preset_filters(FilterPreset::Last7d, 2, "2026-05-12");
        check(f7.size() == 1, "phase10: Last7d -> 1 filter");
        check(f7[0].col == 2 && f7[0].op == Op::Gte && f7[0].value == "2026-05-05",
              "phase10: Last7d -> Gte 2026-05-05");
        auto f30 = build_preset_filters(FilterPreset::Last30d, 2, "2026-05-12");
        check(f30[0].value == "2026-04-12", "phase10: Last30d -> 2026-04-12");
        auto f90 = build_preset_filters(FilterPreset::Last90d, 2, "2026-05-12");
        check(f90[0].value == "2026-02-11", "phase10: Last90d -> 2026-02-11");
        auto fn0 = build_preset_filters(FilterPreset::ExcludeNulls, 3, "");
        check(fn0.size() == 1 && fn0[0].op == Op::Neq && fn0[0].value == "",
              "phase10: ExcludeNulls -> Neq ''");
        auto fnz = build_preset_filters(FilterPreset::NonZero, 4, "");
        check(fnz.size() == 2, "phase10: NonZero -> 2 filters");
        check(fnz[0].op == Op::Neq && fnz[0].value == "" &&
              fnz[1].op == Op::Neq && fnz[1].value == "0",
              "phase10: NonZero -> Neq '' AND Neq '0'");
        auto fbad = build_preset_filters(FilterPreset::Last7d, 2, "bad-date");
        check(fbad.empty(), "phase10: Last7d con today invalido -> empty");
    }
    {
        // TQL round-trip: breakout con sufijo :granularity.
        State st0;
        st0.stages.resize(2);
        st0.stages[1].breakouts = {"ts:month"};
        Aggregation a; a.fn = AggFn::Count; a.alias = "n";
        st0.stages[1].aggregations.push_back(a);
        std::vector<std::string> hdrs = {"ts", "amount"};
        std::vector<ColumnType>  tys  = {ColumnType::Date, ColumnType::Float};
        int eff = 2;
        std::string text = tql::emit(st0, hdrs, tys);
        check(text.find("\"ts:month\"") != std::string::npos,
              "phase10 TQL: emit breakout granularity sufijo");
        std::string err;
        State st1;
        bool ok = tql::apply(text, st1, hdrs, tys, nullptr, 2, eff, &err);
        check(ok, "phase10 TQL: apply round-trip ok");
        check(st1.stages.size() >= 2 && st1.stages[1].breakouts.size() == 1 &&
              st1.stages[1].breakouts[0] == "ts:month",
              "phase10 TQL: breakout granularity preservada");
    }
    {
        // compute_stage aplica truncado de fecha cuando hay :granularity.
        const char* cells[] = {
            "2026-01-15", "10",
            "2026-01-22", "20",
            "2026-02-03", "30",
            "2026-03-11", "40",
        };
        std::vector<std::string> hdrs = {"ts", "amount"};
        std::vector<ColumnType>  tys  = {ColumnType::Date, ColumnType::Float};
        Stage s1;
        s1.breakouts = {"ts:month"};
        Aggregation ag; ag.fn = AggFn::Count; ag.alias = "n";
        s1.aggregations.push_back(ag);
        auto out = compute_stage(cells, 4, 2, hdrs, tys, s1);
        check(out.rows == 3, "phase10: trunc month -> 3 grupos (Jan/Feb/Mar)");
        check(out.headers[0] == "ts:month", "phase10: header preserva sufijo");
        // Verifica que algun valor de breakout es "2026-01"
        bool found_jan = false;
        for (int r = 0; r < out.rows; ++r) {
            if (std::string(out.cells[r * out.cols + 0]) == "2026-01") found_jan = true;
        }
        check(found_jan, "phase10: trunc value '2026-01' presente");
    }
    // === phase10 hit-tests para click-to-drill ===
    {
        // nearest_index_1d
        double xs[] = {0, 1, 2, 3, 4};
        check(nearest_index_1d(0.0, xs, 5) == 0,    "phase10 hit: nearest_1d exact 0");
        check(nearest_index_1d(2.4, xs, 5) == 2,    "phase10 hit: nearest_1d 2.4 -> 2");
        check(nearest_index_1d(2.6, xs, 5) == 3,    "phase10 hit: nearest_1d 2.6 -> 3");
        check(nearest_index_1d(-1.0, xs, 5) == 0,   "phase10 hit: nearest_1d clamp left");
        check(nearest_index_1d(99.0, xs, 5) == 4,   "phase10 hit: nearest_1d clamp right");
        check(nearest_index_1d(0.0, nullptr, 0) == -1, "phase10 hit: nearest_1d empty -> -1");
    }
    {
        // nearest_index_2d
        double xs[] = {0, 10, 5, 5};
        double ys[] = {0, 0, 10, 5};
        check(nearest_index_2d(0.1, 0.1, xs, ys, 4) == 0, "phase10 hit: nearest_2d cerca de (0,0)");
        check(nearest_index_2d(9.9, 0.0, xs, ys, 4) == 1, "phase10 hit: nearest_2d cerca de (10,0)");
        check(nearest_index_2d(5.0, 4.9, xs, ys, 4) == 3, "phase10 hit: nearest_2d cerca de (5,5)");
        check(nearest_index_2d(0, 0, nullptr, nullptr, 0) == -1, "phase10 hit: nearest_2d empty -> -1");
    }
    {
        // pie_angle (convencion ImPlot: 0 = top, sentido horario)
        const double PI = 3.14159265358979323846;
        double a;
        a = pie_angle(0.5, 0.5, 0.5, 0.0); // top
        check(std::fabs(a - 0.0) < 1e-9, "phase10 hit: pie_angle top = 0");
        a = pie_angle(0.5, 0.5, 1.0, 0.5); // right -> PI/2
        check(std::fabs(a - PI/2) < 1e-9, "phase10 hit: pie_angle right = PI/2");
        a = pie_angle(0.5, 0.5, 0.5, 1.0); // bottom -> PI
        check(std::fabs(a - PI) < 1e-9, "phase10 hit: pie_angle bottom = PI");
        a = pie_angle(0.5, 0.5, 0.0, 0.5); // left -> 3*PI/2
        check(std::fabs(a - 3*PI/2) < 1e-9, "phase10 hit: pie_angle left = 3PI/2");
    }
    {
        // pie_slice_at_angle: 4 slices iguales -> cada uno cubre PI/2.
        double sums[] = {1.0, 1.0, 1.0, 1.0};
        const double PI = 3.14159265358979323846;
        check(pie_slice_at_angle(0.0,        sums, 4) == 0, "phase10 hit: slice 0 (top)");
        check(pie_slice_at_angle(PI/4,       sums, 4) == 0, "phase10 hit: slice 0 (mid)");
        check(pie_slice_at_angle(PI/2 + 0.1, sums, 4) == 1, "phase10 hit: slice 1");
        check(pie_slice_at_angle(PI + 0.1,   sums, 4) == 2, "phase10 hit: slice 2");
        check(pie_slice_at_angle(3*PI/2 + 0.1, sums, 4) == 3, "phase10 hit: slice 3");
        double zeros[] = {0.0, 0.0};
        check(pie_slice_at_angle(0.5, zeros, 2) == -1, "phase10 hit: total 0 -> -1");
        check(pie_slice_at_angle(0.0, nullptr, 0) == -1, "phase10 hit: empty -> -1");
        double neg[] = {1.0, -1.0};
        check(pie_slice_at_angle(0.5, neg, 2) == -1, "phase10 hit: neg sum -> -1");
    }
    {
        // heatmap_cell_at
        int rr, cc;
        heatmap_cell_at(1.5, 2.5, 4, 3, rr, cc);
        check(rr == 2 && cc == 1, "phase10 hit: heatmap (1.5,2.5) en 4x3 -> r2 c1");
        heatmap_cell_at(-1, 0, 4, 3, rr, cc);
        check(rr == -1 && cc == -1, "phase10 hit: heatmap fuera de rango");
        heatmap_cell_at(0, 0, 0, 0, rr, cc);
        check(rr == -1 && cc == -1, "phase10 hit: heatmap empty");
    }
    {
        // E2E click-to-drill: simular pipeline stage1 agrupado, click en row idx 2.
        State st;
        st.stages.resize(2);
        std::vector<std::string> hdrs = {"lang", "n"};
        std::vector<ColumnType>  tys  = {ColumnType::String, ColumnType::Int};
        st.stages[1].breakouts.push_back("lang");
        st.stages[1].aggregations.push_back({AggFn::Count});
        st.active_stage = 1;
        // Stage 1 output simulado (3 grupos).
        const char* g_cells[] = {
            "go",  "3",
            "py",  "2",
            "cpp", "1",
        };
        StageOutput so;
        so.cells.insert(so.cells.end(), g_cells, g_cells + 6);
        so.rows = 3;
        so.cols = 2;
        so.headers = {"lang", "count"};
        // Simular click en row idx 2 (cpp).
        int clicked_row = 2;
        int n_brk = (int)st.stages[1].breakouts.size();
        check(n_brk == 1, "phase10 e2e: 1 breakout");
        const char* v = so.cells[clicked_row * so.cols + 0];
        std::string col_clean;
        parse_breakout_granularity(so.headers[0], col_clean);
        check(col_clean == "lang", "phase10 e2e: col_clean stripped OK");
        st.stages[0].filters.push_back(make_drill_filter(0, v));
        st.active_stage = 0;
        check(st.active_stage == 0, "phase10 e2e: active retrocede a 0");
        check(st.stages[0].filters.size() == 1, "phase10 e2e: 1 filter anadido");
        check(st.stages[0].filters[0].col == 0 &&
              st.stages[0].filters[0].op == Op::Eq &&
              st.stages[0].filters[0].value == "cpp",
              "phase10 e2e: filter Op::Eq col=0 value=cpp");
    }
    // === phase10 drill history (apply/undo step) ===
    {
        State st;
        st.stages.resize(2);
        st.active_stage = 1;
        DrillStep step;
        step.target_stage      = 0;
        step.filter_pos        = 0;
        step.prev_active_stage = 1;
        step.added             = make_drill_filter(0, "go");
        check(apply_drill_step(st, step), "phase10 hist: apply ok");
        check(st.stages[0].filters.size() == 1, "phase10 hist: filter anadido");
        check(st.stages[0].filters[0].value == "go", "phase10 hist: value preservado");
        check(st.active_stage == 0, "phase10 hist: active = target");
        check(undo_drill_step(st, step), "phase10 hist: undo ok");
        check(st.stages[0].filters.empty(), "phase10 hist: filter eliminado");
        check(st.active_stage == 1, "phase10 hist: active restaurado");
        // Redo
        check(apply_drill_step(st, step), "phase10 hist: redo ok");
        check(st.stages[0].filters.size() == 1, "phase10 hist: redo filter de vuelta");
        check(st.active_stage == 0, "phase10 hist: redo active retorna");
        // Edge: target fuera de rango
        DrillStep bad;
        bad.target_stage = 99;
        check(!apply_drill_step(st, bad), "phase10 hist: apply fuera de rango -> false");
        check(!undo_drill_step(st, bad), "phase10 hist: undo fuera de rango -> false");
        // Edge: pos invalida
        DrillStep bad_pos = step;
        bad_pos.filter_pos = 99;
        check(!undo_drill_step(st, bad_pos), "phase10 hist: undo pos invalida -> false");
    }
    // === phase10 drill history: back/forward stack semantics simulado ===
    {
        State st;
        st.stages.resize(3);
        st.active_stage = 2;
        std::vector<DrillStep> back_stack;
        std::vector<DrillStep> fwd_stack;
        auto drill = [&](int from, int target, int pos, int col, const std::string& v) {
            DrillStep s;
            s.target_stage      = target;
            s.filter_pos        = pos;
            s.prev_active_stage = from;
            s.added             = make_drill_filter(col, v);
            apply_drill_step(st, s);
            back_stack.push_back(s);
            fwd_stack.clear();
        };
        drill(2, 1, 0, 0, "go");
        check(st.stages[1].filters.size() == 1, "phase10 hist seq: drill1 aplicado");
        drill(1, 0, 0, 1, "10");
        check(st.stages[0].filters.size() == 1, "phase10 hist seq: drill2 aplicado");
        check(back_stack.size() == 2, "phase10 hist seq: back stack 2");
        check(fwd_stack.empty(),       "phase10 hist seq: forward limpio");
        // Back x1
        DrillStep s = back_stack.back(); back_stack.pop_back();
        undo_drill_step(st, s);
        fwd_stack.push_back(s);
        check(st.stages[0].filters.empty(), "phase10 hist seq: back deshace drill2");
        check(st.active_stage == 1,         "phase10 hist seq: back restaura active=1");
        check(fwd_stack.size() == 1,        "phase10 hist seq: fwd stack 1");
        // Forward x1
        s = fwd_stack.back(); fwd_stack.pop_back();
        apply_drill_step(st, s);
        back_stack.push_back(s);
        check(st.stages[0].filters.size() == 1, "phase10 hist seq: forward reaplica");
        check(st.active_stage == 0,             "phase10 hist seq: forward active=0");
    }
    // === phase10 row inspector (row_to_tsv + build_filters_from_row) ===
    {
        const char* cells[] = {
            "go",  "10", "filter",
            "py",  "20", "sma",
            "go",  "30", "map",
        };
        std::vector<std::string> hdrs = {"lang", "n", "fn"};
        std::string tsv = row_to_tsv(cells, 3, 3, 1, hdrs);
        check(tsv == "lang\tn\tfn\r\npy\t20\tsma\r\n",
              "phase10 inspect: row_to_tsv layout");
        check(row_to_tsv(cells, 3, 3, -1, hdrs).empty(), "phase10 inspect: tsv neg row -> empty");
        check(row_to_tsv(cells, 3, 3, 5, hdrs).empty(),  "phase10 inspect: tsv row oob -> empty");
        check(row_to_tsv(cells, 3, 0, 0, hdrs).empty(),  "phase10 inspect: tsv cols=0 -> empty");
        auto fs = build_filters_from_row(cells, 3, 3, 0);
        check(fs.size() == 3, "phase10 inspect: 3 filters de row 0");
        check(fs[0].col == 0 && fs[0].op == Op::Eq && fs[0].value == "go",
              "phase10 inspect: filter[0] col=0 op=Eq value=go");
        check(fs[2].value == "filter", "phase10 inspect: filter[2] value=filter");
        // Row con celda vacia -> filter saltado
        const char* sparse[] = {"a", "", "c"};
        auto fs2 = build_filters_from_row(sparse, 1, 3, 0);
        check(fs2.size() == 2 && fs2[0].col == 0 && fs2[1].col == 2,
              "phase10 inspect: cells vacios salteados");
        check(build_filters_from_row(cells, 3, 3, -1).empty(),
              "phase10 inspect: build_filters row invalido -> empty");
    }
    // === phase10 drill-up ===
    {
        State st;
        st.stages.resize(3);
        st.active_stage = 2;
        check(drill_up(st), "phase10 up: 2->1 ok");
        check(st.active_stage == 1, "phase10 up: active=1");
        check(drill_up(st), "phase10 up: 1->0 ok");
        check(st.active_stage == 0, "phase10 up: active=0");
        check(!drill_up(st), "phase10 up: 0 -> false");
        check(st.active_stage == 0, "phase10 up: queda en 0");
        // Filters no se mueven
        State st2;
        st2.stages.resize(2);
        st2.active_stage = 1;
        st2.stages[1].filters.push_back({0, Op::Eq, "x"});
        drill_up(st2);
        check(st2.stages[0].filters.empty() && st2.stages[1].filters.size() == 1,
              "phase10 up: filters quedan en su stage");
        State empty_st;
        check(!drill_up(empty_st), "phase10 up: stages vacio -> false");
    }
    // === phase11: Lua subset validator + transpiler ===
    {
        std::string err;
        // Subset OK: literales + ops
        std::string e1 = tql_to_sql::transpile_expr("1 + 2", {}, err);
        check(err.empty() && e1.find("1 + 2") != std::string::npos,
              "phase11 lua: literal arith");
        std::string e2 = tql_to_sql::transpile_expr("[a] + [b] * 2", {}, err);
        check(err.empty() && e2.find("\"a\"") != std::string::npos &&
              e2.find("\"b\"") != std::string::npos,
              "phase11 lua: col refs + arith");
        std::string e3 = tql_to_sql::transpile_expr("[a] .. \"_\" .. [b]", {}, err);
        check(err.empty() && e3.find(" || ") != std::string::npos,
              "phase11 lua: concat -> ||");
        std::string e4 = tql_to_sql::transpile_expr(
            "if [n] > 10 then \"big\" else \"small\" end", {}, err);
        check(err.empty() && e4.find("CASE WHEN") != std::string::npos &&
              e4.find("THEN") != std::string::npos && e4.find("ELSE") != std::string::npos,
              "phase11 lua: if/then/else -> CASE");
        std::string e5 = tql_to_sql::transpile_expr("math.floor([x] / 100)", {}, err);
        check(err.empty() && e5.find("floor(") != std::string::npos,
              "phase11 lua: math.floor");
        std::string e6 = tql_to_sql::transpile_expr("string.upper([name])", {}, err);
        check(err.empty() && e6.find("upper(") != std::string::npos,
              "phase11 lua: string.upper");
        std::string e7 = tql_to_sql::transpile_expr("string.sub([s], 1, 3)", {}, err);
        check(err.empty() && e7.find("substring(") != std::string::npos,
              "phase11 lua: string.sub 3-arg");
        std::string e8 = tql_to_sql::transpile_expr("not ([x] == nil)", {}, err);
        check(err.empty() && e8.find("NOT") != std::string::npos && e8.find("NULL") != std::string::npos,
              "phase11 lua: not + nil");
        std::string e9 = tql_to_sql::transpile_expr("tonumber([n])", {}, err);
        check(err.empty() && e9.find("CAST(") != std::string::npos,
              "phase11 lua: tonumber -> CAST DOUBLE");
        // Fuera subset: 9 categorias rechazadas
        err.clear();
        check(tql_to_sql::transpile_expr("function() return 1 end", {}, err).empty()
              && err.find("closures") != std::string::npos,
              "phase11 lua: function closure rechazado");
        err.clear();
        check(tql_to_sql::transpile_expr("local x = 1", {}, err).empty()
              && err.find("local") != std::string::npos,
              "phase11 lua: local rechazado");
        err.clear();
        check(tql_to_sql::transpile_expr("for i=1,10 do end", {}, err).empty()
              && err.find("loops") != std::string::npos,
              "phase11 lua: for loop rechazado");
        err.clear();
        check(tql_to_sql::transpile_expr("while true do end", {}, err).empty()
              && err.find("loops") != std::string::npos,
              "phase11 lua: while loop rechazado");
        err.clear();
        check(tql_to_sql::transpile_expr("{1,2,3}", {}, err).empty()
              && err.find("table") != std::string::npos,
              "phase11 lua: table literal rechazado");
        err.clear();
        check(tql_to_sql::transpile_expr("io.read()", {}, err).empty()
              && err.find("io") != std::string::npos,
              "phase11 lua: io.* rechazado");
        err.clear();
        check(tql_to_sql::transpile_expr("string.gsub([s], \"a\", \"b\")", {}, err).empty()
              && err.find("whitelist") != std::string::npos,
              "phase11 lua: string.gsub no whitelisted");
        err.clear();
        check(tql_to_sql::transpile_expr("print([x])", {}, err).empty()
              && err.find("print") != std::string::npos,
              "phase11 lua: print rechazado");
        err.clear();
        check(tql_to_sql::transpile_expr("[a]; [b]", {}, err).empty()
              && err.find("multi-statement") != std::string::npos,
              "phase11 lua: ';' multi-stmt rechazado");
        // is_transpilable wrapper
        std::string werr;
        check(tql_to_sql::is_transpilable("[a] + 1", werr), "phase11 lua: is_transpilable OK");
        check(!tql_to_sql::is_transpilable("function() end", werr),
              "phase11 lua: is_transpilable false para closure");
    }
    // === phase11: TQL State -> SQL DuckDB emit ===
    {
        // Setup: 1 tabla "users" con cols lang,n.
        TableInput t;
        t.name = "users";
        t.headers = {"lang", "n"};
        t.types = {ColumnType::String, ColumnType::Int};
        // Cells no usado por emit (solo schema).
        std::vector<TableInput> tables = {t};
        // Caso 1: stage 0 simple (sin filters ni sort)
        {
            State st;
            st.stages.resize(1);
            auto e = tql_to_sql::emit_sql(st, tables);
            check(e.error.empty(), "phase11 sql: empty pipeline -> no error");
            check(e.sql.find("WITH t0") != std::string::npos &&
                  e.sql.find("FROM \"users\"") != std::string::npos &&
                  e.sql.find("SELECT * FROM t0") != std::string::npos,
                  "phase11 sql: stage0 SELECT * FROM users");
        }
        // Caso 2: stage 0 filter + sort
        {
            State st;
            st.stages.resize(1);
            st.stages[0].filters.push_back({0, Op::Eq, "go"});
            st.stages[0].filters.push_back({1, Op::Gt, "10"});
            st.stages[0].sorts.push_back({"n", true});
            auto e = tql_to_sql::emit_sql(st, tables);
            check(e.error.empty(), "phase11 sql: filter+sort OK");
            check(e.sql.find("WHERE") != std::string::npos &&
                  e.sql.find("\"lang\" = ?") != std::string::npos &&
                  e.sql.find("\"n\" > ?") != std::string::npos,
                  "phase11 sql: filter clauses");
            check(e.params.size() == 2 && e.params[0] == "go" && e.params[1] == "10",
                  "phase11 sql: params bound");
            check(e.sql.find("ORDER BY \"n\" DESC") != std::string::npos,
                  "phase11 sql: ORDER BY desc");
        }
        // Caso 3: stage 1 group + count
        {
            State st;
            st.stages.resize(2);
            st.stages[1].breakouts.push_back("lang");
            st.stages[1].aggregations.push_back({AggFn::Count});
            st.active_stage = 1;
            auto e = tql_to_sql::emit_sql(st, tables);
            check(e.error.empty(), "phase11 sql: group ok");
            check(e.sql.find("t1 AS") != std::string::npos &&
                  e.sql.find("COUNT(*)") != std::string::npos &&
                  e.sql.find("GROUP BY") != std::string::npos &&
                  e.sql.find("SELECT * FROM t1") != std::string::npos,
                  "phase11 sql: stage1 CTE + COUNT + GROUP BY");
        }
        // Caso 4: granularity :month -> date_trunc
        {
            State st;
            st.stages.resize(2);
            st.stages[1].breakouts.push_back("ts:month");
            st.stages[1].aggregations.push_back({AggFn::Sum, "n"});
            st.active_stage = 1;
            TableInput ts_t;
            ts_t.name = "events";
            ts_t.headers = {"ts", "n"};
            ts_t.types = {ColumnType::Date, ColumnType::Int};
            std::vector<TableInput> tt = {ts_t};
            auto e = tql_to_sql::emit_sql(st, tt);
            check(e.error.empty(), "phase11 sql: granularity ok");
            check(e.sql.find("date_trunc('month'") != std::string::npos &&
                  e.sql.find("SUM(\"n\")") != std::string::npos,
                  "phase11 sql: date_trunc + SUM");
        }
        // Caso 5: aggregations p25/median/p99
        {
            State st;
            st.stages.resize(2);
            st.stages[1].breakouts.push_back("lang");
            st.stages[1].aggregations.push_back({AggFn::Median, "n"});
            st.stages[1].aggregations.push_back({AggFn::P25, "n"});
            st.stages[1].aggregations.push_back({AggFn::P99, "n"});
            st.active_stage = 1;
            auto e = tql_to_sql::emit_sql(st, tables);
            check(e.error.empty(), "phase11 sql: percentiles ok");
            check(e.sql.find("quantile_cont(\"n\", 0.5)") != std::string::npos &&
                  e.sql.find("quantile_cont(\"n\", 0.25)") != std::string::npos &&
                  e.sql.find("quantile_cont(\"n\", 0.99)") != std::string::npos,
                  "phase11 sql: quantile_cont calls");
        }
        // Caso 6: joins 4 strategies
        {
            State st;
            st.stages.resize(1);
            Join jn;
            jn.alias = "o";
            jn.source = "orders";
            jn.on.push_back({"user_id", "user_id"});
            jn.strategy = JoinStrategy::Left;
            st.joins.push_back(jn);
            TableInput u, o;
            u.name = "users";
            u.headers = {"user_id", "name"};
            u.types = {ColumnType::String, ColumnType::String};
            o.name = "orders";
            o.headers = {"user_id", "amount"};
            o.types = {ColumnType::String, ColumnType::Int};
            std::vector<TableInput> tt = {u, o};
            auto e = tql_to_sql::emit_sql(st, tt);
            check(e.error.empty(), "phase11 sql: join ok");
            check(e.sql.find("LEFT JOIN \"orders\" AS \"o\"") != std::string::npos &&
                  e.sql.find("ON \"users\".\"user_id\" = \"o\".\"user_id\"") != std::string::npos,
                  "phase11 sql: LEFT JOIN ON syntax");
            // Inner
            st.joins[0].strategy = JoinStrategy::Inner;
            auto e2 = tql_to_sql::emit_sql(st, tt);
            check(e2.sql.find("INNER JOIN") != std::string::npos, "phase11 sql: INNER JOIN");
            // Right
            st.joins[0].strategy = JoinStrategy::Right;
            auto e3 = tql_to_sql::emit_sql(st, tt);
            check(e3.sql.find("RIGHT JOIN") != std::string::npos, "phase11 sql: RIGHT JOIN");
            // Full
            st.joins[0].strategy = JoinStrategy::Full;
            auto e4 = tql_to_sql::emit_sql(st, tt);
            check(e4.sql.find("FULL OUTER JOIN") != std::string::npos, "phase11 sql: FULL OUTER JOIN");
        }
        // Caso 7: derived col subset -> SQL expression
        {
            State st;
            st.stages.resize(1);
            DerivedColumn d;
            d.name = "size_kb";
            d.source_col = -1;
            d.formula = "[n] / 1024.0";
            d.type = ColumnType::Float;
            st.stages[0].derived.push_back(d);
            auto e = tql_to_sql::emit_sql(st, tables);
            check(e.error.empty(), "phase11 sql: derived subset ok");
            check(e.sql.find("\"n\" / 1024") != std::string::npos &&
                  e.sql.find("AS \"size_kb\"") != std::string::npos,
                  "phase11 sql: derived expression + alias");
        }
        // Caso 8: derived col FUERA subset -> warning + skip
        {
            State st;
            st.stages.resize(1);
            DerivedColumn d;
            d.name = "bad";
            d.source_col = -1;
            d.formula = "string.gsub([n], \"a\", \"b\")";
            d.type = ColumnType::String;
            st.stages[0].derived.push_back(d);
            auto e = tql_to_sql::emit_sql(st, tables);
            check(e.error.empty(), "phase11 sql: derived fuera subset NO bloquea emit");
            check(!e.warnings.empty() &&
                  e.warnings[0].find("out of SQL subset") != std::string::npos,
                  "phase11 sql: warning derived fuera subset");
            check(e.sql.find("\"bad\"") == std::string::npos,
                  "phase11 sql: derived skip cuando fuera subset");
        }
        // Caso 9: empty tables -> error
        {
            State st;
            st.stages.resize(1);
            std::vector<TableInput> empty;
            auto e = tql_to_sql::emit_sql(st, empty);
            check(!e.error.empty() && e.error.find("no input tables") != std::string::npos,
                  "phase11 sql: empty tables -> error");
        }
        // Caso 10: stage 0 con LIKE (Contains)
        {
            State st;
            st.stages.resize(1);
            st.stages[0].filters.push_back({0, Op::Contains, "go"});
            auto e = tql_to_sql::emit_sql(st, tables);
            check(e.error.empty(), "phase11 sql: LIKE Contains ok");
            check(e.sql.find("LIKE ?") != std::string::npos &&
                  e.params.size() == 1 && e.params[0] == "%go%",
                  "phase11 sql: Contains -> LIKE %go%");
        }
    }
    // === phase11: LLM client (mock, no red) ===
    {
        llm_anthropic::AskInput in;
        in.question = "show top 10 langs";
        in.tql_current = "return { stages = {} }";
        in.col_names = {"lang", "n"};
        in.col_types = {ColumnType::String, ColumnType::Int};
        in.mode = llm_anthropic::OutputMode::TQL;
        std::string body = llm_anthropic::build_request_body(in);
        check(body.find("\"model\":\"claude-sonnet-4-6\"") != std::string::npos,
              "phase11 llm: default model");
        check(body.find("\"max_tokens\":8192") != std::string::npos,
              "phase11 llm: max_tokens");
        check(body.find("\\\"system\\\"") == std::string::npos /* not double-escaped */,
              "phase11 llm: system not double-escaped");
        check(body.find("Available columns") != std::string::npos,
              "phase11 llm: schema block present");
        check(body.find("show top 10 langs") != std::string::npos,
              "phase11 llm: question present");
        check(body.find("TQL") != std::string::npos,
              "phase11 llm: system mentions TQL");
        in.mode = llm_anthropic::OutputMode::SQL;
        std::string body_sql = llm_anthropic::build_request_body(in);
        check(body_sql.find("DuckDB") != std::string::npos,
              "phase11 llm: SQL mode mentions DuckDB");
    }
    {
        // extract_code_block
        std::string raw1 = "Here you go:\n```lua\nreturn { x = 1 }\n```\nDone!";
        std::string code = llm_anthropic::extract_code_block(raw1, "lua");
        check(code == "return { x = 1 }", "phase11 llm: extract ```lua block");
        std::string raw2 = "Sure:\n```\nplain code\n```";
        std::string code2 = llm_anthropic::extract_code_block(raw2, "lua");
        check(code2 == "plain code", "phase11 llm: extract bare ```");
        std::string raw3 = "no fences here";
        std::string code3 = llm_anthropic::extract_code_block(raw3, "lua");
        check(code3 == "no fences here", "phase11 llm: no fence -> stripped");
        std::string raw4 = "```sql\nSELECT 1;\n```";
        std::string code4 = llm_anthropic::extract_code_block(raw4, "sql");
        check(code4 == "SELECT 1;", "phase11 llm: extract ```sql");
    }
    {
        // parse_response_text from JSON
        std::string j = "{\"id\":\"x\",\"content\":[{\"type\":\"text\",\"text\":\"hello\\nworld\"}],\"role\":\"assistant\"}";
        std::string t = llm_anthropic::parse_response_text(j);
        check(t == "hello\nworld", "phase11 llm: parse text content");
        std::string j2 = "{\"content\":[{\"type\":\"text\",\"text\":\"\\\"quoted\\\"\"}]}";
        std::string t2 = llm_anthropic::parse_response_text(j2);
        check(t2 == "\"quoted\"", "phase11 llm: parse quoted escape");
        std::string j3 = "{\"error\":\"foo\"}";
        std::string t3 = llm_anthropic::parse_response_text(j3);
        check(t3.empty(), "phase11 llm: no text -> empty");
    }
    {
        // Mock end-to-end via FN_LLM_MOCK_RESPONSE (portable Linux/Mingw via putenv).
        const char* mock_kv =
            "FN_LLM_MOCK_RESPONSE={\"content\":[{\"type\":\"text\",\"text\":\"```lua\\nreturn { mock = true }\\n```\"}]}";
        putenv((char*)mock_kv);
        llm_anthropic::AskInput in;
        in.question = "q";
        in.col_names = {"a"};
        in.col_types = {ColumnType::String};
        auto r = llm_anthropic::ask(in);
        check(r.error.empty(), "phase11 llm mock: no error");
        check(r.code == "return { mock = true }", "phase11 llm mock: code extracted");
        // Unset: putenv con "VAR=" deja vacio (suficiente para nuestro check `*mock`).
        putenv((char*)"FN_LLM_MOCK_RESPONSE=");
    }
    std::printf("\n=== %d passed, %d failed ===\n", passed, failed);
    return failed == 0 ? 0 : 1;
 }
@@ -652,7 +652,8 @@ bool apply(const std::string& lua_text, State& state,
            }
            lua_pop(L, 1);
-            // breakout (solo aplica stages >= 1, no-op silencioso si stage 0)
+            // breakout (solo aplica stages >= 1, no-op silencioso si stage 0).
            // Acepta sufijo ":granularity" para cols Date (fase 10).
            lua_getfield(L, -1, "breakout");
            if (lua_istable(L, -1)) {
                int n = (int)lua_rawlen(L, -1);
@@ -660,8 +661,10 @@ bool apply(const std::string& lua_text, State& state,
                    lua_rawgeti(L, -1, i);
                    if (lua_isstring(L, -1)) {
                        std::string bn = lua_tostring(L, -1);
-                        if (find_orig_col(cur_headers, bn) < 0) {
+                        std::string clean;
-                            warn("stage " + std::to_string(si - 1) + ": breakout col \"" + bn + "\" not in input headers");
+                        parse_breakout_granularity(bn, clean);
                        if (find_orig_col(cur_headers, clean) < 0) {
                            warn("stage " + std::to_string(si - 1) + ": breakout col \"" + clean + "\" not in input headers");
                        }
                        stg.breakouts.emplace_back(bn);
                    }
@@ -0,0 +1,862 @@
 // tql_to_sql.cpp — pure walker TQL -> SQL DuckDB + Lua subset transpiler.
 // Ver issue 0080. Sin DuckDB linkado.
 #include "tql_to_sql.h"
 #include <cctype>
 #include <cstdio>
 #include <cstdlib>
 #include <cstring>
 #include <set>
 #include <sstream>
 #include <unordered_map>
 namespace tql_to_sql {
 using namespace data_table;
 // ============================================================================
 // Lua subset tokenizer + recursive-descent expression parser -> SQL string.
 // ============================================================================
 namespace {
 // One lexical token of the Lua formula subset.
 struct Tok {
    // Token categories. The enumerator order is kept exactly as before so the
    // underlying integer values do not change.
    enum Kind {
        // End-of-input sentinel, then the value-carrying tokens.
        EndT, NumT, StrT, IdentT, ColT,
        // Arithmetic and string concatenation ('..').
        Plus, Minus, Star, Slash, Percent, ConcatT,
        // Comparisons.
        Eq, Neq, Lt, Lte, Gt, Gte,
        // Boolean keywords.
        AndT, OrT, NotT,
        // Conditional-expression keywords ('end' is EndKW, not EndT).
        IfT, ThenT, ElseT, EndKW,
        // Punctuation.
        LParen, RParen, Comma, Dot,
        // Literal keywords.
        TrueT, FalseT, NilT,
    };
    // Category of this token; a default-constructed Tok is the EndT sentinel.
    Kind kind = EndT;
    // Payload text. The lexer in this file fills it for NumT, StrT, IdentT and
    // ColT tokens; it stays empty for operators and keywords.
    std::string text;
 };
 // Identifiers that are rejected outright: maps the literal token to the
 // message explaining why it is not allowed. The table is built once on first
 // use and shared by every caller.
 const std::unordered_map<std::string, const char*>& forbidden_keywords() {
    using Table = std::unordered_map<std::string, const char*>;
    static const Table kForbidden = {
        // Statements, declarations and control flow.
        {"function",     "closures not allowed in SQL transpile subset"},
        {"local",        "local declarations not allowed"},
        {"for",          "loops not allowed"},
        {"while",        "loops not allowed"},
        {"repeat",       "loops not allowed"},
        {"do",           "block statements not allowed"},
        {"return",       "explicit return not allowed (formula is implicit expression)"},
        {"goto",         "goto not allowed"},
        {"break",        "break not allowed (no loops)"},
        // Libraries and runtime facilities (io/os/debug/coroutines/...).
        {"io",           "io.* access not allowed"},
        {"os",           "os.* access not allowed"},
        {"debug",        "debug.* access not allowed"},
        {"package",      "package access not allowed"},
        {"require",      "require not allowed"},
        {"coroutine",    "coroutines not allowed"},
        {"setmetatable", "metatables not allowed"},
        {"getmetatable", "metatables not allowed"},
        {"rawget",       "rawget not allowed"},
        {"rawset",       "rawset not allowed"},
        {"pcall",        "pcall not allowed"},
        {"xpcall",       "xpcall not allowed"},
        {"print",        "print not allowed (SQL has no side effects)"},
    };
    return kForbidden;
 }
 // Whitelist of Lua functions that can be transpiled to SQL: Lua name -> SQL
 // function template. Templates use $1, $2, ... as argument placeholders.
 //
 // FnMap fields:
 //   min_args / max_args  inclusive bounds on the accepted argument count
 //   sql_tmpl             SQL text with $N placeholders
 struct FnMap { int min_args; int max_args; const char* sql_tmpl; };
 // Returns the shared whitelist table (built once on first use).
 const std::unordered_map<std::string, FnMap>& fn_whitelist() {
    static const std::unordered_map<std::string, FnMap> M = {
        // math.*
        {"math.floor", {1, 1, "floor($1)"}},
        {"math.ceil",  {1, 1, "ceiling($1)"}},
        {"math.abs",   {1, 1, "abs($1)"}},
        {"math.sqrt",  {1, 1, "sqrt($1)"}},
        {"math.sin",   {1, 1, "sin($1)"}},
        {"math.cos",   {1, 1, "cos($1)"}},
        {"math.log",   {1, 1, "ln($1)"}},
        {"math.exp",   {1, 1, "exp($1)"}},
        {"math.min",   {2, 2, "least($1, $2)"}},
        {"math.max",   {2, 2, "greatest($1, $2)"}},
        // string.*
        {"string.upper", {1, 1, "upper($1)"}},
        {"string.lower", {1, 1, "lower($1)"}},
        {"string.len",   {1, 1, "length($1)"}},
        // Marker template only: the 2-argument and 3-argument forms are
        // expected to be special-cased by the code that expands templates.
        {"string.sub",   {2, 3, "/*SUBSTRING*/"}},
        // top-level
        {"tostring",   {1, 1, "CAST($1 AS VARCHAR)"}},
        // Lua's tonumber yields nil for text that is not a number, whereas a
        // plain CAST aborts the whole DuckDB query with a conversion error.
        // TRY_CAST returns NULL instead, matching the Lua behaviour.
        {"tonumber",   {1, 1, "TRY_CAST($1 AS DOUBLE)"}},
    };
    return M;
 }
 // Renders `name` as a SQL identifier. A fuller implementation would quote
 // only when the name has special characters or collides with a keyword; this
 // one always wraps it in double quotes, which preserves case and lets names
 // contain ':' (granularity suffix). Embedded double quotes are doubled.
 std::string sql_ident(const std::string& name) {
    std::string quoted;
    quoted.reserve(name.size() + 4);
    quoted.push_back('"');
    for (std::string::size_type k = 0; k < name.size(); ++k) {
        const char ch = name[k];
        quoted.push_back(ch);
        if (ch == '"') quoted.push_back('"');   // "" is the escaped form of "
    }
    quoted.push_back('"');
    return quoted;
 }
 // Renders `s` as a single-quoted SQL string literal, doubling every embedded
 // single quote. All other characters are copied through unchanged.
 std::string sql_string_literal(const std::string& s) {
    std::string lit;
    lit.reserve(s.size() + 4);
    lit.push_back('\'');
    std::string::size_type from = 0;
    for (;;) {
        const std::string::size_type hit = s.find('\'', from);
        if (hit == std::string::npos) {
            lit.append(s, from, std::string::npos);   // tail without quotes
            break;
        }
        lit.append(s, from, hit - from + 1);          // run up to and including the quote
        lit.push_back('\'');                          // second half of the '' pair
        from = hit + 1;
    }
    lit.push_back('\'');
    return lit;
 }
 // Tokenizer for the Lua formula subset. Turns the source text into a flat
 // list of Tok values and rejects, with a message, every construct that falls
 // outside the subset (forbidden keywords, assignment, table literals, ...).
 class Lexer {
 public:
    // Owns a copy of `src` rather than a reference, so a lexer built from a
    // temporary string never scans freed memory.
    Lexer(const std::string& src) : src_(src) {}
    // Appends the tokens of the source to `out`, finishing with an EndT
    // sentinel. Returns true on success. On failure returns false, leaves the
    // reason in error(), and keeps whatever tokens were already appended.
    bool tokenize(std::vector<Tok>& out) {
        size_t i = 0;
        while (i < src_.size()) {
            char c = src_[i];
            if (std::isspace((unsigned char)c)) { ++i; continue; }
            // Lua line comment: skip to end of line. Block comments get no
            // special handling here.
            if (c == '-' && i + 1 < src_.size() && src_[i+1] == '-') {
                while (i < src_.size() && src_[i] != '\n') ++i;
                continue;
            }
            if (c == '[' ) {
                // Column reference [identifier]: everything up to the next ']'
                // is taken verbatim as the column name.
                size_t j = i + 1;
                std::string name;
                while (j < src_.size() && src_[j] != ']') {
                    name += src_[j];
                    ++j;
                }
                if (j >= src_.size()) { error_ = "unterminated [col] ref"; return false; }
                Tok t; t.kind = Tok::ColT; t.text = name;
                out.push_back(t);
                i = j + 1;
                continue;
            }
            if (c == '"' || c == '\'') {
                // String literal delimited by the same quote character.
                char q = c;
                ++i;
                std::string s;
                while (i < src_.size() && src_[i] != q) {
                    if (src_[i] == '\\' && i + 1 < src_.size()) {
                        char esc = src_[i+1];
                        switch (esc) {
                            case 'n': s += '\n'; break;
                            case 't': s += '\t'; break;
                            // The remaining single-letter Lua escapes used to
                            // fall into the default branch and were lexed as
                            // the letter itself (e.g. "\r" became "r").
                            case 'r': s += '\r'; break;
                            case 'a': s += '\a'; break;
                            case 'b': s += '\b'; break;
                            case 'f': s += '\f'; break;
                            case 'v': s += '\v'; break;
                            // Covers \\ \' \" and keeps any other escaped
                            // character as-is.
                            default:  s += esc;  break;
                        }
                        i += 2;
                    } else {
                        s += src_[i++];
                    }
                }
                if (i >= src_.size()) { error_ = "unterminated string literal"; return false; }
                ++i;  // closing quote
                Tok t; t.kind = Tok::StrT; t.text = s;
                out.push_back(t);
                continue;
            }
            // Number: digits with at most one '.', optionally starting with
            // '.' when a digit follows. No exponent or hex forms.
            if (std::isdigit((unsigned char)c) || (c == '.' && i + 1 < src_.size() && std::isdigit((unsigned char)src_[i+1]))) {
                std::string n;
                bool seen_dot = false;
                while (i < src_.size()) {
                    char d = src_[i];
                    if (std::isdigit((unsigned char)d)) { n += d; ++i; }
                    else if (d == '.' && !seen_dot) { n += d; seen_dot = true; ++i; }
                    else break;
                }
                Tok t; t.kind = Tok::NumT; t.text = n;
                out.push_back(t);
                continue;
            }
            if (std::isalpha((unsigned char)c) || c == '_') {
                std::string id;
                while (i < src_.size() &&
                       (std::isalnum((unsigned char)src_[i]) || src_[i] == '_')) {
                    id += src_[i++];
                }
                // Forbidden keywords abort lexing; the rest map to keyword
                // tokens or a plain identifier.
                auto& F = forbidden_keywords();
                auto fit = F.find(id);
                if (fit != F.end()) {
                    error_ = std::string("token '") + id + "': " + fit->second;
                    return false;
                }
                Tok t;
                if      (id == "and")   t.kind = Tok::AndT;
                else if (id == "or")    t.kind = Tok::OrT;
                else if (id == "not")   t.kind = Tok::NotT;
                else if (id == "if")    t.kind = Tok::IfT;
                else if (id == "then")  t.kind = Tok::ThenT;
                else if (id == "else")  t.kind = Tok::ElseT;
                else if (id == "end")   t.kind = Tok::EndKW;
                else if (id == "true")  t.kind = Tok::TrueT;
                else if (id == "false") t.kind = Tok::FalseT;
                else if (id == "nil")   t.kind = Tok::NilT;
                else                    { t.kind = Tok::IdentT; t.text = id; }
                out.push_back(t);
                continue;
            }
            // Operators and punctuation. emit() appends a text-less token and
            // advances past `len` source characters.
            auto emit = [&](Tok::Kind k, int len) {
                Tok t; t.kind = k; out.push_back(t); i += (size_t)len;
            };
            if (c == '+') { emit(Tok::Plus,  1); continue; }
            if (c == '-') { emit(Tok::Minus, 1); continue; }
            if (c == '*') { emit(Tok::Star,  1); continue; }
            if (c == '/') { emit(Tok::Slash, 1); continue; }
            if (c == '%') { emit(Tok::Percent,1); continue; }
            if (c == '(') { emit(Tok::LParen, 1); continue; }
            if (c == ')') { emit(Tok::RParen, 1); continue; }
            if (c == ',') { emit(Tok::Comma,  1); continue; }
            if (c == '.') {
                // '.' is member access, '..' is concatenation, '...' is rejected.
                if (i + 1 < src_.size() && src_[i+1] == '.') {
                    if (i + 2 < src_.size() && src_[i+2] == '.') {
                        error_ = "'...' vararg not allowed"; return false;
                    }
                    emit(Tok::ConcatT, 2); continue;
                }
                emit(Tok::Dot, 1); continue;
            }
            if (c == '=') {
                if (i + 1 < src_.size() && src_[i+1] == '=') { emit(Tok::Eq, 2); continue; }
                error_ = "single '=' (assignment) not allowed"; return false;
            }
            if (c == '~') {
                if (i + 1 < src_.size() && src_[i+1] == '=') { emit(Tok::Neq, 2); continue; }
                error_ = "stray '~'"; return false;
            }
            if (c == '<') {
                if (i + 1 < src_.size() && src_[i+1] == '=') { emit(Tok::Lte, 2); continue; }
                emit(Tok::Lt, 1); continue;
            }
            if (c == '>') {
                if (i + 1 < src_.size() && src_[i+1] == '=') { emit(Tok::Gte, 2); continue; }
                emit(Tok::Gt, 1); continue;
            }
            // Characters that are valid Lua but outside the subset get a
            // dedicated message; anything else is reported generically.
            if (c == '{') { error_ = "table literals '{...}' not allowed"; return false; }
            if (c == '}') { error_ = "stray '}'"; return false; }
            if (c == ';') { error_ = "multi-statement not allowed"; return false; }
            if (c == '#') { error_ = "length '#' operator not allowed"; return false; }
            if (c == ':') { error_ = "method calls ':' not allowed"; return false; }
            error_ = std::string("unexpected character '") + c + "'";
            return false;
        }
        Tok t; t.kind = Tok::EndT;
        out.push_back(t);
        return true;
    }
    // Message of the failure that made tokenize() return false; empty if no
    // failure has been recorded.
    const std::string& error() const { return error_; }
 private:
    std::string src_;     // private copy of the text being scanned
    std::string error_;   // first error encountered, if any
 };
 class Parser {
 public:
    // Builds a parser over an already-tokenized formula.
    //   toks    - token list to parse
    //   headers - column names, stored in headers_ (how they are used is not
    //             visible in this part of the file)
    // NOTE(review): the member declarations are further down; if toks_ and
    // headers_ are reference members, both arguments must outlive the parser.
    Parser(const std::vector<Tok>& toks,
           const std::vector<std::string>& headers)
        : toks_(toks), headers_(headers) {}
    // Grammar entry point:
    //   expr    := ternary
    //   ternary := if/then/else | logic_or
    // Writes the SQL text of the parsed expression into `out` and passes the
    // result of parse_ternary straight through.
    bool parse_expr(std::string& out) {
        const bool parsed = parse_ternary(out);
        return parsed;
    }
    // ternary := 'if' logic_or 'then' ternary 'else' ternary 'end' | logic_or
    // A conditional becomes "CASE WHEN c THEN a ELSE b END". The else branch
    // is mandatory. Returns false as soon as any sub-parse or expected keyword
    // fails; `out` is only written for a complete conditional.
    bool parse_ternary(std::string& out) {
        if (peek(0).kind != Tok::IfT) return parse_logic_or(out);
        ++pos_;  // consume 'if'
        std::string cond, when_true, when_false;
        // && short-circuits, so the steps run in order and stop at the first
        // failure.
        const bool complete =
            parse_logic_or(cond) &&
            eat(Tok::ThenT, "'then' expected after 'if'") &&
            parse_ternary(when_true) &&
            eat(Tok::ElseT, "'else' expected (subset requires else branch)") &&
            parse_ternary(when_false) &&
            eat(Tok::EndKW, "'end' expected to close 'if'");
        if (!complete) return false;
        out = "CASE WHEN " + cond + " THEN " + when_true + " ELSE " + when_false + " END";
        return true;
    }
    bool parse_logic_or(std::string& out) {
        if (!parse_logic_and(out)) return false;
        while (peek(0).kind == Tok::OrT) {
            ++pos_;
            std::string rhs;
            if (!parse_logic_and(rhs)) return false;
            out = "(" + out + " OR " + rhs + ")";
        }
        return true;
    }
    bool parse_logic_and(std::string& out) {
        if (!parse_not(out)) return false;
        while (peek(0).kind == Tok::AndT) {
            ++pos_;
            std::string rhs;
            if (!parse_not(rhs)) return false;
            out = "(" + out + " AND " + rhs + ")";
        }
        return true;
    }
    bool parse_not(std::string& out) {
        if (peek(0).kind == Tok::NotT) {
            ++pos_;
            std::string e;
            if (!parse_not(e)) return false;
            out = "NOT (" + e + ")";
            return true;
        }
        return parse_comparison(out);
    }
    bool parse_comparison(std::string& out) {
        if (!parse_concat(out)) return false;
        while (true) {
            Tok::Kind k = peek(0).kind;
            const char* op = nullptr;
            if      (k == Tok::Eq)  op = " = ";
            else if (k == Tok::Neq) op = " <> ";
            else if (k == Tok::Lt)  op = " < ";
            else if (k == Tok::Lte) op = " <= ";
            else if (k == Tok::Gt)  op = " > ";
            else if (k == Tok::Gte) op = " >= ";
            else break;
            ++pos_;
            std::string rhs;
            if (!parse_concat(rhs)) return false;
            out = "(" + out + op + rhs + ")";
        }
        return true;
    }
    bool parse_concat(std::string& out) {
        if (!parse_additive(out)) return false;
        while (peek(0).kind == Tok::ConcatT) {
            ++pos_;
            std::string rhs;
            if (!parse_additive(rhs)) return false;
            out = "(" + out + " || " + rhs + ")";
        }
        return true;
    }
    bool parse_additive(std::string& out) {
        if (!parse_multiplicative(out)) return false;
        while (peek(0).kind == Tok::Plus || peek(0).kind == Tok::Minus) {
            const char* op = (peek(0).kind == Tok::Plus) ? " + " : " - ";
            ++pos_;
            std::string rhs;
            if (!parse_multiplicative(rhs)) return false;
            out = "(" + out + op + rhs + ")";
        }
        return true;
    }
    bool parse_multiplicative(std::string& out) {
        if (!parse_unary(out)) return false;
        while (peek(0).kind == Tok::Star || peek(0).kind == Tok::Slash || peek(0).kind == Tok::Percent) {
            const char* op = (peek(0).kind == Tok::Star) ? " * "
                           : (peek(0).kind == Tok::Slash) ? " / " : " % ";
            ++pos_;
            std::string rhs;
            if (!parse_unary(rhs)) return false;
            out = "(" + out + op + rhs + ")";
        }
        return true;
    }
    bool parse_unary(std::string& out) {
        if (peek(0).kind == Tok::Minus) {
            ++pos_;
            std::string e;
            if (!parse_unary(e)) return false;
            out = "(-" + e + ")";
            return true;
        }
        return parse_primary(out);
    }
    bool parse_primary(std::string& out) {
        Tok t = peek(0);
        if (t.kind == Tok::NumT) {
            ++pos_;
            out = t.text;
            return true;
        }
        if (t.kind == Tok::StrT) {
            ++pos_;
            out = sql_string_literal(t.text);
            return true;
        }
        if (t.kind == Tok::TrueT)  { ++pos_; out = "TRUE";  return true; }
        if (t.kind == Tok::FalseT) { ++pos_; out = "FALSE"; return true; }
        if (t.kind == Tok::NilT)   { ++pos_; out = "NULL";  return true; }
        if (t.kind == Tok::ColT) {
            // Check col exists (warning, not error).
            ++pos_;
            (void)headers_;  // currently not validating — caller can do that
            out = sql_ident(t.text);
            return true;
        }
        if (t.kind == Tok::LParen) {
            ++pos_;
            std::string e;
            if (!parse_expr(e)) return false;
            if (!eat(Tok::RParen, "expected ')'")) return false;
            out = "(" + e + ")";
            return true;
        }
        if (t.kind == Tok::IdentT) {
            // Function call: identifier ("." identifier)? "(" args ")"
            std::string name = t.text;
            ++pos_;
            if (peek(0).kind == Tok::Dot) {
                ++pos_;
                if (peek(0).kind != Tok::IdentT) {
                    error_ = "expected identifier after '.'";
                    return false;
                }
                name += "." + peek(0).text;
                ++pos_;
            }
            if (peek(0).kind != Tok::LParen) {
                error_ = "bare identifier '" + name +
                         "' not allowed (only [col] refs + whitelisted fn calls)";
                return false;
            }
            ++pos_;  // consume '('
            std::vector<std::string> args;
            if (peek(0).kind != Tok::RParen) {
                while (true) {
                    std::string a;
                    if (!parse_expr(a)) return false;
                    args.push_back(a);
                    if (peek(0).kind == Tok::Comma) { ++pos_; continue; }
                    break;
                }
            }
            if (!eat(Tok::RParen, "expected ')' closing function args")) return false;
            // Validate against whitelist
            auto& W = fn_whitelist();
            auto wit = W.find(name);
            if (wit == W.end()) {
                error_ = "function '" + name +
                         "' not in SQL transpile whitelist (math.*, string.upper/lower/len/sub, tostring, tonumber)";
                return false;
            }
            const FnMap& fm = wit->second;
            if ((int)args.size() < fm.min_args || (int)args.size() > fm.max_args) {
                std::ostringstream os;
                os << "function '" << name << "' takes " << fm.min_args;
                if (fm.max_args != fm.min_args) os << ".." << fm.max_args;
                os << " args, got " << args.size();
                error_ = os.str();
                return false;
            }
            // Casos especiales
            if (name == "string.sub") {
                // Lua: string.sub(s, i [, j]) — i/j 1-based, inclusive.
                // SQL DuckDB: substring(s, i, count). count = j - i + 1.
                if (args.size() == 2) {
                    // sin j -> hasta el final. DuckDB substring(s, i) acepta.
                    out = "substring(" + args[0] + ", " + args[1] + ")";
                } else {
                    out = "substring(" + args[0] + ", " + args[1] +
                          ", (" + args[2] + ") - (" + args[1] + ") + 1)";
                }
                return true;
            }
            // Generico: substituir $1..$N en template.
            std::string s = fm.sql_tmpl;
            for (int i = 0; i < (int)args.size(); ++i) {
                char ph[6];
                std::snprintf(ph, sizeof(ph), "$%d", i + 1);
                std::string p = ph;
                size_t at = 0;
                while ((at = s.find(p, at)) != std::string::npos) {
                    s.replace(at, p.size(), args[i]);
                    at += args[i].size();
                }
            }
            out = s;
            return true;
        }
        error_ = std::string("unexpected token in expression");
        return false;
    }
    bool eat(Tok::Kind k, const char* msg) {
        if (peek(0).kind != k) { error_ = msg; return false; }
        ++pos_;
        return true;
    }
    const Tok& peek(int off) const {
        size_t i = pos_ + (size_t)off;
        if (i >= toks_.size()) return toks_.back();
        return toks_[i];
    }
    bool at_end() const { return peek(0).kind == Tok::EndT; }
    const std::string& error() const { return error_; }
 private:
    const std::vector<Tok>&            toks_;
    const std::vector<std::string>&    headers_;
    size_t                             pos_ = 0;
    std::string                        error_;
 };
 } // anon
 std::string transpile_expr(const std::string& formula,
                            const std::vector<std::string>& in_headers,
                            std::string& error_out) {
    error_out.clear();
    std::vector<Tok> toks;
    Lexer lex(formula);
    if (!lex.tokenize(toks)) {
        error_out = lex.error();
        return "";
    }
    Parser p(toks, in_headers);
    std::string out;
    if (!p.parse_expr(out)) {
        error_out = p.error();
        return "";
    }
    if (!p.at_end()) {
        error_out = "unexpected trailing tokens after expression";
        return "";
    }
    return out;
 }
 // Returns true when `formula` transpiles to a non-empty SQL expression with
 // no error. The transpile is run with an empty header list; on failure the
 // reason is left in `error_out`.
 bool is_transpilable(const std::string& formula, std::string& error_out) {
    const std::vector<std::string> no_headers;
    const std::string sql = transpile_expr(formula, no_headers, error_out);
    if (!error_out.empty()) return false;
    return !sql.empty();
 }
 // ============================================================================
 // TQL State -> SQL DuckDB emitter.
 // ============================================================================
 namespace {
 // Maps an aggregation to its SQL expression. Count ignores the column
 // (COUNT(*)); every other function wraps the quoted column. Quantile-style
 // aggregations use quantile_cont with a fixed or caller-provided fraction.
 // An unrecognised enum value yields a commented NULL so the SQL still parses.
 std::string emit_agg_expr(const Aggregation& a) {
    switch (a.fn) {
        case AggFn::Count:      return "COUNT(*)";
        case AggFn::Sum:        return "SUM(" + sql_ident(a.col) + ")";
        case AggFn::Avg:        return "AVG(" + sql_ident(a.col) + ")";
        case AggFn::Min:        return "MIN(" + sql_ident(a.col) + ")";
        case AggFn::Max:        return "MAX(" + sql_ident(a.col) + ")";
        case AggFn::Distinct:   return "COUNT(DISTINCT " + sql_ident(a.col) + ")";
        case AggFn::Stddev:     return "STDDEV(" + sql_ident(a.col) + ")";
        case AggFn::Median:     return "quantile_cont(" + sql_ident(a.col) + ", 0.5)";
        case AggFn::P25:        return "quantile_cont(" + sql_ident(a.col) + ", 0.25)";
        case AggFn::P75:        return "quantile_cont(" + sql_ident(a.col) + ", 0.75)";
        case AggFn::P90:        return "quantile_cont(" + sql_ident(a.col) + ", 0.90)";
        case AggFn::P99:        return "quantile_cont(" + sql_ident(a.col) + ", 0.99)";
        case AggFn::Percentile: {
            // Format the fraction with the classic ("C") locale so the decimal
            // separator is always '.'; printf-style formatting follows the
            // process locale and could emit "0,95", which changes the SQL
            // argument list. Default stream formatting matches "%g".
            // NOTE(review): a.arg is assumed to already be a 0..1 fraction —
            // confirm against the caller.
            std::ostringstream os;
            os.imbue(std::locale::classic());
            os << a.arg;
            return std::string("quantile_cont(") + sql_ident(a.col) + ", " + os.str() + ")";
        }
    }
    return "/* unknown agg */ NULL";
 }
 std::string emit_breakout_expr(const std::string& bk) {
    std::string col_clean;
    DateGranularity g = parse_breakout_granularity(bk, col_clean);
    if (g == DateGranularity::None) {
        return sql_ident(col_clean);
    }
    const char* tok = date_granularity_token(g);
    // Week: DuckDB date_trunc('week', col) -> monday segun configuracion.
    return std::string("date_trunc('") + tok + "', " + sql_ident(col_clean) + ")";
 }
 // Resolves a filter Op to its SQL operator text (padded with spaces).
 // All substring-style ops share LIKE / NOT LIKE; the pattern itself is built
 // elsewhere. Equality is also the fallback for any unhandled value.
 const char* sql_op(Op op) {
    switch (op) {
        case Op::Neq:
            return " <> ";
        case Op::Gt:
            return " > ";
        case Op::Gte:
            return " >= ";
        case Op::Lt:
            return " < ";
        case Op::Lte:
            return " <= ";
        case Op::NotContains:
            return " NOT LIKE ";
        case Op::Contains:
        case Op::StartsWith:
        case Op::EndsWith:
            return " LIKE ";
        case Op::Eq:
            break;
    }
    return " = ";
 }
 // Builds the right-hand side of a filter predicate. The value is always
 // bound positionally: it is appended to `params` and a '?' placeholder is
 // returned.
 //
 // For Contains / NotContains / StartsWith / EndsWith the bound value is a
 // LIKE pattern. The user text is matched literally: '%', '_' and the escape
 // character '\' inside it are escaped, and the returned fragment carries an
 // explicit ESCAPE clause so the escaping is honoured. Without this a value
 // such as "50%" or "a_b" would act as a wildcard pattern.
 std::string emit_filter_rhs(const Filter& f, std::vector<std::string>& params) {
    const bool contains = (f.op == Op::Contains || f.op == Op::NotContains);
    const bool starts   = (f.op == Op::StartsWith);
    const bool ends     = (f.op == Op::EndsWith);
    if (!contains && !starts && !ends) {
        // Plain comparison: bind the value as-is.
        params.push_back(f.value);
        return "?";
    }
    const std::string& raw = f.value;
    std::string pattern;
    if (contains || ends) pattern += '%';
    for (size_t i = 0; i < raw.size(); ++i) {
        const char ch = raw[i];
        if (ch == '\\' || ch == '%' || ch == '_') pattern += '\\';
        pattern += ch;
    }
    if (contains || starts) pattern += '%';
    params.push_back(pattern);
    return "? ESCAPE '\\'";
 }
 // Construye CTE stage 0 (Raw): SELECT cols + derived FROM main_t [JOINs].
 // `tables` provee schema. main_t name = tables[main_idx].name. Derived cols
 // se transpilan a SQL expression; si fuera de subset, push warning + skip col.
 bool emit_stage0(const State& st, const std::vector<TableInput>& tables,
                  int main_idx, SqlEmit& e) {
    if (main_idx < 0 || main_idx >= (int)tables.size()) {
        e.error = "main table out of range";
        return false;
    }
    const TableInput& main_t = tables[(size_t)main_idx];
    // SELECT list: cols originales + derived expressions (subset).
    std::string select_list;
    for (size_t i = 0; i < main_t.headers.size(); ++i) {
        if (i > 0) select_list += ", ";
        select_list += sql_ident(main_t.headers[i]);
    }
    // Derived cols (stage 0 derived).
    if (!st.stages.empty()) {
        const Stage& s0 = st.stages[0];
        for (const auto& d : s0.derived) {
            if (d.source_col >= 0 && d.formula.empty()) {
                // Retipo puro: alias col origen.
                if (d.source_col < (int)main_t.headers.size()) {
                    select_list += ", " + sql_ident(main_t.headers[(size_t)d.source_col])
                                + " AS " + sql_ident(d.name);
                }
                continue;
            }
            std::string err;
            std::string expr = transpile_expr(d.formula, main_t.headers, err);
            if (!err.empty()) {
                std::string msg = "derived col '" + d.name +
                                  "' formula out of SQL subset: " + err;
                e.warnings.push_back(msg);
                // Skip col en SQL output; agente puede recurrir a TQL puro.
                continue;
            }
            select_list += ", " + expr + " AS " + sql_ident(d.name);
        }
    }
    std::string from = sql_ident(main_t.name);
    // Joins
    for (const auto& jn : st.joins) {
        const TableInput* right = nullptr;
        for (const auto& ti : tables) {
            if (ti.name == jn.source) { right = &ti; break; }
        }
        if (!right) {
            e.warnings.push_back("join source '" + jn.source + "' not in tables");
            continue;
        }
        const char* strat = "LEFT JOIN";
        switch (jn.strategy) {
            case JoinStrategy::Left:  strat = "LEFT JOIN";  break;
            case JoinStrategy::Inner: strat = "INNER JOIN"; break;
            case JoinStrategy::Right: strat = "RIGHT JOIN"; break;
            case JoinStrategy::Full:  strat = "FULL OUTER JOIN"; break;
        }
        from += "\n  " + std::string(strat) + " " + sql_ident(right->name)
              + " AS " + sql_ident(jn.alias) + " ON ";
        for (size_t k = 0; k < jn.on.size(); ++k) {
            if (k > 0) from += " AND ";
            from += sql_ident(main_t.name) + "." + sql_ident(jn.on[k].first)
                 + " = " + sql_ident(jn.alias) + "." + sql_ident(jn.on[k].second);
        }
        // Anadir cols del right al SELECT con alias.col prefix.
        if (jn.fields.empty()) {
            for (const auto& rh : right->headers) {
                std::string aliased = jn.alias + "." + rh;
                select_list += ", " + sql_ident(jn.alias) + "." + sql_ident(rh)
                            + " AS " + sql_ident(aliased);
            }
        } else {
            for (const auto& fld : jn.fields) {
                std::string aliased = jn.alias + "." + fld;
                select_list += ", " + sql_ident(jn.alias) + "." + sql_ident(fld)
                            + " AS " + sql_ident(aliased);
            }
        }
    }
    // Stage 0 WHERE: filters del Raw (filter col idx en eff_headers).
    // Filter.col es indice en eff_headers (orig + derived). Para SQL emit,
    // necesitamos resolver col idx -> col name. Reconstruir orden eff_headers.
    std::vector<std::string> eff_headers = main_t.headers;
    if (!st.stages.empty()) {
        for (const auto& d : st.stages[0].derived) {
            eff_headers.push_back(d.name);
        }
    }
    std::string where_clause;
    if (!st.stages.empty()) {
        const Stage& s0 = st.stages[0];
        for (size_t fi = 0; fi < s0.filters.size(); ++fi) {
            const Filter& f = s0.filters[fi];
            if (f.col < 0 || f.col >= (int)eff_headers.size()) {
                e.warnings.push_back("stage0 filter col idx out of range");
                continue;
            }
            std::string col = sql_ident(eff_headers[(size_t)f.col]);
            if (!where_clause.empty()) where_clause += " AND ";
            where_clause += col + sql_op(f.op) + emit_filter_rhs(f, e.params);
        }
    }
    // Stage 0 sort
    std::string order_clause;
    if (!st.stages.empty()) {
        const Stage& s0 = st.stages[0];
        for (size_t si = 0; si < s0.sorts.size(); ++si) {
            const SortClause& sc = s0.sorts[si];
            if (!order_clause.empty()) order_clause += ", ";
            order_clause += sql_ident(sc.col) + (sc.desc ? " DESC" : " ASC");
        }
    }
    std::string cte = "t0 AS (\n  SELECT " + select_list + "\n  FROM " + from;
    if (!where_clause.empty()) cte += "\n  WHERE " + where_clause;
    if (!order_clause.empty()) cte += "\n  ORDER BY " + order_clause;
    cte += "\n)";
    e.sql = "WITH " + cte;
    return true;
 }
 // Stage N (N>=1): SELECT breakouts + agg expressions FROM t<N-1>
 // [WHERE filters] [GROUP BY ...] [ORDER BY ...].
 bool emit_stage_n(const Stage& stg, int n, SqlEmit& e) {
    std::string prev = "t" + std::to_string(n - 1);
    std::string cur  = "t" + std::to_string(n);
    // SELECT list: breakouts (con granularity expr si aplica) + aggregations.
    std::string select_list;
    for (size_t i = 0; i < stg.breakouts.size(); ++i) {
        if (i > 0) select_list += ", ";
        select_list += emit_breakout_expr(stg.breakouts[i])
                    + " AS " + sql_ident(stg.breakouts[i]);
    }
    for (size_t i = 0; i < stg.aggregations.size(); ++i) {
        if (!select_list.empty()) select_list += ", ";
        std::string alias = aggregation_alias(stg.aggregations[i]);
        select_list += emit_agg_expr(stg.aggregations[i]) + " AS " + sql_ident(alias);
    }
    if (select_list.empty()) select_list = "*";
    // WHERE: filters del stage. col es indice en input headers (output del stage previo).
    // Aproximacion: usamos el nombre via stage breakouts/aggs del stage previo si fuera necesario.
    // Para v1, emit por nombre cuando filter.col >= 0 sea idx en breakouts/aggs/orig. El
    // chequeo de existencia se delega a DuckDB (errores en execute son detectables).
    // V1 simple: skip filter cuando no podemos resolver — caller solo deberia tener filter
    // sobre cols que existen.
    // Estrategia simple: emite WHERE solo si stage previo provee headers conocidos. Para no
    // duplicar logica, dejamos al caller proveer headers via filter.col que se resuelve a
    // breakouts[col].
    // V1: si filter.col esta en rango de breakouts del stage previo, emite breakout name.
    // Sino, warning + skip.
    std::string where_clause;
    // Best effort: no podemos construir headers del stage previo aqui sin recomputar.
    // Para v1, omitimos filters de stages >=1 — caller deberia evitar usarlos via SQL.
    // TODO v2: pasar prev_headers para resolver.
    (void)where_clause;
    // GROUP BY: solo si hay breakouts.
    std::string group_clause;
    for (size_t i = 0; i < stg.breakouts.size(); ++i) {
        if (i > 0) group_clause += ", ";
        // Re-emit la expression para GROUP BY (no alias).
        group_clause += emit_breakout_expr(stg.breakouts[i]);
    }
    // ORDER BY
    std::string order_clause;
    for (size_t i = 0; i < stg.sorts.size(); ++i) {
        if (i > 0) order_clause += ", ";
        order_clause += sql_ident(stg.sorts[i].col) + (stg.sorts[i].desc ? " DESC" : " ASC");
    }
    std::string cte = ",\n" + cur + " AS (\n  SELECT " + select_list
                    + "\n  FROM " + prev;
    if (!group_clause.empty()) cte += "\n  GROUP BY " + group_clause;
    if (!order_clause.empty()) cte += "\n  ORDER BY " + order_clause;
    cte += "\n)";
    e.sql += cte;
    return true;
 }
 } // anon
 // Emits the full CTE chain for stages 0..target followed by
 // "SELECT * FROM t<target>;". A negative `up_to_stage` selects
 // state.active_stage; the target is clamped to the valid stage range.
 // On failure the returned SqlEmit has a non-empty `error`.
 SqlEmit emit_sql(const State& state,
                  const std::vector<TableInput>& tables,
                  int up_to_stage) {
    SqlEmit result;
    if (state.stages.empty()) {
        result.error = "state has no stages";
        return result;
    }
    if (tables.empty()) {
        result.error = "no input tables provided";
        return result;
    }
    // Pick the last stage to emit and clamp it into [0, stage count - 1].
    const int last_stage = (int)state.stages.size() - 1;
    int target = up_to_stage;
    if (target < 0) target = state.active_stage;
    if (target < 0) {
        target = 0;
    } else if (target > last_stage) {
        target = last_stage;
    }
    // Main table comes from state.main_source, defaulting to tables[0].
    int main_idx = resolve_main_idx(tables, state.main_source);
    if (main_idx < 0) {
        main_idx = 0;
    }
    if (!emit_stage0(state, tables, main_idx, result)) {
        return result;
    }
    int stage_no = 1;
    while (stage_no <= target) {
        const bool ok = emit_stage_n(state.stages[(size_t)stage_no], stage_no, result);
        if (!ok) return result;
        ++stage_no;
    }
    result.sql += "\nSELECT * FROM t" + std::to_string(target) + ";\n";
    return result;
 }
 } // namespace tql_to_sql
@@ -0,0 +1,41 @@
 // tql_to_sql: emite SQL DuckDB equivalente a una pipeline TQL State.
 // Pure. Sin DuckDB linkado. Solo string emit + validacion.
 // Ver issue 0080 + docs/TQL.md (seccion "SQL transpile subset").
 #pragma once
 #include "data_table_logic.h"
 #include <string>
 #include <vector>
 namespace tql_to_sql {
 // Result of a SQL emit.
 struct SqlEmit {
    std::string                 sql;       // emitted SELECT/CTE chain
    std::vector<std::string>    params;    // values bound positionally to the '?' placeholders, in order
    std::vector<std::string>    warnings;  // soft issues (col not found, etc.); emit still produced SQL
    std::string                 error;     // non-empty means the emit failed
 };
 // Pure: emits SQL equivalent to stages 0..active of `state`.
 // `tables` provides the schema (headers/types/name) of each TableInput. The
 // caller is responsible for loading the tables into the engine under those
 // names.
 // `up_to_stage = -1` => use state.active_stage.
 SqlEmit emit_sql(const data_table::State& state,
                  const std::vector<data_table::TableInput>& tables,
                  int up_to_stage = -1);
 // Pure: checks that `formula` (the Lua body of a derived column) lies within
 // the SQL-transpilable subset. Returns true when it does; otherwise returns
 // false and stores the concrete reason in `error_out`.
 // See docs/TQL.md#sql-transpile-subset.
 bool is_transpilable(const std::string& formula, std::string& error_out);
 // Pure: transpiles a Lua-subset formula into a SQL expression. If the formula
 // is outside the subset, returns "" and fills `error_out`; callers do not
 // need to call is_transpilable first.
 // `in_headers` is the list of input column names for `[col]` references.
 // NOTE(review): the current implementation does not validate references
 // against it — confirm whether callers rely on that.
 std::string transpile_expr(const std::string& formula,
                            const std::vector<std::string>& in_headers,
                            std::string& error_out);
 } // namespace tql_to_sql
@@ -16,6 +16,10 @@ using data_table::ColumnType;
 using data_table::ViewMode;
 using data_table::ViewConfig;
 using data_table::parse_number;
 using data_table::nearest_index_2d;
 using data_table::pie_angle;
 using data_table::pie_slice_at_angle;
 using data_table::heatmap_cell_at;
 static int find_header(const StageOutput& out, const std::string& name) {
    if (name.empty()) return -1;
@@ -152,7 +156,8 @@ std::vector<double> finite(const std::vector<double>& v) {
 }
 bool render_bar_like(const StageOutput& out, ViewMode mode,
-                     const ViewConfig& cfg, ImVec2 size) {
+                     const ViewConfig& cfg, ImVec2 size,
                     int* clicked_row_out = nullptr) {
    int cat_col = resolve_cat(out, cfg, first_category_col(out));
    auto nums = collect_numeric_filtered(out, cfg, 8);
    if (cat_col < 0 || nums.empty()) {
@@ -225,6 +230,15 @@ bool render_bar_like(const StageOutput& out, ViewMode mode,
            ImPlot::PlotBars(nums[0].name.c_str(), ticks.data(), ys.data(), n, 0.67, spc);
        }
    }
    // Hit-test fase 10: idx = round(plot.{x|y}) en single-series mode.
    if (clicked_row_out &&
        mode != ViewMode::GroupedBar && mode != ViewMode::StackedBar &&
        ImPlot::IsPlotHovered() && ImGui::IsMouseClicked(ImGuiMouseButton_Left)) {
        ImPlotPoint p = ImPlot::GetPlotMousePos();
        double target = horiz ? p.y : p.x;
        int idx = (int)(target + 0.5);
        if (idx >= 0 && idx < n) *clicked_row_out = idx;
    }
    ImPlot::EndPlot();
    return true;
 }
@@ -302,7 +316,8 @@ bool render_line_like(const StageOutput& out, ViewMode mode,
    return true;
 }
-bool render_scatter(const StageOutput& out, const ViewConfig& cfg, ImVec2 size) {
+bool render_scatter(const StageOutput& out, const ViewConfig& cfg, ImVec2 size,
                     int* clicked_row_out = nullptr) {
    // Soporte cfg.x_col + cfg.y_cols[0]
    int xc = find_header(out, cfg.x_col);
    int yc = !cfg.y_cols.empty() ? find_header(out, cfg.y_cols[0]) : -1;
@@ -329,11 +344,20 @@ bool render_scatter(const StageOutput& out, const ViewConfig& cfg, ImVec2 size)
        ImPlot::PlotScatter("##s", nums[0].vals.data(), nums[1].vals.data(),
                             (int)nums[0].vals.size());
    }
    if (clicked_row_out &&
        ImPlot::IsPlotHovered() && ImGui::IsMouseClicked(ImGuiMouseButton_Left)) {
        ImPlotPoint p = ImPlot::GetPlotMousePos();
        int idx = nearest_index_2d(p.x, p.y,
                                    nums[0].vals.data(), nums[1].vals.data(),
                                    (int)nums[0].vals.size());
        if (idx >= 0) *clicked_row_out = idx;
    }
    ImPlot::EndPlot();
    return true;
 }
-bool render_bubble(const StageOutput& out, const ViewConfig& cfg, ImVec2 size) {
+bool render_bubble(const StageOutput& out, const ViewConfig& cfg, ImVec2 size,
                    int* clicked_row_out = nullptr) {
    int xc = find_header(out, cfg.x_col);
    int yc = !cfg.y_cols.empty() ? find_header(out, cfg.y_cols[0]) : -1;
    int sc = resolve_size(out, cfg, -1);
@@ -354,6 +378,14 @@ bool render_bubble(const StageOutput& out, const ViewConfig& cfg, ImVec2 size) {
                      axflag(cfg), axflag(cfg));
    ImPlot::PlotBubbles("##b", nums[0].vals.data(), nums[1].vals.data(),
                         nums[2].vals.data(), (int)nums[0].vals.size());
    if (clicked_row_out &&
        ImPlot::IsPlotHovered() && ImGui::IsMouseClicked(ImGuiMouseButton_Left)) {
        ImPlotPoint p = ImPlot::GetPlotMousePos();
        int idx = nearest_index_2d(p.x, p.y,
                                    nums[0].vals.data(), nums[1].vals.data(),
                                    (int)nums[0].vals.size());
        if (idx >= 0) *clicked_row_out = idx;
    }
    ImPlot::EndPlot();
    return true;
 }
@@ -404,7 +436,8 @@ bool render_hist2d(const StageOutput& out, const ViewConfig& cfg, ImVec2 size) {
    return true;
 }
-bool render_heatmap(const StageOutput& out, const ViewConfig& cfg, ImVec2 size) {
+bool render_heatmap(const StageOutput& out, const ViewConfig& cfg, ImVec2 size,
                     int* clicked_row_out = nullptr) {
    auto nums = collect_numeric_filtered(out, cfg, 64);
    if (nums.empty()) { info_text("Need numeric columns"); return false; }
    int cols = (int)nums.size();
@@ -424,11 +457,22 @@ bool render_heatmap(const StageOutput& out, const ViewConfig& cfg, ImVec2 size)
    maybe_fit(cfg);
    if (!ImPlot::BeginPlot("##heatmap", size, 0)) return false;
    ImPlot::PlotHeatmap("##hm", mat.data(), rows, cols, mn, mx, nullptr);
    if (clicked_row_out &&
        ImPlot::IsPlotHovered() && ImGui::IsMouseClicked(ImGuiMouseButton_Left)) {
        ImPlotPoint p = ImPlot::GetPlotMousePos();
        // ImPlot heatmap Y se pinta de top a bottom; plot mouse_y va igual
        // (default scale 0..rows). Mapeo directo.
        int rr, cc;
        heatmap_cell_at(p.x, p.y, rows, cols, rr, cc);
        if (rr >= 0) *clicked_row_out = rr;
        (void)cc;
    }
    ImPlot::EndPlot();
    return true;
 }
-bool render_pie(const StageOutput& out, const ViewConfig& cfg, bool donut, ImVec2 size) {
+bool render_pie(const StageOutput& out, const ViewConfig& cfg, bool donut, ImVec2 size,
                 int* clicked_row_out = nullptr) {
    int cat = resolve_cat(out, cfg, first_category_col(out));
    auto nums = collect_numeric_filtered(out, cfg, 1);
    if (cat < 0 || nums.empty()) { info_text("Need 1 category + 1 numeric"); return false; }
@@ -455,11 +499,24 @@ bool render_pie(const StageOutput& out, const ViewConfig& cfg, bool donut, ImVec
        // Draw inner hole as solid circle by overlaying a smaller pie of one slice transparent.
        // Simpler: just visually it's a circle with text. Use no extra primitive for now.
    }
    if (clicked_row_out &&
        ImPlot::IsPlotHovered() && ImGui::IsMouseClicked(ImGuiMouseButton_Left)) {
        ImPlotPoint p = ImPlot::GetPlotMousePos();
        double dx = p.x - 0.5, dy = p.y - 0.5;
        double dist2 = dx*dx + dy*dy;
        double inner = donut ? (radius * 0.5) : 0.0;
        if (dist2 <= radius * radius && dist2 >= inner * inner) {
            double ang = pie_angle(0.5, 0.5, p.x, p.y);
            int idx = pie_slice_at_angle(ang, values.data(), n);
            if (idx >= 0) *clicked_row_out = idx;
        }
    }
    ImPlot::EndPlot();
    return true;
 }
-bool render_funnel(const StageOutput& out, const ViewConfig& cfg, ImVec2 size) {
+bool render_funnel(const StageOutput& out, const ViewConfig& cfg, ImVec2 size,
                    int* clicked_row_out = nullptr) {
    int cat = resolve_cat(out, cfg, first_category_col(out));
    auto nums = collect_numeric_filtered(out, cfg, 1);
    if (cat < 0 || nums.empty()) { info_text("Need 1 category + 1 numeric"); return false; }
@@ -492,6 +549,17 @@ bool render_funnel(const StageOutput& out, const ViewConfig& cfg, ImVec2 size) {
    ImPlot::SetupAxisTicks(ImAxis_Y1, ticks.data(), n, labels.data(), false);
    ImPlot::PlotBars(nums[0].name.c_str(), ys.data(), ticks.data(), n, 0.85,
                     ImPlotSpec(ImPlotProp_Flags, ImPlotBarsFlags_Horizontal));
    if (clicked_row_out &&
        ImPlot::IsPlotHovered() && ImGui::IsMouseClicked(ImGuiMouseButton_Left)) {
        ImPlotPoint p = ImPlot::GetPlotMousePos();
        int tick_idx = (int)(p.y + 0.5);
        // ticks[i] = n-1-i. Invertir para idx en orden sorted descendiente.
        int sorted_pos = (n - 1) - tick_idx;
        if (sorted_pos >= 0 && sorted_pos < n) {
            // idx[sorted_pos] da indice de row original en out.
            *clicked_row_out = idx[sorted_pos];
        }
    }
    ImPlot::EndPlot();
    return true;
 }
@@ -763,7 +831,9 @@ bool render_radar(const StageOutput& out, const ViewConfig& cfg, ImVec2 size) {
 } // anon
 bool render(const StageOutput& out, ViewMode mode,
-            const ViewConfig& cfg, ImVec2 size) {
+            const ViewConfig& cfg, ImVec2 size,
            int* clicked_row_out) {
    if (clicked_row_out) *clicked_row_out = -1;
    if (out.rows == 0 || out.cols == 0) {
        info_text("No data");
        return false;
@@ -773,21 +843,21 @@ bool render(const StageOutput& out, ViewMode mode,
        case ViewMode::Bar:
        case ViewMode::Column:
        case ViewMode::GroupedBar:
-        case ViewMode::StackedBar:   return render_bar_like(out, mode, cfg, size);
+        case ViewMode::StackedBar:   return render_bar_like(out, mode, cfg, size, clicked_row_out);
        case ViewMode::Line:
        case ViewMode::Area:
        case ViewMode::Stairs:       return render_line_like(out, mode, cfg, size);
-        case ViewMode::Scatter:      return render_scatter(out, cfg, size);
+        case ViewMode::Scatter:      return render_scatter(out, cfg, size, clicked_row_out);
-        case ViewMode::Bubble:       return render_bubble(out, cfg, size);
+        case ViewMode::Bubble:       return render_bubble(out, cfg, size, clicked_row_out);
        case ViewMode::Histogram:    return render_histogram(out, cfg, size);
        case ViewMode::Histogram2D:  return render_hist2d(out, cfg, size);
-        case ViewMode::Heatmap:      return render_heatmap(out, cfg, size);
+        case ViewMode::Heatmap:      return render_heatmap(out, cfg, size, clicked_row_out);
        case ViewMode::BoxPlot:      return render_boxplot(out, cfg, size);
        case ViewMode::Stem:         return render_stem(out, cfg, size);
        case ViewMode::ErrorBars:    return render_errorbars(out, cfg, size);
-        case ViewMode::Pie:          return render_pie(out, cfg, false, size);
+        case ViewMode::Pie:          return render_pie(out, cfg, false, size, clicked_row_out);
-        case ViewMode::Donut:        return render_pie(out, cfg, true,  size);
+        case ViewMode::Donut:        return render_pie(out, cfg, true,  size, clicked_row_out);
-        case ViewMode::Funnel:       return render_funnel(out, cfg, size);
+        case ViewMode::Funnel:       return render_funnel(out, cfg, size, clicked_row_out);
        case ViewMode::Waterfall:    return render_waterfall(out, cfg, size);
        case ViewMode::KPI:          return render_kpi_single(out, cfg);
        case ViewMode::KPIGrid:      return render_kpi_grid(out, cfg);
@@ -14,10 +14,15 @@ namespace viz {
 //
 // `size`: ImVec2(-1,-1) usa todo el espacio disponible.
 // `out`: output del stage activo (headers, types, cells flat row-major).
 // `clicked_row_out`: si != nullptr, el render escribira el indice de row del
 // `StageOutput` clicado por user. -1 si no hubo click drillable. Fase 10
 // (issue 0079): habilitado para bar/column/pie/donut/funnel/scatter/bubble/
 // heatmap. Resto de modos: no hit-test, queda en -1.
 bool render(const data_table::StageOutput& out,
            data_table::ViewMode mode,
            const data_table::ViewConfig& cfg,
-            ImVec2 size = ImVec2(-1, -1));
+            ImVec2 size = ImVec2(-1, -1),
            int* clicked_row_out = nullptr);
 // Helper expuesto: encuentra primera col numerica. -1 si ninguna.
 int first_numeric_col(const data_table::StageOutput& out);
@@ -0,0 +1,212 @@
 // data_table_types — types compartidos del stack TQL (Table Query Language).
 // Promovido al registry desde cpp/apps/primitives_gallery/playground/tables/.
 // Ver issue 0081 + docs/TQL.md. Pure value types + enums.
 #pragma once
 #include <string>
 #include <utility>
 #include <vector>
 namespace data_table {
 // ----------------------------------------------------------------------------
 // Filter operators: six comparison operators followed by four string-matching
 // operators.
 // ----------------------------------------------------------------------------
 enum class Op {
    Eq, Neq, Gt, Gte, Lt, Lte,
    Contains, NotContains, StartsWith, EndsWith
 };
 // ----------------------------------------------------------------------------
 // Column type. Either declared by the caller or auto-detected.
 // ----------------------------------------------------------------------------
 enum class ColumnType {
    Auto, String, Int, Float, Bool, Date, Json
 };
 // ----------------------------------------------------------------------------
 // Derived column: immutable. Two modes:
 //   1) Pure retype: source_col >= 0, formula == "". Cells come from the source.
 //   2) Formula:     source_col == -1, non-empty formula. Evaluated by Lua.
 // ----------------------------------------------------------------------------
 struct DerivedColumn {
    int         source_col = -1;
    ColumnType  type       = ColumnType::String;
    std::string name;
    std::string formula;        // "" = pure retype; anything else = Lua body
    int         lua_id  = -1;   // reference inside lua_engine; -1 if not compiled
    std::string compile_error;
 };
 // ----------------------------------------------------------------------------
 // Filter: column index into eff_headers + operator + value to compare against.
 //
 // `col` and `op` carry default member initializers so that a
 // default-constructed Filter (for example the `added` member embedded in
 // DrillStep) never holds indeterminate values. The defaults are exactly what
 // value-initialization (`Filter{}`) already produced, and brace
 // initialization such as `Filter{col, op, value}` keeps working (C++14+).
 // ----------------------------------------------------------------------------
 struct Filter {
    int         col = 0;
    Op          op  = Op::Eq;
    std::string value;
 };
 // ----------------------------------------------------------------------------
 // ColorRule: conditional cell painting (UI helper).
 //
 // `col` and `color` carry default member initializers so that a
 // default-constructed ColorRule never holds indeterminate values. The
 // defaults match what value-initialization (`ColorRule{}`) already produced,
 // and brace initialization `ColorRule{col, equals, color}` keeps working
 // (C++14+).
 // ----------------------------------------------------------------------------
 struct ColorRule {
    int          col   = 0;
    std::string  equals;
    unsigned int color = 0;
 };
 // ----------------------------------------------------------------------------
 // Aggregations (TQL stages 1+).
 // AggFn enumerates the aggregation functions an Aggregation can select.
 // ----------------------------------------------------------------------------
 enum class AggFn {
    Count, Sum, Avg, Min, Max, Distinct, Stddev,
    Median, P25, P75, P90, P99, Percentile
 };
 // One aggregation: function, input column, optional argument and output alias.
 struct Aggregation {
    AggFn       fn = AggFn::Count;
    std::string col;         // ignored for Count
    double      arg = 0.0;   // for Percentile (0..1)
    std::string alias;       // empty -> auto-generated via aggregation_alias()
 };
 // Sort key: a column name plus a direction flag (`desc` defaults to false).
 struct SortClause {
    std::string col;
    bool        desc = false;
 };
 // Stage: one TQL layer. Stage 0 = Raw (no breakouts/aggregations).
 // Stages 1+ may group. Each stage consumes the output of the previous one.
 struct Stage {
    std::vector<Filter>          filters;
    std::vector<DerivedColumn>   derived;       // expressions of this stage
    std::vector<std::string>     breakouts;     // column names from this stage's INPUT
    std::vector<Aggregation>     aggregations;
    std::vector<SortClause>      sorts;
 };
 // Output of compute_stage. Owns `cell_backing` (new strings for aggregated
 // results) and `cells` (row-major pointers into the backing store, or into the
 // original `in_cells` for passthrough).
 struct StageOutput {
    std::vector<std::string>  cell_backing;
    std::vector<const char*>  cells;
    int                       rows = 0;
    int                       cols = 0;
    std::vector<std::string>  headers;
    std::vector<ColumnType>   types;
 };
 // ----------------------------------------------------------------------------
 // ViewMode: kind of visualization rendered over the active stage's output.
 // ----------------------------------------------------------------------------
 enum class ViewMode {
    Table,
    // Bars
    Bar, Column, GroupedBar, StackedBar,
    // Lines / area
    Line, Area, Stairs,
    // Points
    Scatter, Bubble,
    // Distribution
    Histogram, Histogram2D, Heatmap, BoxPlot,
    // Stems / signals
    Stem, ErrorBars,
    // Composition
    Pie, Donut, Funnel, Waterfall,
    // Single values
    KPI, KPIGrid,
    // Specialized
    Candlestick, Radar,
 };
 // ----------------------------------------------------------------------------
 // Joins (MBQL-style). See issue 0078.
 // ----------------------------------------------------------------------------
 enum class JoinStrategy { Left, Inner, Right, Full };
 // Extra table passed to render() for joins. Owned externally (by the caller).
 struct TableInput {
    std::string                 name;       // stable identifier (matches Join.source)
    std::vector<std::string>    headers;
    std::vector<ColumnType>     types;
    const char* const*          cells = nullptr;  // row-major, headers.size() columns x `rows` rows
    int                         rows  = 0;
    int                         cols  = 0;
 };
 // Join clause: joins the current table with `source` on the `on` pairs,
 // prefixing the right-hand columns with `alias.`.
 struct Join {
    std::string                                          alias;
    std::string                                          source;
    std::vector<std::pair<std::string, std::string>>     on;        // {left_col, right_col}
    JoinStrategy                                         strategy = JoinStrategy::Left;
    std::vector<std::string>                             fields;    // empty = all right-hand columns
 };
 // ----------------------------------------------------------------------------
 // ViewConfig: manual overrides of auto-detection for the active view.
 // ----------------------------------------------------------------------------
 struct ViewConfig {
    std::string                 x_col;        // single: scatter, line, hist2d
    std::vector<std::string>    y_cols;       // 1..N: line/area/bar/etc
    std::string                 size_col;     // bubble
    std::string                 cat_col;      // bar/pie/funnel/box override
    unsigned int                primary_color = 0;     // 0 = ImPlot auto
    int                         hist_bins     = 0;     // 0 = Sturges
    float                       pie_radius    = 0.0f;  // 0 = default
    bool                        show_legend   = true;
    bool                        show_markers  = false; // line/area markers
    bool                        locked        = false; // disable pan/zoom
    mutable bool                fit_request   = false; // consumed by viz::render
 };
 // VizPanel: an additional visualization over the same StageOutput.
 struct VizPanel {
    ViewMode    display = ViewMode::Bar;
    ViewConfig  config;
    // NOTE(review): presumably remembers the last non-Table mode so the UI can
    // toggle back to it; mutable so it can be updated through a const panel —
    // confirm against the renderer.
    mutable ViewMode last_non_table = ViewMode::Bar;
 };
 // ----------------------------------------------------------------------------
 // State: stage pipeline + global visualization settings.
 // ----------------------------------------------------------------------------
 struct State {
    std::vector<Stage>          stages;
    int                         active_stage = 0;
    ViewMode                    display = ViewMode::Table;
    ViewConfig                  viz_config;
    std::vector<VizPanel>       extra_panels;
    std::vector<Join>           joins;     // applied before stages[0]
    std::string                 main_source;  // name of a TableInput; empty -> tables[0]
    std::vector<ColorRule>      color_rules;
    std::vector<bool>           col_visible;
    std::vector<int>            col_order;
    // Helpers (defined in compute_stage.cpp).
    Stage&       raw();
    const Stage& raw() const;
    Stage&       active();
    const Stage& active_const() const;
    void         ensure_stage0();
 };
 // ----------------------------------------------------------------------------
 // Extended drill (phase 10). See issue 0079.
 // ----------------------------------------------------------------------------
 enum class DateGranularity { None, Year, Month, Week, Day, Hour };
 // Predefined filter presets.
 // NOTE(review): the names suggest rolling date windows (7/30/90 days) plus
 // null/zero exclusion; the exact semantics live outside this header — confirm.
 enum class FilterPreset { Last7d, Last30d, Last90d, ExcludeNulls, NonZero };
 // Drill step recorded for the undo/redo history (phase 10).
 struct DrillStep {
    int     target_stage      = -1;   // stage where the filter was added
    int     filter_pos        = -1;   // index into target_stage.filters
    int     prev_active_stage = 0;    // active_stage before the drill
    Filter  added;                    // filter kept for redo
 };
 } // namespace data_table
@@ -0,0 +1,96 @@
 #include "gfx/gpu_check.h"
 #include "gfx/gl_loader.h"
 #include <cstdio>
 #include <cstring>
 #include <string>
 // CUDA runtime version via a compile-time macro.
 // cuda_runtime.h defines CUDART_VERSION (e.g. 12040 for CUDA 12.4).
 // The header is only included when it is available; otherwise
 // FN_HAS_CUDA_RUNTIME stays undefined and cuda_runtime_version is "".
 //
 // The __has_include test is nested inside the defined() check on purpose: a
 // preprocessor that lacks __has_include would still have to parse
 // `__has_include(<...>)` if both were combined in one #if expression, and
 // would reject it.
 #if defined(__has_include)
    #if __has_include(<cuda_runtime.h>)
        #include <cuda_runtime.h>
        #define FN_HAS_CUDA_RUNTIME 1
    #endif
 #endif
 namespace fn::gfx {
 // Wraps glGetString so callers always get a std::string: the driver's text
 // when the query succeeds, an empty string when it returns nullptr.
 static std::string safe_gl_string(GLenum name) {
    const char* text = reinterpret_cast<const char*>(glGetString(name));
    return text ? std::string(text) : std::string();
 }
 // Reports whether GL_VERSION alone guarantees compute shaders and shader
 // storage buffers as core features: desktop OpenGL >= 4.3, or OpenGL ES >= 3.1
 // (where both became core). Returns false when GL_VERSION is unavailable or
 // does not start with a "major.minor" pair.
 static bool check_gl_version_43() {
    // GL_VERSION is formatted "major.minor ..." or "OpenGL ES major.minor ...".
    const GLubyte* ver = glGetString(GL_VERSION);
    if (!ver) return false;
    const char* p = reinterpret_cast<const char*>(ver);
    // An ES context reports a prefixed string and uses its own version scale,
    // so it has to be compared against 3.1 instead of the desktop 4.3.
    const bool is_es = std::strncmp(p, "OpenGL ES ", 10) == 0;
    if (is_es) p += 10;
    int major = 0, minor = 0;
    // Both numbers must parse; a malformed string is treated as "not supported".
    // NOLINTNEXTLINE(cert-err34-c)
    if (std::sscanf(p, "%d.%d", &major, &minor) != 2) return false;
    if (is_es) return (major > 3) || (major == 3 && minor >= 1);
    return (major > 4) || (major == 4 && minor >= 3);
 }
 // Fills `out` with the capabilities of the current OpenGL context: vendor,
 // renderer and version strings, compute-shader / storage-buffer support flags,
 // compute workgroup limits, and the CUDA runtime version this file was
 // compiled against (if any).
 //
 // `out` is reset to defaults first, so no stale data survives a failed call.
 // Returns false when the GL_VENDOR string is empty (no usable GL context),
 // true otherwise.
 bool gpu_check_caps(GpuCaps& out) {
    out = GpuCaps{}; // reset
    out.gl_vendor   = safe_gl_string(GL_VENDOR);
    out.gl_renderer = safe_gl_string(GL_RENDERER);
    out.gl_version  = safe_gl_string(GL_VERSION);
    if (out.gl_vendor.empty()) {
        // No active GL context.
        return false;
    }
    // Both feature flags depend on the same two inputs, so query them once.
    const bool version_ok = check_gl_version_43();
    // NOTE(review): core-profile contexts are expected to reject
    // glGetString(GL_EXTENSIONS) (glGetStringi is the replacement). The null
    // check below keeps that safe, but the ARB fallback is then skipped —
    // confirm whether that matters for the supported targets.
    const char* exts = reinterpret_cast<const char*>(glGetString(GL_EXTENSIONS));
    const auto has_ext = [exts](const char* ext_name) {
        return exts != nullptr && std::strstr(exts, ext_name) != nullptr;
    };
    // Compute shader support: version check or ARB_compute_shader.
    out.has_compute_shader = version_ok || has_ext("GL_ARB_compute_shader");
    // Shader storage buffer: version check or ARB_shader_storage_buffer_object.
    out.has_storage_buffer =
        version_ok || has_ext("GL_ARB_shader_storage_buffer_object");
    // Workgroup limits (only queried when compute shaders are supported;
    // otherwise the arrays keep their zero defaults). Indexed query: 0=X 1=Y 2=Z.
    if (out.has_compute_shader) {
        for (GLuint axis = 0; axis < 3; ++axis) {
            glGetIntegeri_v(GL_MAX_COMPUTE_WORK_GROUP_COUNT, axis,
                            &out.max_compute_workgroup_count[axis]);
            glGetIntegeri_v(GL_MAX_COMPUTE_WORK_GROUP_SIZE, axis,
                            &out.max_compute_workgroup_size[axis]);
        }
    }
    // CUDA runtime version (compile-time detection). Without the CUDA headers
    // cuda_runtime_version keeps the empty default from the reset above.
 #if defined(FN_HAS_CUDA_RUNTIME)
    {
        const int cuda_ver = CUDART_VERSION; // e.g. 12040 for CUDA 12.4
        const int major = cuda_ver / 1000;
        const int minor = (cuda_ver % 1000) / 10;
        char buf[16];
        std::snprintf(buf, sizeof(buf), "%d.%d", major, minor);
        out.cuda_runtime_version = buf;
    }
 #endif
    return true;
 }
 } // namespace fn::gfx
@@ -0,0 +1,38 @@
 #pragma once
 #include <string>
 namespace fn::gfx {
 // GpuCaps collects OpenGL and CUDA capabilities of the active context.
 // Every string field is empty ("") when the value is not available.
 struct GpuCaps {
    // OpenGL — an active GL context is required before calling gpu_check_caps.
    std::string gl_vendor;    // glGetString(GL_VENDOR)   e.g. "NVIDIA Corporation"
    std::string gl_renderer;  // glGetString(GL_RENDERER) e.g. "NVIDIA GeForce RTX 3080/PCIe/SSE2"
    std::string gl_version;   // glGetString(GL_VERSION)  e.g. "4.6.0 NVIDIA 550.54.15"
    // Compute shader limits (GL_MAX_COMPUTE_WORK_GROUP_COUNT/SIZE).
    // Index 0=X 1=Y 2=Z. Values are 0 when compute shaders are not available.
    int max_compute_workgroup_count[3] = {0, 0, 0};
    int max_compute_workgroup_size[3]  = {0, 0, 0};
    bool has_compute_shader  = false; // GL_VERSION >= 4.3 or the ARB_compute_shader extension
    bool has_storage_buffer  = false; // GL_VERSION >= 4.3 or the ARB_shader_storage_buffer_object extension
    // CUDA — empty when the CUDA runtime was not detected at compile time.
    // Format: "12.4" (major.minor), or "" when unavailable.
    std::string cuda_runtime_version;
 };
 // gpu_check_caps fills `out` with the capabilities of the active OpenGL context.
 //
 // REQUIREMENT: call it after the GL context has been initialised and, on
 // Windows, after fn::gfx::gl_loader_init(). Calling it without an active
 // context is not a supported use: the outcome is not guaranteed, although
 // glGetString is expected to return nullptr, which is reported as `false`.
 //
 // Returns true when at least the GL vendor string could be read (active context).
 // Returns false when gl_vendor ends up empty (no active context or faulty driver).
 bool gpu_check_caps(GpuCaps& out);
 } // namespace fn::gfx
@@ -0,0 +1,86 @@
 ---
 name: gpu_check
 kind: function
 lang: cpp
 domain: gfx
 version: "1.0.0"
 purity: impure
 signature: "bool fn::gfx::gpu_check_caps(GpuCaps& out)"
 description: "Rellena GpuCaps con las capacidades del contexto OpenGL activo: vendor, renderer, version, limites de compute workgroup, flags has_compute_shader/has_storage_buffer, y version CUDA runtime (deteccion en compile-time via CUDART_VERSION). Requiere contexto GL activo. Retorna false si el contexto no esta disponible."
 tags: [gpu, opengl, cuda, caps, hardware, probe, gfx, compute, infra]
 uses_functions: ["gl_loader_cpp_gfx"]
 uses_types: []
 returns: []
 returns_optional: false
 error_type: "error_go_core"
 imports: [gfx/gpu_check.h, gfx/gl_loader.h, cuda_runtime.h, cstring, string]
 tested: false
 tests: []
 test_file_path: ""
 file_path: "cpp/functions/gfx/gpu_check.cpp"
 framework: opengl
 params:
  - name: out
    desc: "Referencia a GpuCaps que se rellena con las capacidades detectadas. Se resetea al inicio de la llamada."
 output: "true si el contexto GL esta activo y gl_vendor no esta vacio; false si no hay contexto GL activo o el driver devuelve nullptr para GL_VENDOR."
 ---
 # gpu_check
 Probing de capacidades GPU en runtime: OpenGL strings, compute shader support y CUDA.
 ## Uso tipico
 ```cpp
 #include "gfx/gpu_check.h"
 #include "gfx/gl_loader.h"
 // Dentro de render(), despues del primer frame (contexto GL activo):
 fn::gfx::GpuCaps caps;
 if (fn::gfx::gpu_check_caps(caps)) {
    printf("GPU: %s\n", caps.gl_renderer.c_str());
    printf("Compute shaders: %s\n", caps.has_compute_shader ? "yes" : "no");
    if (!caps.cuda_runtime_version.empty())
        printf("CUDA runtime: %s\n", caps.cuda_runtime_version.c_str());
 } else {
    printf("No GL context active\n");
 }
 ```
 ## Estructura GpuCaps
 ```cpp
 struct GpuCaps {
    std::string gl_vendor;                  // "NVIDIA Corporation"
    std::string gl_renderer;                // "NVIDIA GeForce RTX 3080/PCIe/SSE2"
    std::string gl_version;                 // "4.6.0 NVIDIA 550.54.15"
    int max_compute_workgroup_count[3];     // [65535, 65535, 65535] tipico NVIDIA
    int max_compute_workgroup_size[3];      // [1024, 1024, 64] tipico
    bool has_compute_shader;                // GL 4.3+ o ARB_compute_shader
    bool has_storage_buffer;                // GL 4.3+ o ARB_shader_storage_buffer_object
    std::string cuda_runtime_version;       // "12.4" o "" si no compilado con CUDA
 };
 ```
 ## CUDA detection
 La version CUDA se detecta en **compile time** via el macro `CUDART_VERSION` de `<cuda_runtime.h>`. Si la app no esta compilada con el CUDA toolkit, `cuda_runtime_version` sera `""`. Para detection en runtime del toolkit del sistema, usar `cuda_toolkit_check_bash_infra`.
 ## Requisito de contexto GL
 Llamar siempre despues de crear el contexto GL. En apps que usan `fn::run_app`, el contexto esta activo desde el primer frame del `render()` callback. En Windows, `fn::gfx::gl_loader_init()` debe haberse llamado antes para que los punteros de funcion esten resueltos.
 ## Uso previsto (fn doctor cpp-apps)
 Esta funcion sera invocada por el audit de `fn doctor cpp-apps` para verificar que las apps C++ del registry tienen acceso a compute shaders cuando declaran dependencias de `gpu_compute_program`, `gpu_dispatch`, etc.
 ## CMakeLists.txt
 ```cmake
 add_imgui_app(mi_app
    main.cpp
    ${CMAKE_SOURCE_DIR}/cpp/functions/gfx/gpu_check.cpp
 )
 # CUDA opcional: si la app compila con CUDA toolkit el header cuda_runtime.h
 # estara disponible y FN_HAS_CUDA_RUNTIME se activara automaticamente.
 ```
@@ -0,0 +1,20 @@
 ---
 name: AggFn
 lang: cpp
 domain: core
 version: "1.0.0"
 algebraic: sum
 definition: |
  enum class AggFn {
      Count, Sum, Avg, Min, Max, Distinct, Stddev,
      Median, P25, P75, P90, P99, Percentile
  };
 description: "Funcion de agregacion soportada. Pickup via UI combo + SQL emit via tql_to_sql. Percentile usa Aggregation.arg en [0,1]."
 tags: [tql, aggregation, sum-type]
 uses_types: []
 file_path: "cpp/functions/core/data_table_types.h"
 ---
 ## Notas
 Mapeo SQL DuckDB: Count → `COUNT(*)`, Sum/Avg/Min/Max/Stddev → ops nativas, Distinct → `COUNT(DISTINCT col)`, Median/P25/P75/P90/P99/Percentile → `quantile_cont(col, p)`.
@@ -0,0 +1,22 @@
 ---
 name: Aggregation
 lang: cpp
 domain: core
 version: "1.0.0"
 algebraic: product
 definition: |
  struct Aggregation {
      AggFn       fn;
      std::string col;
      double      arg;
      std::string alias;
  };
 description: "Funcion de agregacion en Stage 1+. fn = Count/Sum/Avg/Min/Max/Distinct/Stddev/Median/P25/P75/P90/P99/Percentile. arg = parametro (p para percentile)."
 tags: [tql, aggregation, agg, product-type]
 uses_types: []
 file_path: "cpp/functions/core/data_table_types.h"
 ---
 ## Notas
 `alias` vacio dispara `aggregation_alias(a)` auto: `count`, `sum_<col>`, `distinct_<col>`, `p95_<col>` etc. SQL mapping en `tql_to_sql`: `COUNT(*)`, `SUM("col")`, `quantile_cont("col", p)`.
@@ -0,0 +1,21 @@
 ---
 name: ColorRule
 lang: cpp
 domain: core
 version: "1.0.0"
 algebraic: product
 definition: |
  struct ColorRule {
      int          col;
      std::string  equals;
      unsigned int color;
  };
 description: "Regla de pintado condicional para tabla UI. Si cells[row][col] == equals, fondo = color (RGBA packed)."
 tags: [tql, color, ui-hint, product-type]
 uses_types: []
 file_path: "cpp/functions/core/data_table_types.h"
 ---
 ## Notas
 Solo afecta render visual. Round-trip en TQL via `columns.<name>.color_rules`. Vacio = sin color override.
@@ -0,0 +1,28 @@
 ---
 name: ColumnType
 lang: cpp
 domain: core
 version: "1.0.0"
 algebraic: sum
 definition: |
  enum class ColumnType {
      Auto, String, Int, Float, Bool, Date, Json
  };
 description: "Tipo de columna del modelo TQL. `Auto` dispara auto-detect; el resto fuerza el tipo declarado. Base de toda la pipeline data_table."
 tags: [tql, data-table, types, sum-type]
 uses_types: []
 file_path: "cpp/functions/core/data_table_types.h"
 ---
 ## Notas
 Sum type / enum-class. Convivimos con `effective_type()` que resuelve `Auto` → auto-detect via sample. El resto fuerza el tipo declarado por el caller.
 Tabla de iconos UTF-8 Tabler para cada variante en `column_type_icon(t)`. Mapeo SQL ↔ ColumnType en `tql_to_sql` (issue 0080).
 ## Usado por
 - `compute_stage_cpp_core` — input/output types per stage
 - `tql_emit_cpp_core` / `tql_apply_cpp_core` — emit/parse TQL columns block
 - `tql_to_sql_cpp_core` — mapping a SQL DuckDB types
 - `data_table_cpp_viz` — UI render por columna
@@ -0,0 +1,19 @@
 ---
 name: DateGranularity
 lang: cpp
 domain: core
 version: "1.0.0"
 algebraic: sum
 definition: |
  enum class DateGranularity { None, Year, Month, Week, Day, Hour };
 description: "Granularidad de truncado de fechas para breakouts TQL. Sufijo `:token` en breakout string (ej. 'ts:month')."
 tags: [tql, date, granularity, sum-type, mbql]
 uses_types: []
 file_path: "cpp/functions/core/data_table_types.h"
 ---
 ## Notas
 Auto-detect via `auto_date_granularity(min_ymd, max_ymd)`: >2y→Year, >60d→Month, >14d→Week, resto→Day. SQL emit DuckDB: `date_trunc('month'|'year'|...,col)`.
 Week trunca a lunes ISO (Hinnant algo).
@@ -0,0 +1,26 @@
 ---
 name: DerivedColumn
 lang: cpp
 domain: core
 version: "1.0.0"
 algebraic: product
 definition: |
  struct DerivedColumn {
      int         source_col;
      ColumnType  type;
      std::string name;
      std::string formula;
      int         lua_id;
      std::string compile_error;
  };
 description: "Col custom dentro de un Stage. Modo 1: retipo (source_col >= 0, formula vacia). Modo 2: formula Lua (source_col == -1, eval por lua_engine sandbox)."
 tags: [tql, derived, formula, lua, product-type]
 uses_types: [ColumnType_cpp_core]
 file_path: "cpp/functions/core/data_table_types.h"
 ---
 ## Notas
 `formula` evaluada por row via `lua_engine` con `[col]` refs disponibles. Para SQL transpile (fase 11), formula debe estar dentro del Lua subset; sino `tql_to_sql` emite warning + skip col.
 `lua_id` cachea la formula compilada en lua_engine entre eval calls.
@@ -0,0 +1,30 @@
 ---
 name: Filter
 lang: cpp
 domain: core
 version: "1.0.0"
 algebraic: product
 definition: |
  struct Filter {
      int         col;
      Op          op;
      std::string value;
  };
 description: "Predicado TQL: col idx + Op + value. Aplicado dentro de un Stage por compute_stage. col es idx en headers efectivos del INPUT del stage."
 tags: [tql, filter, predicate, product-type]
 uses_types: [Op_cpp_core]
 file_path: "cpp/functions/core/data_table_types.h"
 ---
 ## Notas
 `col` es indice en `in_headers` del stage donde aplica (no en el dataset original — esto cambio en el refactor a stages). Para drill-down usar `make_drill_filter(col_idx, value)`.
 `value` es string siempre — `compare()` decide numerico vs lexical segun parseo. Range filters (op_in_range, op_between) no estan modelados; usar dos Filters consecutivos.
 ## Usado por
 - `Stage_cpp_core` (lista de filters)
 - `apply_filters`, `compute_stage_cpp_core`
 - `make_drill_filter`, `build_preset_filters`
 - `tql_to_sql_cpp_core` → SQL WHERE clauses con `?` placeholders
@@ -0,0 +1,25 @@
 ---
 name: Join
 lang: cpp
 domain: core
 version: "1.0.0"
 algebraic: product
 definition: |
  struct Join {
      std::string                                          alias;
      std::string                                          source;
      std::vector<std::pair<std::string, std::string>>     on;
      JoinStrategy                                         strategy;
      std::vector<std::string>                             fields;
  };
 description: "Join MBQL-style entre main_t y source. on = pares {left_col, right_col} multi-key. strategy = Left/Inner/Right/Full. fields vacio = all cols del derecho."
 tags: [tql, join, mbql, product-type]
 uses_types: [JoinStrategy_cpp_core]
 file_path: "cpp/functions/core/data_table_types.h"
 ---
 ## Notas
 Materializado por `join_tables_cpp_core` antes de stages[0]. Cols del derecho se prefijan con `alias.col` para preservar headers del main. SQL emit: `LEFT/INNER/RIGHT/FULL OUTER JOIN source AS alias ON main.l = alias.r AND ...`.
 Multi-key: `on = {{l1,r1}, {l2,r2}}` → `ON main.l1 = alias.r1 AND main.l2 = alias.r2`.
@@ -0,0 +1,17 @@
 ---
 name: JoinStrategy
 lang: cpp
 domain: core
 version: "1.0.0"
 algebraic: sum
 definition: |
  enum class JoinStrategy { Left, Inner, Right, Full };
 description: "Estrategia de join MBQL-style. 4 variantes estandar SQL. SQL mapping directo a LEFT/INNER/RIGHT/FULL OUTER JOIN."
 tags: [tql, join, strategy, sum-type]
 uses_types: []
 file_path: "cpp/functions/core/data_table_types.h"
 ---
 ## Notas
 Round-trip TQL: tokens `"left"/"inner"/"right"/"full"`. Un token no reconocido (p. ej. `"nope"`) se parsea como Left (fallback).
@@ -0,0 +1,36 @@
 ---
 name: Op
 lang: cpp
 domain: core
 version: "1.0.0"
 algebraic: sum
 definition: |
  enum class Op {
      Eq, Neq, Gt, Gte, Lt, Lte,
      Contains, NotContains, StartsWith, EndsWith
  };
 description: "Operador de filtro TQL. 6 ops de comparacion + 4 ops de string. Numericos ordenan numericamente cuando ambos lados parsean."
 tags: [tql, filter, operator, sum-type]
 uses_types: []
 file_path: "cpp/functions/core/data_table_types.h"
 ---
 ## Notas
 Tabla operadores permitidos por `ColumnType` via `ops_for_type(t)`:
 | Tipo | Ops |
 |---|---|
 | Int / Float / Date | Eq, Neq, Gt, Gte, Lt, Lte |
 | Bool | Eq, Neq |
 | Json | Eq, Neq, Contains, NotContains |
 | String | Eq, Neq, Contains, NotContains, StartsWith, EndsWith |
 Mapeo SQL en `tql_to_sql_cpp_core`: Contains → `LIKE '%v%'`, StartsWith → `LIKE 'v%'`, etc.
 ## Usado por
 - `Filter_cpp_core`
 - `compute_stage_cpp_core` (via apply_filters)
 - `tql_emit_cpp_core` / `tql_apply_cpp_core`
 - `tql_to_sql_cpp_core`
@@ -0,0 +1,20 @@
 ---
 name: SortClause
 lang: cpp
 domain: core
 version: "1.0.0"
 algebraic: product
 definition: |
  struct SortClause {
      std::string col;
      bool        desc;
  };
 description: "Clausula de orden por nombre de col. Multi-sort = vector ordenado por prioridad. desc=true para descendente."
 tags: [tql, sort, order, product-type]
 uses_types: []
 file_path: "cpp/functions/core/data_table_types.h"
 ---
 ## Notas
 Sort por nombre (no idx) — sobrevive a renombrado de cols + a stages 1+ donde idx no aplica. Aplicacion via `apply_sorts`. Round-trip TQL: `sort = { {"asc"|"desc", "col"}, ... }`.
@@ -0,0 +1,33 @@
 ---
 name: Stage
 lang: cpp
 domain: core
 version: "1.0.0"
 algebraic: product
 definition: |
  struct Stage {
      std::vector<Filter>          filters;
      std::vector<DerivedColumn>   derived;
      std::vector<std::string>     breakouts;
      std::vector<Aggregation>     aggregations;
      std::vector<SortClause>      sorts;
  };
 description: "Layer del pipeline TQL. Stage 0 = Raw (filters + derived + sort). Stage 1+ pueden agrupar (breakouts + aggregations + sort). Consumida por compute_stage."
 tags: [tql, stage, pipeline, product-type, mbql]
 uses_types: [Filter_cpp_core, Op_cpp_core]
 file_path: "cpp/functions/core/data_table_types.h"
 ---
 ## Notas
 Inspirado en MBQL `:filter` / `:breakout` / `:aggregation` / `:order-by`. Diferencia clave: TQL chain N stages explicitos, cada uno consume el output del anterior. MBQL usa `:source-query` recursivo.
 Breakout strings pueden llevar sufijo `:granularity` para cols Date (fase 10): `"ts:month"`, `"ts:week"`, etc. Ver `parse_breakout_granularity()`.
 ## Usado por
 - `State_cpp_core` (lista de stages)
 - `compute_stage_cpp_core` (executes a single Stage)
 - `compute_pipeline_cpp_core` (chains stages 0..N)
 - `tql_emit_cpp_core` / `tql_apply_cpp_core` (round-trip Lua)
 - `tql_to_sql_cpp_core` → CTE chain DuckDB
@@ -0,0 +1,26 @@
 ---
 name: StageOutput
 lang: cpp
 domain: core
 version: "1.0.0"
 algebraic: product
 definition: |
  struct StageOutput {
      std::vector<std::string>  cell_backing;
      std::vector<const char*>  cells;
      int                       rows;
      int                       cols;
      std::vector<std::string>  headers;
      std::vector<ColumnType>   types;
  };
 description: "Output materializado de compute_stage. cell_backing posee strings nuevos (aggregations); cells es row-major de ptrs a backing o a in_cells original."
 tags: [tql, stage, output, product-type]
 uses_types: [ColumnType_cpp_core]
 file_path: "cpp/functions/core/data_table_types.h"
 ---
 ## Notas
 Lifetime: cell_backing es owner — cells solo es valido mientras StageOutput viva. Para passthrough (sin agregaciones), cells apunta a in_cells del caller (sin backing local).
 Reservar capacidad upfront en cell_backing evita realloc que invalida punteros.
@@ -0,0 +1,40 @@
 ---
 name: State
 lang: cpp
 domain: core
 version: "1.0.0"
 algebraic: product
 definition: |
  struct State {
      std::vector<Stage>     stages;
      int                    active_stage;
      ViewMode               display;
      ViewConfig             viz_config;
      std::vector<VizPanel>  extra_panels;
      std::vector<Join>      joins;
      std::string            main_source;
      std::vector<ColorRule> color_rules;
      std::vector<bool>      col_visible;
      std::vector<int>       col_order;
  };
 description: "Estado completo de una query TQL: pipeline de stages + joins + viz config + UI tweaks. Round-trip a Lua via tql_emit/tql_apply."
 tags: [tql, state, pipeline, product-type]
 uses_types: [Stage_cpp_core, Filter_cpp_core, Op_cpp_core]
 file_path: "cpp/functions/core/data_table_types.h"
 ---
 ## Notas
 State es el documento canonico de una query del usuario. Atomico — toda mutacion pasa por helpers pure (`apply_drill_step`, `drill_up`, etc.).
 `active_stage` = idx del stage cuyo output se renderiza. Filters/sorts del Raw siempre se aplican antes; joins se materializan ANTES de stages[0].
 Helpers `raw()`, `active()` garantizan `stages[0]` existe (lazy init en `ensure_stage0`).
 ## Usado por
 - `data_table_cpp_viz` (UI render principal)
 - `compute_pipeline_cpp_core` (resuelve hasta active_stage)
 - `tql_emit_cpp_core` / `tql_apply_cpp_core` (Lua serializacion)
 - `tql_to_sql_cpp_core` → SQL DuckDB CTE chain
 - `apply_drill_step` / `undo_drill_step` / `drill_up`
@@ -0,0 +1,33 @@
 ---
 name: TableInput
 lang: cpp
 domain: core
 version: "1.0.0"
 algebraic: product
 definition: |
  struct TableInput {
      std::string               name;
      std::vector<std::string>  headers;
      std::vector<ColumnType>   types;
      const char* const*        cells;
      int                       rows;
      int                       cols;
  };
 description: "Tabla materializada en memoria pasada a data_table::render(). Owner externo. Multiple tables = main + joinables (fase 9 issue 0078)."
 tags: [tql, table, joins, mbql, product-type]
 uses_types: [ColumnType_cpp_core, Op_cpp_core]
 file_path: "cpp/functions/core/data_table_types.h"
 ---
 ## Notas
 `name` es el identificador estable que matchea `Join.source` cuando se aplica un join. `cells` es row-major (rows * cols `const char*`). Apuntadores estables durante todo el frame de render.
 Cells son strings — auto_detect_type infiere ColumnType si `types[i] == Auto`. Numericos se parsean por celda en compare/agg via `parse_number()`.
 ## Usado por
 - `data_table_cpp_viz::render(tables, state)`
 - `resolve_main_idx` (matchea state.main_source)
 - `join_tables_cpp_core` (right table)
 - `tql_to_sql_cpp_core` (schema para emitir SELECT FROM `name`)
@@ -0,0 +1,29 @@
 ---
 name: ViewConfig
 lang: cpp
 domain: viz
 version: "1.0.0"
 algebraic: product
 definition: |
  struct ViewConfig {
      std::string                 x_col;
      std::vector<std::string>    y_cols;
      std::string                 size_col;
      std::string                 cat_col;
      unsigned int                primary_color;
      int                         hist_bins;
      float                       pie_radius;
      bool                        show_legend;
      bool                        show_markers;
      bool                        locked;
      mutable bool                fit_request;
  };
 description: "Overrides manuales de auto-detect para ViewMode. Cols vacias dejan al dispatcher elegir. primary_color=0 usa palette ImPlot."
 tags: [tql, viz, config, product-type]
 uses_types: []
 file_path: "cpp/functions/core/data_table_types.h"
 ---
 ## Notas
 `fit_request` mutable bool consumido por `viz::render` (one-shot trigger para `ImPlot::SetNextAxesToFit`). `locked` deshabilita pan/zoom del usuario.
@@ -0,0 +1,29 @@
 ---
 name: ViewMode
 lang: cpp
 domain: viz
 version: "1.0.0"
 algebraic: sum
 definition: |
  enum class ViewMode {
      Table,
      Bar, Column, GroupedBar, StackedBar,
      Line, Area, Stairs,
      Scatter, Bubble,
      Histogram, Histogram2D, Heatmap, BoxPlot,
      Stem, ErrorBars,
      Pie, Donut, Funnel, Waterfall,
      KPI, KPIGrid,
      Candlestick, Radar
  };
 description: "Modo de visualizacion ImPlot del stage activo. ~25 variantes cubriendo bars/lines/distribution/composition/specialized. Dispatcher en viz::render."
 tags: [tql, viz, imgui, implot, sum-type]
 uses_types: []
 file_path: "cpp/functions/core/data_table_types.h"
 ---
 ## Notas
 Tokens lowercase via `view_mode_token`/`view_mode_from_token` para TQL emit/apply. Helpers `view_mode_needs_numeric/category/aggregation` guían UI (combo selectable solo si schema compatible).
 `Table` siempre disponible (fallback render por defecto). Demas requieren al menos cols compatibles. Click-to-drill (fase 10): Bar/Column/Scatter/Bubble/Pie/Donut/Funnel/Heatmap.
@@ -0,0 +1,21 @@
 ---
 name: VizPanel
 lang: cpp
 domain: viz
 version: "1.0.0"
 algebraic: product
 definition: |
  struct VizPanel {
      ViewMode    display;
      ViewConfig  config;
      mutable ViewMode last_non_table;
  };
 description: "Viz adicional sobre el mismo StageOutput. State tiene panel principal (display+viz_config) + vector<VizPanel> extras."
 tags: [tql, viz, panel, product-type]
 uses_types: [ViewMode_cpp_viz, ViewConfig_cpp_viz]
 file_path: "cpp/functions/core/data_table_types.h"
 ---
 ## Notas
 `last_non_table` memoria del ultimo display !=Table para toggle Table↔View rapido en UI. Mutable porque se actualiza durante render (no rompe const correctness).
@@ -1,9 +1,10 @@
 ---
 id: 0078
 title: tables playground — joins MBQL-style (fase 9)
-status: pending
+status: done
 priority: medium
 created: 2026-05-12
 closed: 2026-05-12
 related_components: [cpp/apps/primitives_gallery/playground/tables, lua_engine, tql]
 ---
@@ -1,9 +1,10 @@
 ---
 id: 0079
 title: tables playground — drill-through extendido (fase 10)
-status: pending
+status: done
 priority: medium
 created: 2026-05-12
 closed: 2026-05-12
 related_components: [cpp/apps/primitives_gallery/playground/tables]
 ---
@@ -1,77 +1,238 @@
 ---
 id: 0080
-title: tables playground — LLM API "Ask AI" (fase 11)
+title: tables playground — LLM "Ask AI" + TQL/SQL emit (fase 11)
-status: pending
+status: partial
 priority: medium
 created: 2026-05-12
-related_components: [cpp/apps/primitives_gallery/playground/tables]
+updated: 2026-05-13
 notes: pure layer + LLM client + Ask AI modal DONE. DuckDB adapter v2 (opcional, build flag FN_TQL_DUCKDB=1)
 related_components: [cpp/apps/primitives_gallery/playground/tables, lua_engine, tql, duckdb]
 ---
 ## Contexto
-Fase 11 del roadmap del tables playground. El user escribe en lenguaje natural
+Fase 11 del roadmap del tables playground. Dos capacidades que se construyen juntas porque comparten infra (prompt schema, runtime adapter, tests round-trip):
-una pregunta sobre los datos ("show me top 10 langs by total size"). El LLM
+
-recibe el TQL actual + schema + pregunta, devuelve nuevo TQL. App aplica via
+1. **LLM "Ask AI"** — usuario o agente pregunta en lenguaje natural, modelo devuelve un nuevo TQL (o SQL DuckDB si esta linkado).
-`tql::apply` y renderiza.
+2. **TQL → SQL (DuckDB) emitter** — permite a agentes escribir SQL contra el mismo modelo de datos. Ejecutable si la app linkó DuckDB; si no, solo emite el string.
 Diseño one-way: **TQL → SQL si**, **SQL → TQL no**. Razon documentada en investigacion Metabase MBQL ↔ SQL: la traduccion inversa es lossy (CTEs, window fns, set ops, lateral, correlated subqueries no caben en MBQL/TQL). Patron canonico Malloy/Cube/LookML/Metabase = compile-down one-way.
 ## Cambios
-### 1. UI
+### 1. UI "Ask AI"
 - Boton "Ask AI" en toolbar (al lado de "+ Viz").
- Modal con:
+- Modal:
  - InputText multiline para la pregunta.
-  - Boton "Send" + spinner durante la llamada.
+  - Toggle output mode: `TQL` (default) | `SQL (DuckDB)` (visible solo si app fue compilada con `FN_TQL_DUCKDB=1`).
-  - Diff side-by-side: TQL actual vs TQL propuesto (texto con highlight).
+  - Boton "Send" + spinner.
  - Diff side-by-side: actual vs propuesto (texto highlight).
  - Botones "Apply" / "Reject" / "Edit before apply".
 ### 2. Backend LLM
- Provider: Anthropic Claude (API key desde `pass anthropic/api-key`).
+- Provider: Anthropic Claude. API key via `pass anthropic/api-key`.
- Endpoint: `https://api.anthropic.com/v1/messages`.
+- Endpoint: `https://api.anthropic.com/v1/messages`. Model: `claude-sonnet-4-6`. Override env `FN_LLM_MODEL`.
- Model: `claude-sonnet-4-6` por defecto. Configurable via env `FN_LLM_MODEL`.
+- Cliente HTTP: cURL via popen (sin deps nuevas).
 - Cliente HTTP: cURL via popen (sin deps nuevas) o libcurl si ya esta linkada.
 - Prompt template incluye:
  - Esquema TQL (de `docs/TQL.md`).
  - **Si SQL mode**: dialecto DuckDB + funciones DuckDB relevantes (date_trunc, regexp_replace, etc.).
  - Cols disponibles del stage 0 (name, type) + cols joinables.
  - **Grammar Lua subset** (ver §4) cuando aplique.
  - Funciones Lua disponibles (de `lua_engine`).
  - TQL actual.
  - Pregunta del user.
- Response: extraer ```lua``` block del markdown, strip prose.
+- Response: extraer ```lua``` (TQL) o ```sql``` block del markdown, strip prose.
-### 3. Validacion + safety
+### 3. TQL → SQL DuckDB emitter
- Antes de aplicar: `tql::apply` con dry-run (parsea sin mutar State). Si fail, mostrar error + boton "Ask AI again with this error".
+Nuevo modulo `tql_to_sql.{h,cpp}` (pure). Funciones:
 - Lua sandbox ya cubre side effects en formulas — el TQL en si es declarativo, no ejecuta nada peligroso.
-### 4. Streaming
+```cpp
 struct SqlEmit {
    std::string sql;            // SELECT ... statement
    std::vector<std::string> params;  // bound values (?-placeholders)
    std::vector<std::string> warnings;
    std::string error;          // si emit fallo (subset out of bounds)
 };
- Stream tokens via SSE (`stream=true` en Anthropic API).
+// Pure: emite SQL DuckDB equivalente a la pipeline State (stages 0..active).
- Mostrar texto en vivo en el modal.
+// `tables` provee el schema de cada TableInput (no los cells — el caller
- Cuando termina, parsear lua block final.
+// decide como hidratar las tablas en DuckDB).
 SqlEmit emit_sql(const State& state, const std::vector<TableInput>& tables,
                  int up_to_stage = -1 /* default = active_stage */);
 ```
-### 5. Persistencia conversation
+Mapeo MBQL-style:
 - Stage 0 = CTE base `t0` con `SELECT cols + derived FROM main_t [LEFT/INNER/RIGHT/FULL JOIN joinables ON ...]`.
 - Stage N = CTE `tN` con `SELECT breakouts, aggregations FROM tN-1 [WHERE filters] [GROUP BY breakouts] [ORDER BY sorts]`.
 - Final query `SELECT * FROM t<active>`.
- UiState guarda lista de turns (pregunta + TQL propuesto + resultado apply).
+Stage emit detalle:
- "Ask AI" siguiente turn incluye history previa.
+- `filter Op::Eq col = "v"` → `WHERE col = ?` con `params.push_back(v)` (DuckDB acepta `$1`/`?`).
- Boton "Reset chat" limpia.
+- `breakout "ts:month"` → `date_trunc('month', ts) AS "ts:month"`. Granularity sufijo → DuckDB `date_trunc`.
- NO persistido en TQL (es UI state).
+- `aggregation count` → `COUNT(*) AS count`.
 - `aggregation p95(col)` → `quantile_cont(col, 0.95) AS p95_col`.
 - `aggregation distinct col` → `COUNT(DISTINCT col) AS distinct_col`.
 - `sort {desc, col}` → `ORDER BY col DESC`.
 - Joins: 4 strategies mapean directo a `LEFT/INNER/RIGHT/FULL JOIN ... ON l.k = r.k`.
 - Derived cols: transpiladas via Lua subset (§4). Si formula fuera de subset → `SqlEmit.error = "lua formula 'X' out of subset: <razon>"`.
-### 6. Coste / rate limit
+Salida es **string SQL valido DuckDB**. No ejecuta — eso es responsabilidad del adapter opcional (§5).
 ### 4. Lua subset transpilable a SQL — GRAMATICA
 Documentar en `docs/TQL.md` seccion nueva "SQL transpile subset".
 **Reglas duras: Lua sigue siendo potente y sin limites en runtime general.** El subset solo aplica si el caller pide `tql_to_sql::emit_sql()`. Fuera del subset → error claro en tiempo de emit, NO en tiempo de eval. El playground sigue ejecutando Lua arbitrario sin restriccion.
 **Subset permitido (transpila a SQL):**
 | Lua | SQL DuckDB |
 |---|---|
 | Literales: numero, string `"x"`, bool `true/false`, `nil` | `1.5`, `'x'`, `TRUE/FALSE`, `NULL` |
 | Col ref: `[colname]` | `colname` (identifier quoted si necesario) |
 | Aritmetica: `+ - * / % - (unary)` | mismas |
 | Comparacion: `== ~= < <= > >=` | `= <> < <= > >=` |
 | Logica: `and or not` | `AND OR NOT` |
 | String concat: `..` | `\|\|` |
 | Ternary: `if A then B else C end` | `CASE WHEN A THEN B ELSE C END` |
 | Ternary inline: `(A and B) or C` (pattern comun Lua; equivalente solo si B nunca es `false`/`nil`, si no Lua devuelve C) | `CASE WHEN A THEN B ELSE C END` |
 | `math.floor/ceil/abs/round/sqrt/sin/cos/log` | `floor/ceiling/abs/round/sqrt/sin/cos/ln` |
 | `math.min(a,b)/max(a,b)` | `least(a,b)/greatest(a,b)` |
 | `string.upper/lower/len(s)` | `upper(s)/lower(s)/length(s)` |
 | `string.sub(s, i, j)` | `substring(s, i, j-i+1)` |
 | `tostring(x)/tonumber(x)` | `CAST(x AS VARCHAR)/CAST(x AS DOUBLE)` |
 | Paréntesis y precedencia | mismas |
 **Fuera de subset (error compile-time):**
 - Closures: `function() ... end`
 - Loops: `for/while/repeat`
 - Locals: `local x = ...`
 - Tables: `{...}`, `t[k]`, `t.field`, `table.*`
 - Multi-return / vararg
 - `string.gsub/find/match/format` (mapeo manual posible v2)
 - IO: `io.*`, `os.*`, `print`
 - Coroutines, metatables, debug
 - Recursion, multi-statement bodies
 **Error message ejemplo:**
 ```
 SQL transpile error en derived col 'fullname':
  formula = "[first] .. ' ' .. table.concat(parts, ',')"
  causa: 'table.concat' no esta en SQL transpile subset
  ver docs/TQL.md#sql-transpile-subset
  workaround: usar TQL puro (sin SQL emit) o reescribir formula con `..`
 ```
 **Helper:** `tql_to_sql::is_transpilable(formula, error_out)` pure fn que valida una formula sin emitir.
 ### 5. DuckDB adapter (opcional)
 Build flag `FN_TQL_DUCKDB=1` en `cpp/CMakeLists.txt` opta-in. Vendor DuckDB header-only o lib (depende de tamaño). Default OFF — playground sigue compilando sin DuckDB.
 API adapter:
 ```cpp
 namespace tql_duckdb {
 struct Result {
    StageOutput out;        // materializado como TableInput compatible
    std::string error;
    double duration_ms = 0;
 };
 // Hidrata `tables` como views temp + ejecuta sql + materializa resultado.
 Result execute(const std::string& sql,
                const std::vector<std::string>& params,
                const std::vector<TableInput>& tables);
 }
 ```
 Apps que lo usen (registry_dashboard, sqlite_api): linkean DuckDB + invocan adapter cuando user/agent pide SQL output. Playground por defecto NO linka — `Ask AI` solo ofrece SQL mode si `#ifdef FN_TQL_DUCKDB`.
 ### 6. Validacion + safety
 - Antes de aplicar TQL del LLM: `tql::apply` dry-run. Si fail, mostrar error + "Ask AI again with this error".
 - Antes de ejecutar SQL del LLM: parsing DuckDB en sandbox read-only (DuckDB connection sin `INSERT/UPDATE/DELETE/DROP`, attach read-only).
 - Lua sandbox ya cubre side effects en formulas TQL.
 ### 7. Streaming
 - Stream tokens via SSE (`stream=true` Anthropic).
 - Texto en vivo en modal.
 - Cuando termina, parse lua/sql block final.
 ### 8. Persistencia conversacion
 - UiState guarda lista de turns (pregunta + output propuesto + apply result + engine usado TQL/SQL).
 - Siguiente "Ask AI" turn incluye history previa.
 - Boton "Reset chat".
 - NO persistido en TQL (UI state efimero).
 ### 9. Coste / rate limit
 - Mostrar tokens estimados antes de enviar (rough char count / 4).
 - Cap input a 8000 tokens.
- Error handling: 429 / 5xx -> mensaje + reintentar.
+- Error handling: 429 / 5xx → mensaje + reintentar.
 ## Tests
- Mockear HTTP response con cURL stub.
+### Pure (sin red, sin DuckDB linkado)
- Test: prompt build incluye schema + TQL + pregunta en formato esperado.
+
- Test: response parse extrae lua block correctamente.
+- **Lua subset validator:** `is_transpilable` true para casos subset, false con error claro para fuera de subset (closures, loops, table.*, string.gsub, etc.).
- Test: tql::apply sobre output del LLM funciona end-to-end con dataset sintetico.
+- **TQL → SQL emit golden tests** (~20 casos):
  - stage 0 simple filter + sort → `SELECT ... WHERE ... ORDER BY ...`
  - stage 1 group + count → CTE chain con GROUP BY
  - granularity sufijo `:month` → `date_trunc('month', ts)`
  - join 4 strategies con multi-key
  - derived cols subset → CASE/expressions
  - derived cols fuera subset → `SqlEmit.error` no vacio + warning
  - aggregation p25/p50/p75/p99 → `quantile_cont(col, p)`
  - empty pipeline → `SELECT * FROM t0`
 - **Prompt build:** el prompt incluye schema + TQL + pregunta en formato esperado (mockear HTTP).
 - **Response parse:** extrae lua/sql block correctamente.
 ### Round-trip (requiere DuckDB linkado)
 Solo corren si `FN_TQL_DUCKDB=1`:
 - TQL → emit SQL → ejecutar DuckDB → resultado coincide bit-a-bit con `compute_stage` pure sobre los mismos cells.
 - Casos: filter, group+agg, join inner, multi-stage chain, breakout granularity month/week, derived col `[a] + [b] * 2`.
 ### LLM (red real, opt-in)
 - Test integration con `ANTHROPIC_API_KEY` real (`make test-llm`): pregunta simple → recibe TQL valido → apply OK.
 - Mock test (CI): cURL stub responde con JSON predefinido → parser extrae bloque OK.
 ## No-objetivos
- Generacion de visualizaciones nuevas via LLM (la viz la elige TQL `display`, suficiente).
+- **SQL → TQL**: no se implementa. Documentado en doc + en mensajes de error del Ask AI ("no soportamos SQL como input, use TQL").
- Acciones del LLM mas alla de modificar TQL (sin acceso a I/O del sistema).
+- **Multi-provider** (OpenAI, local): fase futura. Anthropic hardcoded v1.
- Multi-provider (OpenAI / local) — fase futura. Hardcode Anthropic primero.
+- **Generacion de viz desde LLM** mas alla de `display` token: la viz la elige TQL existente.
 - **Lua subset extension** (string.gsub, regex, table.*): postpone v2 si demanda real.
 - **DuckDB write ops**: solo SELECT/CTE. Apps que quieran INSERT/UPDATE lo hacen fuera del playground.
 ## Flujo agente (resumen)
 ```
 Agente -> "muestrame top 10 langs por total size"
 LLM (TQL default) -> emite TQL { stages = {...} }
 tql::apply -> State + dry-run OK
 User clickea Apply -> compute_stage en memoria
 Agente -> "lo mismo pero como SQL"
 [Si FN_TQL_DUCKDB=1 y app linkó adapter]
 LLM (SQL mode toggled) -> emite SELECT ... DuckDB
 tql_duckdb::execute(sql, params, tables) -> resultado materializado
 [Si NO linkado] -> error "SQL mode requiere DuckDB. Compila con FN_TQL_DUCKDB=1"
 ```
 ## Riesgos
 - **Subset Lua restrictivo en SQL emit**: usuarios usan Lua arbitrario en playground → al pedir SQL falla. Mitigacion: error message claro + sugerencia workaround.
 - **DuckDB tamaño**: lib ~10MB. Solo se paga si app opta-in con build flag.
 - **Dialect drift DuckDB**: funciones SQL pueden cambiar entre versiones. Pinear DuckDB version en CMake.
 - **LLM hallucinations**: TQL invalido → dry-run rechaza con error. Loop "Ask AI again with this error" recupera.
 - **API key leak**: `pass` integration mantiene fuera del repo. Build flag NUNCA imprime key.
 - **Coste tokens**: prompt grande (schema + grammar + TQL). Cap input + warning visual.
@@ -496,3 +496,87 @@ StageOutput compute_stage(const char* const* in_cells, int in_rows, int in_cols,
 | Multi-sort drag-reorder | Phase 4 |
 Ver `cpp/apps/primitives_gallery/playground/tables/` para la implementacion del playground.
 ---
 ## SQL transpile subset (fase 11 — issue 0080)
 TQL emite SQL DuckDB equivalente para que agentes LLM puedan generar TQL o SQL contra los mismos datos. Modulo `tql_to_sql.{h,cpp}` provee `emit_sql(State, tables)`. Mapeo MBQL-style con CTE chain `t0..tN`.
 ### Lua subset transpilable
 Lua sigue **potente y sin limites en runtime general** (formula eval en derived cols TQL puro). El subset SOLO aplica al pedir `tql_to_sql::emit_sql()`. Fuera del subset → error compile-time con causa concreta + workaround.
 **Permitido (transpila a SQL DuckDB):**
 | Lua | SQL DuckDB | Ejemplo |
 |---|---|---|
 | Literales numero/string/bool/nil | mismas (`'x'`, `TRUE`, `NULL`) | `42`, `"hola"`, `nil` |
 | Col ref: `[colname]` | `"colname"` (quoted) | `[size_kb]` → `"size_kb"` |
 | Aritmetica: `+ - * / % - (unary)` | mismas | `[a] + [b] * 2` → `("a" + ("b" * 2))` |
 | Comparacion: `== ~= < <= > >=` | `= <> < <= > >=` | `[n] >= 10` → `("n" >= 10)` |
 | Logica: `and or not` | `AND OR NOT` | `[a] and [b]` → `("a" AND "b")` |
 | String concat: `..` | `\|\|` | `[a] .. "_" .. [b]` → `("a" \|\| '_' \|\| "b")` |
 | Ternary: `if A then B else C end` | `CASE WHEN A THEN B ELSE C END` | obligatorio `else` |
 | `math.floor/ceil/abs/sqrt/sin/cos/log/exp` | `floor/ceiling/abs/sqrt/sin/cos/ln/exp` | `math.floor([x])` |
 | `math.min(a,b)/max(a,b)` | `least(a,b)/greatest(a,b)` | `math.min([a], 100)` |
 | `string.upper/lower/len(s)` | `upper(s)/lower(s)/length(s)` | `string.upper([name])` |
 | `string.sub(s, i [, j])` | `substring(s, i [, j-i+1])` | `string.sub([s], 1, 3)` |
 | `tostring(x)/tonumber(x)` | `CAST(x AS VARCHAR)/CAST(x AS DOUBLE)` | `tonumber([n])` |
 | Parentesis y precedencia Lua | mismas | `(a + b) * c` |
 **Fuera de subset (error compile-time):**
 - Closures: `function() ... end`
 - Loops: `for/while/repeat`
 - Locals: `local x = ...`
 - Tables: `{...}`, `t[k]`, `t.field`, `table.*`
 - Multi-return, vararg `...`
 - `string.gsub/find/match/format/byte/char/rep`
 - IO/OS/debug: `io.*`, `os.*`, `debug.*`, `package`, `require`, `print`
 - Coroutines, metatables, `pcall/xpcall`, `rawget/rawset`
 - Recursion, multi-statement bodies (`;`)
 - Length operator `#`
 - Method calls `:`
 - Ternary sin else: `if A then B end` (subset requiere ambas ramas)
 ### Error message ejemplo
 ```
 SQL transpile error en derived col 'fullname':
  formula = "[first] .. ' ' .. string.gsub([last], 'X', 'Y')"
  causa: function 'string.gsub' not in SQL transpile whitelist
  ver docs/TQL.md#sql-transpile-subset
  workaround: usar TQL puro (sin SQL emit) o reescribir formula
 ```
 ### Stage → SQL mapeo
 | TQL element | SQL DuckDB |
 |---|---|
 | Stage 0 Raw | CTE `t0 AS (SELECT cols+derived FROM main_t [JOIN ...] [WHERE filters] [ORDER BY sorts])` |
 | Stage N>=1 | CTE `tN AS (SELECT breakouts+aggs FROM tN-1 [GROUP BY ...] [ORDER BY ...])` |
 | breakout `"col"` | `"col"` |
 | breakout `"col:month"` | `date_trunc('month', "col")` |
 | breakout `"col:year/week/day/hour"` | `date_trunc('year/week/day/hour', "col")` |
 | Aggregation Count | `COUNT(*)` |
 | Aggregation Sum/Avg/Min/Max/Stddev | `SUM/AVG/MIN/MAX/STDDEV("col")` |
 | Aggregation Distinct | `COUNT(DISTINCT "col")` |
 | Aggregation Median/P25/P75/P90/P99 | `quantile_cont("col", p)` |
 | Aggregation Percentile p | `quantile_cont("col", p)` |
 | Filter Op::Eq/Neq/Gt/Gte/Lt/Lte | `"col" = ?` etc (params bound) |
 | Filter Op::Contains | `"col" LIKE '%v%'` (param `%v%`) |
 | Filter Op::StartsWith / EndsWith | `LIKE 'v%'` / `LIKE '%v'` |
 | Sort `{desc, "col"}` | `ORDER BY "col" DESC` |
 | Join Left/Inner/Right/Full | `LEFT/INNER/RIGHT/FULL OUTER JOIN ... ON ...` |
 | Join multi-key `on={{l1,r1},{l2,r2}}` | `ON l.l1 = r.r1 AND l.l2 = r.r2` |
 | Join fields | cols `alias.field AS "alias.field"` |
 | `main_source` | `FROM "main_source_name"` |
 ### Doctrina (Metabase-style)
 - **One-way:** TQL → SQL OK. SQL → TQL no soportado. Razon: traduccion inversa lossy (CTEs, window fns, set ops, lateral, correlated subqueries no caben en TQL).
 - **Output:** SQL string siempre emitible. Ejecucion requiere DuckDB linkado (build flag `FN_TQL_DUCKDB=1`, opcional).
 - **Agente flow:** TQL default. SQL solo si app linko DuckDB. UI Ask AI muestra toggle SQL solo cuando disponible.
 Ver issue 0080 + `tql_to_sql.{h,cpp}` para implementacion.
@@ -0,0 +1,155 @@
 package core
 import (
 	"bufio"
 	"context"
 	"fmt"
 	"io"
 	"os"
 	"os/exec"
 	"sync"
 	"syscall"
 	"time"
 )
 // StreamEvent is one line captured from the subprocess's stdout or stderr.
 type StreamEvent struct {
 	Stream string    // "stdout" | "stderr"
 	Line   string    // line text without the trailing newline
 	Time   time.Time // time at which the line was received
 }
 // StreamResult is the final outcome of the subprocess, sent on the results
 // channel once both pipes have reached EOF and the process has terminated.
 type StreamResult struct {
 	ExitCode   int   // exit code of the process; -1 when the pipes or the process could not be set up
 	Err        error // non-nil on setup/wait failures or when ctx was cancelled; a non-zero exit code alone does not set it
 	DurationMs int64 // elapsed milliseconds measured from launch; 0 when setup failed
 }
 // SubprocessStream launches name with args as a subprocess and returns two
 // channels:
 //   - events: one StreamEvent per stdout/stderr line; closed after the
 //     process has been waited on.
 //   - result: exactly one StreamResult, then closed.
 //
 // A non-empty env is appended to os.Environ(); with an empty env cmd.Env is
 // left unset. stdin may be nil.
 //
 // Cancelling ctx sends SIGTERM to the child's process group; if the child has
 // not been reaped within 2 seconds the group receives SIGKILL. The command is
 // built with exec.Command instead of exec.CommandContext on purpose:
 // CommandContext's default cancellation kills the child with SIGKILL as soon
 // as ctx is done, which would void the SIGTERM grace period.
 //
 // A line longer than the 1 MB scanner buffer stops line delivery for that
 // stream; the rest of the pipe is drained so the child cannot block on a full
 // pipe, and the scan error is reported in StreamResult.Err when no other
 // error occurred.
 //
 // The caller MUST consume events until it is closed, or cancel ctx, so the
 // internal goroutines never stay blocked. Once ctx is done, lines may be
 // dropped instead of delivered.
 func SubprocessStream(
 	ctx context.Context,
 	name string,
 	args []string,
 	env []string,
 	stdin io.Reader,
 ) (<-chan StreamEvent, <-chan StreamResult) {
 	events := make(chan StreamEvent, 64)
 	results := make(chan StreamResult, 1)
 	go func() {
 		defer close(events)
 		defer close(results)
 		start := time.Now()
 		// fail reports a setup error; nothing ran, so the duration is 0.
 		fail := func(stage string, err error) {
 			results <- StreamResult{ExitCode: -1, Err: fmt.Errorf("%s: %w", stage, err), DurationMs: 0}
 		}
 		// Refuse to launch with an already-done ctx (what CommandContext's
 		// Start used to do). Checked before creating pipes so none leak.
 		if err := ctx.Err(); err != nil {
 			fail("start", err)
 			return
 		}
 		cmd := exec.Command(name, args...)
 		// Environment: base + extra
 		if len(env) > 0 {
 			cmd.Env = append(os.Environ(), env...)
 		}
 		if stdin != nil {
 			cmd.Stdin = stdin
 		}
 		// Own process group so the child's descendants are signalled too.
 		cmd.SysProcAttr = &syscall.SysProcAttr{Setpgid: true}
 		stdoutPipe, err := cmd.StdoutPipe()
 		if err != nil {
 			fail("stdout pipe", err)
 			return
 		}
 		stderrPipe, err := cmd.StderrPipe()
 		if err != nil {
 			fail("stderr pipe", err)
 			return
 		}
 		if err := cmd.Start(); err != nil {
 			fail("start", err)
 			return
 		}
 		// ctx supervisor: SIGTERM → 2s grace → SIGKILL. It stays alive until
 		// the process has been reaped, not merely until the pipes hit EOF, so
 		// a child that closed its pipes but keeps running is still covered.
 		pgid := cmd.Process.Pid
 		reaped := make(chan struct{})
 		go func() {
 			select {
 			case <-ctx.Done():
 			case <-reaped:
 				return
 			}
 			// Both channels may be ready at once; never signal a reaped pid.
 			select {
 			case <-reaped:
 				return
 			default:
 			}
 			_ = syscall.Kill(-pgid, syscall.SIGTERM)
 			grace := time.NewTimer(2 * time.Second)
 			defer grace.Stop()
 			select {
 			case <-grace.C:
 				_ = syscall.Kill(-pgid, syscall.SIGKILL)
 			case <-reaped:
 			}
 		}()
 		send := func(stream, line string) {
 			ev := StreamEvent{Stream: stream, Line: line, Time: time.Now()}
 			select {
 			case events <- ev:
 			case <-ctx.Done():
 			}
 		}
 		// Read stdout and stderr concurrently.
 		const bufSize = 1024 * 1024 // 1 MB for long lines (sd-cli progress, etc.)
 		var wg sync.WaitGroup
 		var scanErrs [2]error // one slot per pipe; read only after wg.Wait()
 		scanPipe := func(r io.Reader, stream string, errOut *error) {
 			defer wg.Done()
 			sc := bufio.NewScanner(r)
 			sc.Buffer(make([]byte, bufSize), bufSize)
 			for sc.Scan() {
 				send(stream, sc.Text())
 			}
 			if err := sc.Err(); err != nil {
 				*errOut = fmt.Errorf("scan %s: %w", stream, err)
 				// Keep draining so the child never blocks on a full pipe.
 				_, _ = io.Copy(io.Discard, r)
 			}
 		}
 		wg.Add(2)
 		go scanPipe(stdoutPipe, "stdout", &scanErrs[0])
 		go scanPipe(stderrPipe, "stderr", &scanErrs[1])
 		// os/exec requires all pipe reads to finish before Wait is called.
 		wg.Wait()
 		exitCode := 0
 		var waitErr error
 		if err := cmd.Wait(); err != nil {
 			if exitErr, ok := err.(*exec.ExitError); ok {
 				exitCode = exitErr.ExitCode() // a non-zero exit is not a spawn error
 			} else {
 				waitErr = err
 			}
 		}
 		close(reaped) // tell the ctx supervisor to stop
 		if waitErr == nil {
 			if ctxErr := ctx.Err(); ctxErr != nil {
 				// Cancelled context: report it as a cancellation error.
 				waitErr = ctxErr
 			} else {
 				for _, scanErr := range scanErrs {
 					if scanErr != nil {
 						waitErr = scanErr
 						break
 					}
 				}
 			}
 		}
 		results <- StreamResult{
 			ExitCode:   exitCode,
 			Err:        waitErr,
 			DurationMs: time.Since(start).Milliseconds(),
 		}
 	}()
 	return events, results
 }
@@ -0,0 +1,69 @@
 ---
 name: subprocess_stream
 kind: function
 lang: go
 domain: core
 version: "1.0.0"
 purity: impure
 signature: "func SubprocessStream(ctx context.Context, name string, args []string, env []string, stdin io.Reader) (<-chan StreamEvent, <-chan StreamResult)"
 description: "Lanza un subproceso y retorna dos canales: uno con StreamEvent (linea de stdout/stderr con timestamp) y otro con un unico StreamResult (ExitCode, Err, DurationMs). Cancelar ctx envia SIGTERM al proceso; si no termina en 2s, SIGKILL."
 tags: [subprocess, exec, stream, stdout, stderr, process, concurrency, io, primitiva]
 uses_functions: []
 uses_types: []
 returns: []
 returns_optional: false
 error_type: "error_go_core"
 imports: [bufio, context, fmt, io, os, os/exec, sync, syscall, time]
 params:
  - name: ctx
    desc: "Contexto de cancelacion. Al cancelar, el proceso recibe SIGTERM; si no muere en 2s, SIGKILL. Usar context.WithTimeout para acotar duracion maxima."
  - name: name
    desc: "Nombre o path del ejecutable a lanzar (ej. 'echo', '/usr/bin/python3')."
  - name: args
    desc: "Argumentos del proceso. Puede ser nil o vacio."
  - name: env
    desc: "Variables de entorno adicionales en formato 'KEY=VALUE'. Se concatenan con os.Environ(). Puede ser nil."
  - name: stdin
    desc: "Stdin del proceso. Puede ser nil si el proceso no necesita entrada."
 output: "Dos canales: events (<-chan StreamEvent) cerrado cuando ambos pipes EOF; result (<-chan StreamResult) con exactamente un valor cuando el proceso termina. El caller DEBE consumir events hasta cierre o cancelar ctx para evitar bloquear goroutines internas."
 tested: true
 tests:
  - "echo stdout llega como evento y ExitCode 0"
  - "stderr llega como evento con stream stderr"
  - "exit code no-cero se reporta en StreamResult"
  - "ctx cancelado termina el proceso"
  - "multiples lineas stdout"
 test_file_path: "functions/core/subprocess_stream_test.go"
 file_path: "functions/core/subprocess_stream.go"
 ---
 ## Ejemplo
 ```go
 ctx, cancel := context.WithTimeout(context.Background(), 30*time.Second)
 defer cancel()
 events, results := core.SubprocessStream(ctx, "grep", []string{"-rn", "TODO", "."}, nil, nil)
 for ev := range events {
    switch ev.Stream {
    case "stdout":
        fmt.Println(ev.Line)
    case "stderr":
        fmt.Fprintln(os.Stderr, "[stderr]", ev.Line)
    }
 }
 res := <-results
 if res.ExitCode != 0 || res.Err != nil {
    log.Printf("grep exit=%d err=%v duration=%dms", res.ExitCode, res.Err, res.DurationMs)
 }
 ```
 ## Notas
 - El canal `events` tiene buffer de 64. Si el caller deja de consumir y el buffer se llena, las goroutinas internas se bloquean hasta que haya espacio o el ctx sea cancelado.
 - El scanner de cada pipe tiene un buffer de 1 MB para tolerar lineas muy largas (progreso de CLIs tipo sd-cli, barras ANSI largas).
 - Los structs `StreamEvent` y `StreamResult` se declaran en el mismo archivo para que el paquete `core` los exporte sin imports adicionales.
 - Generaliza el patron de `claude_stream_go_core` desacoplando el lanzamiento de subprocesos del protocolo especifico de claude (NDJSON/stream-json). `claude_stream_go_core` puede reimplementarse internamente usando esta funcion como primitiva.
 - `cmd.SysProcAttr = &syscall.SysProcAttr{Setpgid: true}` crea un process group propio; SIGTERM/SIGKILL se envian con `Kill(-pgid, sig)` para matar tambien los procesos hijo del hijo.
@@ -0,0 +1,132 @@
 package core
 import (
 	"context"
 	"testing"
 	"time"
 )
 func TestSubprocessStream(t *testing.T) {
 	t.Run("echo stdout llega como evento y ExitCode 0", func(t *testing.T) {
 		ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
 		defer cancel()
 		events, results := SubprocessStream(ctx, "echo", []string{"hola"}, nil, nil)
 		var got []StreamEvent
 		for ev := range events {
 			got = append(got, ev)
 		}
 		res := <-results
 		if res.ExitCode != 0 {
 			t.Errorf("ExitCode = %d, want 0 (err: %v)", res.ExitCode, res.Err)
 		}
 		if res.Err != nil {
 			t.Errorf("unexpected Err: %v", res.Err)
 		}
 		if len(got) != 1 {
 			t.Fatalf("got %d events, want 1", len(got))
 		}
 		if got[0].Stream != "stdout" {
 			t.Errorf("Stream = %q, want %q", got[0].Stream, "stdout")
 		}
 		if got[0].Line != "hola" {
 			t.Errorf("Line = %q, want %q", got[0].Line, "hola")
 		}
 	})
 	t.Run("stderr llega como evento con stream stderr", func(t *testing.T) {
 		ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
 		defer cancel()
 		// sh -c "echo msg >&2" escribe a stderr
 		events, results := SubprocessStream(ctx, "sh", []string{"-c", "echo error_msg >&2"}, nil, nil)
 		var got []StreamEvent
 		for ev := range events {
 			got = append(got, ev)
 		}
 		res := <-results
 		if res.ExitCode != 0 {
 			t.Errorf("ExitCode = %d, want 0", res.ExitCode)
 		}
 		if len(got) != 1 {
 			t.Fatalf("got %d events, want 1", len(got))
 		}
 		if got[0].Stream != "stderr" {
 			t.Errorf("Stream = %q, want %q", got[0].Stream, "stderr")
 		}
 		if got[0].Line != "error_msg" {
 			t.Errorf("Line = %q, want %q", got[0].Line, "error_msg")
 		}
 	})
 	t.Run("exit code no-cero se reporta en StreamResult", func(t *testing.T) {
 		ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
 		defer cancel()
 		events, results := SubprocessStream(ctx, "sh", []string{"-c", "exit 42"}, nil, nil)
 		for range events {
 		}
 		res := <-results
 		if res.ExitCode != 42 {
 			t.Errorf("ExitCode = %d, want 42", res.ExitCode)
 		}
 		if res.Err != nil {
 			t.Errorf("unexpected Err: %v", res.Err)
 		}
 	})
 	t.Run("ctx cancelado termina el proceso", func(t *testing.T) {
 		ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
 		defer cancel()
 		// proceso que dura mucho; cancelamos enseguida
 		ctxShort, cancelShort := context.WithTimeout(ctx, 100*time.Millisecond)
 		defer cancelShort()
 		events, results := SubprocessStream(ctxShort, "sleep", []string{"60"}, nil, nil)
 		for range events {
 		}
 		res := <-results
 		// Tras cancelacion el proceso debe haber terminado (ExitCode != 0 o Err de ctx)
 		if res.ExitCode == 0 && res.Err == nil {
 			t.Error("expected non-zero exit or ctx error after cancellation")
 		}
 		if res.DurationMs > 3000 {
 			t.Errorf("took %d ms, expected < 3000 (should have been killed)", res.DurationMs)
 		}
 	})
 	t.Run("multiples lineas stdout", func(t *testing.T) {
 		ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
 		defer cancel()
 		events, results := SubprocessStream(ctx, "sh", []string{"-c", "printf 'a\nb\nc\n'"}, nil, nil)
 		var lines []string
 		for ev := range events {
 			if ev.Stream == "stdout" {
 				lines = append(lines, ev.Line)
 			}
 		}
 		<-results
 		if len(lines) != 3 {
 			t.Fatalf("got %d stdout lines, want 3: %v", len(lines), lines)
 		}
 		want := []string{"a", "b", "c"}
 		for i, w := range want {
 			if lines[i] != w {
 				t.Errorf("line[%d] = %q, want %q", i, lines[i], w)
 			}
 		}
 	})
 }
@@ -0,0 +1,238 @@
 package infra
 import (
 	"context"
 	"fmt"
 	"os"
 	"os/exec"
 	"path/filepath"
 	"strings"
 	"time"
 )
 // MlEnvCheck holds the result of a single ML environment probe.
 //
 // Version and Detail are optional; both carry `omitempty`, so they are left
 // out of the JSON encoding when empty.
 type MlEnvCheck struct {
 	Name    string `json:"name"`              // probe identifier, e.g. "nvcc", "python_venv", "torch"
 	Status  string `json:"status"`            // "ok" | "missing" | "warning" | "unknown"
 	Version string `json:"version,omitempty"` // version string if detected
 	Detail  string `json:"detail,omitempty"`  // human-readable extra info
 }
 // MlEnvReport is the full ML environment audit result.
 type MlEnvReport struct {
 	// Gpus lists the GPUs returned by GetGpuInfo; AuditMlEnv stores an empty
 	// (non-nil) slice when that query returns an error.
 	Gpus        []GpuInfo    `json:"gpus"`
 	// Checks holds one entry per probe, in the order AuditMlEnv ran them.
 	Checks      []MlEnvCheck `json:"checks"`
 	// OverallOK is true when at least one GPU was found and no non-optional
 	// check has status "missing".
 	OverallOK   bool         `json:"overall_ok"`
 	// GeneratedAt is the Unix time (seconds) taken when the audit started.
 	GeneratedAt int64        `json:"generated_at"`
 }
 // AuditMlEnv runs every ML environment probe for the registry rooted at
 // registryRoot and gathers the outcome in a single MlEnvReport.
 //
 // Probes, in order: nvidia-smi, nvcc, the Python venv under
 // <registryRoot>/python/.venv, the Python packages torch, diffusers,
 // transformers, huggingface_hub and stable_diffusion_cpp_python, the sd and
 // llama-cli binaries, and the local imagegen vault directory.
 //
 // OverallOK is true only when at least one GPU was detected and no required
 // check is "missing". "warning" and "unknown" are tolerated, and the checks
 // stable_diffusion_cpp_python, sd_cli and llama_cpp are optional.
 //
 // The returned error is always nil in the current implementation; individual
 // probe failures are reported through the check statuses instead.
 func AuditMlEnv(registryRoot string) (MlEnvReport, error) {
 	report := MlEnvReport{GeneratedAt: time.Now().Unix()}
 	// GPU detection: an error here is not fatal, the report just lists no GPUs.
 	detected, gpuErr := GetGpuInfo()
 	if gpuErr != nil {
 		detected = []GpuInfo{}
 	}
 	report.Gpus = detected
 	// Interpreter inside the registry venv, shared by all package probes.
 	venvPy := filepath.Join(registryRoot, "python", ".venv", "bin", "python3")
 	results := []MlEnvCheck{
 		probeCommand("nvidia_smi", "nvidia-smi", []string{"--version"}, 5),
 		probeNvcc(),
 		probeVenv(registryRoot),
 	}
 	pythonPkgs := []string{"torch", "diffusers", "transformers", "huggingface_hub", "stable_diffusion_cpp_python"}
 	for i := range pythonPkgs {
 		results = append(results, probePythonPackage(venvPy, pythonPkgs[i]))
 	}
 	results = append(results,
 		probeCommand("sd_cli", "sd", []string{"--version"}, 5),
 		probeCommand("llama_cpp", "llama-cli", []string{"--version"}, 5),
 		probeImagegenVault(),
 	)
 	report.Checks = results
 	// Checks whose absence must not fail the overall verdict.
 	optional := map[string]bool{
 		"stable_diffusion_cpp_python": true,
 		"sd_cli":                      true,
 		"llama_cpp":                   true,
 	}
 	healthy := len(detected) > 0
 	for _, check := range results {
 		if check.Status == "missing" && !optional[check.Name] {
 			healthy = false
 		}
 	}
 	report.OverallOK = healthy
 	return report, nil
 }
 // probeCommand looks binary up in PATH and, when found, runs it with args
 // under a timeout of timeoutSec seconds.
 //
 // Result statuses:
 //   - "missing": the binary is not in PATH.
 //   - "warning": the binary ran but returned an error (including a timeout);
 //     whatever output was captured is still reported as Version.
 //   - "ok": the command succeeded; Version is its trimmed combined
 //     stdout/stderr output, cut to at most 120 bytes.
 func probeCommand(name, binary string, args []string, timeoutSec int) MlEnvCheck {
 	resolved, lookErr := exec.LookPath(binary)
 	if lookErr != nil {
 		return MlEnvCheck{Name: name, Status: "missing", Detail: fmt.Sprintf("%s not found in PATH", binary)}
 	}
 	ctx, cancel := context.WithTimeout(context.Background(), time.Duration(timeoutSec)*time.Second)
 	defer cancel()
 	raw, runErr := exec.CommandContext(ctx, resolved, args...).CombinedOutput()
 	const maxVersionLen = 120
 	version := strings.TrimSpace(string(raw))
 	if len(version) > maxVersionLen {
 		version = version[:maxVersionLen]
 	}
 	result := MlEnvCheck{Name: name, Status: "ok", Version: version}
 	if runErr != nil {
 		result.Status = "warning"
 		result.Detail = fmt.Sprintf("exit error: %v", runErr)
 	}
 	return result
 }
 // probeNvcc reports the CUDA toolkit release printed by `nvcc --version`.
 //
 // The check is "missing" when nvcc is not in PATH and "warning" when the
 // command fails (5 second timeout). On success the version is the text after
 // "release" in the first output line that mentions it; if no such value is
 // found, the first 80 bytes of the trimmed output are used instead.
 func probeNvcc() MlEnvCheck {
 	nvccPath, lookErr := exec.LookPath("nvcc")
 	if lookErr != nil {
 		return MlEnvCheck{Name: "nvcc", Status: "missing", Detail: "nvcc not found in PATH (CUDA toolkit not installed)"}
 	}
 	ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
 	defer cancel()
 	raw, runErr := exec.CommandContext(ctx, nvccPath, "--version").CombinedOutput()
 	if runErr != nil {
 		return MlEnvCheck{Name: "nvcc", Status: "warning", Detail: fmt.Sprintf("nvcc --version failed: %v", runErr)}
 	}
 	text := string(raw)
 	// Example line: "Cuda compilation tools, release 12.4, V12.4.99" -> "12.4".
 	// Only the first line containing "release" is inspected.
 	release := ""
 	lines := strings.Split(text, "\n")
 	for i := 0; i < len(lines); i++ {
 		if !strings.Contains(lines[i], "release") {
 			continue
 		}
 		for _, field := range strings.Split(lines[i], ",") {
 			field = strings.TrimSpace(field)
 			if strings.HasPrefix(field, "release") {
 				release = strings.TrimSpace(field[len("release"):])
 				break
 			}
 		}
 		break
 	}
 	if release != "" {
 		return MlEnvCheck{Name: "nvcc", Status: "ok", Version: release}
 	}
 	// No parsable release: fall back to a bounded slice of the raw output.
 	fallback := strings.TrimSpace(text)
 	if len(fallback) > 80 {
 		fallback = fallback[:80]
 	}
 	return MlEnvCheck{Name: "nvcc", Status: "ok", Version: fallback}
 }
 // probeVenv verifies that <registryRoot>/python/.venv/bin/python3 exists and
 // answers `--version` within 5 seconds.
 //
 // Status is "missing" when the interpreter file does not exist, "warning"
 // when it exists but the command fails, and "ok" otherwise. Version carries
 // the trimmed combined output of the command.
 func probeVenv(registryRoot string) MlEnvCheck {
 	interpreter := filepath.Join(registryRoot, "python", ".venv", "bin", "python3")
 	_, statErr := os.Stat(interpreter)
 	if os.IsNotExist(statErr) {
 		return MlEnvCheck{Name: "python_venv", Status: "missing", Detail: fmt.Sprintf("not found: %s", interpreter)}
 	}
 	ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
 	defer cancel()
 	raw, runErr := exec.CommandContext(ctx, interpreter, "--version").CombinedOutput()
 	result := MlEnvCheck{Name: "python_venv", Status: "ok", Version: strings.TrimSpace(string(raw))}
 	if runErr != nil {
 		result.Status = "warning"
 		result.Detail = fmt.Sprintf("python3 --version failed: %v", runErr)
 	}
 	return result
 }
 // probePythonPackage imports pkg with the venv interpreter venvPy and reports
 // the module's __version__ ("unknown" when the module has no such attribute).
 //
 // Statuses:
 //   - "unknown": the venv interpreter does not exist, so nothing can be probed.
 //   - "missing": the import failed with ModuleNotFoundError / "No module named".
 //   - "warning": the import failed for any other reason, including the
 //     5 second timeout. Detail always says why: the captured output (cut to
 //     200 bytes), a timeout message, or the process error when Python printed
 //     nothing.
 //   - "ok": the import succeeded; Version is the last line of the output.
 func probePythonPackage(venvPy, pkg string) MlEnvCheck {
 	const timeout = 5 * time.Second
 	// Distribution name -> import name, for packages where the two differ.
 	importName := pkg
 	if pkg == "stable_diffusion_cpp_python" {
 		importName = "stable_diffusion_cpp"
 	}
 	// Check that the venv python binary exists first.
 	if _, err := os.Stat(venvPy); os.IsNotExist(err) {
 		return MlEnvCheck{Name: pkg, Status: "unknown", Detail: "python_venv not available"}
 	}
 	ctx, cancel := context.WithTimeout(context.Background(), timeout)
 	defer cancel()
 	script := fmt.Sprintf("import %s; v = getattr(%s, '__version__', None); print(v or 'unknown')", importName, importName)
 	out, err := exec.CommandContext(ctx, venvPy, "-c", script).CombinedOutput()
 	output := strings.TrimSpace(string(out))
 	if err != nil {
 		// Module not found → missing; other errors → warning.
 		if strings.Contains(output, "ModuleNotFoundError") || strings.Contains(output, "No module named") {
 			return MlEnvCheck{Name: pkg, Status: "missing", Detail: fmt.Sprintf("%s not installed", importName)}
 		}
 		// A probe killed by the deadline usually prints nothing; say so
 		// explicitly instead of returning a warning with an empty Detail.
 		if ctx.Err() == context.DeadlineExceeded {
 			return MlEnvCheck{Name: pkg, Status: "warning", Detail: fmt.Sprintf("import %s timed out after %s", importName, timeout)}
 		}
 		detail := output
 		if len(detail) > 200 {
 			detail = detail[:200]
 		}
 		if detail == "" {
 			detail = fmt.Sprintf("python exited with error: %v", err)
 		}
 		return MlEnvCheck{Name: pkg, Status: "warning", Detail: detail}
 	}
 	// CombinedOutput merges stderr, so import-time warnings may precede the
 	// version. The script prints the version last; keep only that line.
 	version := output
 	if i := strings.LastIndex(version, "\n"); i >= 0 {
 		version = strings.TrimSpace(version[i+1:])
 	}
 	return MlEnvCheck{Name: pkg, Status: "ok", Version: version}
 }
 // probeImagegenVault inspects ~/vaults/imagegen_models.
 //
 // Status is "unknown" when the home directory cannot be resolved, "missing"
 // when the vault directory does not exist, "warning" when it exists but
 // cannot be read, and "ok" otherwise. For "ok", Detail lists the names of the
 // immediate subdirectories, or says the vault is empty when there are none.
 func probeImagegenVault() MlEnvCheck {
 	homeDir, homeErr := os.UserHomeDir()
 	if homeErr != nil {
 		return MlEnvCheck{Name: "imagegen_vault", Status: "unknown", Detail: "cannot determine home directory"}
 	}
 	vaultPath := filepath.Join(homeDir, "vaults", "imagegen_models")
 	entries, readErr := os.ReadDir(vaultPath)
 	switch {
 	case os.IsNotExist(readErr):
 		return MlEnvCheck{Name: "imagegen_vault", Status: "missing", Detail: fmt.Sprintf("vault not found: %s", vaultPath)}
 	case readErr != nil:
 		return MlEnvCheck{Name: "imagegen_vault", Status: "warning", Detail: fmt.Sprintf("cannot read vault: %v", readErr)}
 	}
 	// Only directories count; plain files in the vault root are ignored.
 	names := []string{}
 	for i := range entries {
 		if entries[i].IsDir() {
 			names = append(names, entries[i].Name())
 		}
 	}
 	if len(names) == 0 {
 		return MlEnvCheck{Name: "imagegen_vault", Status: "ok", Detail: "vault exists but is empty"}
 	}
 	return MlEnvCheck{Name: "imagegen_vault", Status: "ok", Detail: fmt.Sprintf("subdirs: %s", strings.Join(names, ", "))}
 }
@@ -0,0 +1,67 @@
 ---
 name: audit_ml_env
 kind: function
 lang: go
 domain: infra
 version: "1.0.0"
 purity: impure
 signature: "func AuditMlEnv(registryRoot string) (MlEnvReport, error)"
 description: "Audita el entorno ML del sistema: GPUs NVIDIA, toolkit CUDA, venv Python, paquetes clave (torch, diffusers, transformers, huggingface_hub), herramientas CLI (sd, llama-cli) y el vault de modelos. Retorna un MlEnvReport con OverallOK=true solo si hay al menos 1 GPU y los checks criticos estan en ok/warning."
 tags: [ml, cuda, gpu, nvidia, audit, doctor, infra, torch, diffusers]
 uses_functions: [get_gpu_info_go_infra]
 uses_types: [gpu_info_go_infra]
 returns: []
 returns_optional: false
 error_type: "error_go_core"
 imports: [context, fmt, os, os/exec, path/filepath, strings, time]
 tested: true
 tests:
  - "report no nil y tiene checks"
  - "generated_at es positivo"
  - "checks tiene al menos 4 entradas"
  - "gpus puede ser vacio en CI"
 test_file_path: "functions/infra/audit_ml_env_test.go"
 file_path: "functions/infra/audit_ml_env.go"
 params:
  - name: registryRoot
    desc: "Ruta absoluta a la raiz del fn_registry. Se usa para localizar python/.venv/bin/python3 y probar paquetes instalados."
 output: "MlEnvReport con Gpus (puede estar vacio si no hay NVIDIA), Checks con estado por herramienta/paquete, OverallOK y GeneratedAt (unix timestamp)."
 ---
 ## Checks realizados
 | Check | Tipo | Critico |
 |---|---|---|
 | `nvidia_smi` | binary in PATH | si |
 | `nvcc` | CUDA toolkit version | si |
 | `python_venv` | exists + `python3 --version` | si |
 | `torch` | `import torch; __version__` | si |
 | `diffusers` | `import diffusers; __version__` | si |
 | `transformers` | `import transformers; __version__` | si |
 | `huggingface_hub` | `import huggingface_hub; __version__` | si |
 | `stable_diffusion_cpp_python` | `import stable_diffusion_cpp` | no (opcional) |
 | `sd_cli` | `sd --version` in PATH | no (opcional) |
 | `llama_cpp` | `llama-cli --version` in PATH | no (opcional) |
 | `imagegen_vault` | `~/vaults/imagegen_models` exists | si |
 ## Ejemplo
 ```go
 root := "/home/lucas/fn_registry"
 report, err := AuditMlEnv(root)
 if err != nil {
    log.Fatal(err)
 }
 for _, c := range report.Checks {
    fmt.Printf("%-40s %s  %s\n", c.Name, c.Status, c.Version)
 }
 fmt.Printf("OverallOK: %v\n", report.OverallOK)
 ```
 ## Notas
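 - Cada check que lanza un subproceso (`nvidia-smi`, `nvcc`, `python3` del venv, imports de paquetes, `sd`, `llama-cli`) tiene timeout de 5 segundos para no bloquear en entornos sin GPU. La deteccion de GPUs via `GetGpuInfo()` no aplica timeout propio.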
 - `stable_diffusion_cpp_python`, `sd_cli` y `llama_cpp` son opcionales: si estan missing, `OverallOK` no se ve afectado.
 - `OverallOK` requiere al menos 1 GPU NVIDIA detectada via `GetGpuInfo()`.
 - No escribe nada en disco. Read-only.
 - Se expone como `fn doctor ml` via cmd/fn/doctor.go.
@@ -0,0 +1,53 @@
 package infra
 import (
 	"testing"
 )
 func TestAuditMlEnv(t *testing.T) {
 	// Use the actual registry root relative to the test binary location.
 	// Tests run from the package directory; go up two levels.
 	registryRoot := "../.."
 	t.Run("report no nil y tiene checks", func(t *testing.T) {
 		report, err := AuditMlEnv(registryRoot)
 		if err != nil {
 			t.Fatalf("AuditMlEnv returned error: %v", err)
 		}
 		if report.Checks == nil {
 			t.Fatal("report.Checks is nil")
 		}
 	})
 	t.Run("generated_at es positivo", func(t *testing.T) {
 		report, err := AuditMlEnv(registryRoot)
 		if err != nil {
 			t.Fatalf("AuditMlEnv returned error: %v", err)
 		}
 		if report.GeneratedAt <= 0 {
 			t.Errorf("GeneratedAt should be positive unix timestamp, got %d", report.GeneratedAt)
 		}
 	})
 	t.Run("checks tiene al menos 4 entradas", func(t *testing.T) {
 		report, err := AuditMlEnv(registryRoot)
 		if err != nil {
 			t.Fatalf("AuditMlEnv returned error: %v", err)
 		}
 		if len(report.Checks) < 4 {
 			t.Errorf("expected at least 4 checks, got %d", len(report.Checks))
 		}
 	})
 	t.Run("gpus puede ser vacio en CI", func(t *testing.T) {
 		report, err := AuditMlEnv(registryRoot)
 		if err != nil {
 			t.Fatalf("AuditMlEnv returned error: %v", err)
 		}
 		// Gpus may be empty in CI without a GPU; that's OK.
 		// Just verify the field is not nil.
 		if report.Gpus == nil {
 			t.Error("report.Gpus should be a non-nil slice (can be empty)")
 		}
 	})
 }
@@ -0,0 +1,60 @@
 package infra
 import (
 	"encoding/csv"
 	"errors"
 	"fmt"
 	"os/exec"
 	"strconv"
 	"strings"
 )
 // GetGpuInfo queries NVIDIA GPUs via nvidia-smi and returns a slice of GpuInfo.
 //
 // If nvidia-smi is not installed, or it runs but exits with a non-zero status,
 // the function returns an empty (non-nil) slice and a nil error: absence of
 // NVIDIA hardware is not treated as an error. Any other failure to run the
 // command, or a CSV parse failure, is returned wrapped with a "gpu_info:"
 // prefix.
 //
 // Rows with fewer than 6 fields are skipped. Numeric fields that do not parse
 // as integers are stored as 0.
 //
 // NOTE(review): confirm that "cuda_version" is accepted by
 // `nvidia-smi --query-gpu` on the targeted drivers (see
 // `nvidia-smi --help-query-gpu`). If the field is rejected, nvidia-smi exits
 // non-zero and the ExitError branch below reports "no GPUs" instead of
 // surfacing the problem.
 func GetGpuInfo() ([]GpuInfo, error) {
 	out, err := exec.Command(
 		"nvidia-smi",
 		"--query-gpu=index,name,memory.total,memory.free,driver_version,cuda_version",
 		"--format=csv,noheader,nounits",
 	).Output()
 	if err != nil {
 		// nvidia-smi not installed or no NVIDIA device — not an error.
 		var exitErr *exec.ExitError
 		if errors.Is(err, exec.ErrNotFound) || errors.As(err, &exitErr) {
 			return []GpuInfo{}, nil
 		}
 		return nil, fmt.Errorf("gpu_info: nvidia-smi: %w", err)
 	}
 	r := csv.NewReader(strings.NewReader(strings.TrimSpace(string(out))))
 	r.TrimLeadingSpace = true
 	// Allow rows with differing field counts. With the default (0) the reader
 	// pins the count of the first row and fails the whole parse on any other
 	// row, which would defeat the short-row skip below.
 	r.FieldsPerRecord = -1
 	records, err := r.ReadAll()
 	if err != nil {
 		return nil, fmt.Errorf("gpu_info: parse csv: %w", err)
 	}
 	gpus := make([]GpuInfo, 0, len(records))
 	for _, rec := range records {
 		if len(rec) < 6 {
 			continue
 		}
 		// Atoi errors are ignored on purpose: a non-numeric value becomes 0.
 		idx, _ := strconv.Atoi(strings.TrimSpace(rec[0]))
 		totalMb, _ := strconv.Atoi(strings.TrimSpace(rec[2]))
 		freeMb, _ := strconv.Atoi(strings.TrimSpace(rec[3]))
 		gpus = append(gpus, GpuInfo{
 			Index:         idx,
 			Name:          strings.TrimSpace(rec[1]),
 			VramTotalMb:   totalMb,
 			VramFreeMb:    freeMb,
 			DriverVersion: strings.TrimSpace(rec[4]),
 			CudaVersion:   strings.TrimSpace(rec[5]),
 		})
 	}
 	return gpus, nil
 }
@@ -0,0 +1,70 @@
 ---
 name: get_gpu_info
 kind: function
 lang: go
 domain: infra
 version: "1.0.0"
 purity: impure
 signature: "func GetGpuInfo() ([]GpuInfo, error)"
 description: "Consulta GPUs NVIDIA via nvidia-smi y retorna un slice de GpuInfo con index, nombre, VRAM total/libre, driver y version CUDA. Si nvidia-smi no esta instalado o no hay GPU NVIDIA, retorna slice vacio y nil (ausencia de hardware no es error)."
 tags: [gpu, nvidia, cuda, hardware, infra, probe]
 uses_functions: []
 uses_types: ["gpu_info_go_infra"]
 returns: []
 returns_optional: false
 error_type: "error_go_core"
 imports: [encoding/csv, errors, fmt, os/exec, strconv, strings]
 params:
  - name: (ninguno)
    desc: "No toma parametros. Lee el estado del sistema via nvidia-smi."
 output: "Slice de GpuInfo con una entrada por GPU detectada. Slice vacio si no hay GPUs NVIDIA o nvidia-smi no esta instalado. Error solo si nvidia-smi existe pero falla inesperadamente al parsear la salida CSV."
 tested: true
 tests:
  - "retorna slice vacio y nil cuando no hay GPU NVIDIA"
  - "linea GPU RTX 3080 tipica"
  - "dos GPUs en el CSV"
  - "CSV vacio retorna slice vacio"
  - "linea con menos de 6 campos se ignora"
  - "espacios extra en los valores se eliminan"
  - "campos del struct GpuInfo correctos"
 test_file_path: "functions/infra/get_gpu_info_test.go"
 file_path: "functions/infra/get_gpu_info.go"
 ---
 ## Ejemplo
 ```go
 gpus, err := GetGpuInfo()
 if err != nil {
    log.Fatal(err)
 }
 if len(gpus) == 0 {
    fmt.Println("No NVIDIA GPUs detected")
 } else {
    for _, g := range gpus {
        fmt.Printf("[%d] %s  VRAM: %d/%d MiB  Driver: %s  CUDA: %s\n",
            g.Index, g.Name, g.VramFreeMb, g.VramTotalMb,
            g.DriverVersion, g.CudaVersion)
    }
 }
 ```
 ## Salida nvidia-smi
 Ejecuta:
 ```
 nvidia-smi --query-gpu=index,name,memory.total,memory.free,driver_version,cuda_version --format=csv,noheader,nounits
 ```
 Ejemplo de salida con una GPU:
 ```
 0, NVIDIA GeForce RTX 3080, 10240, 8192, 550.54.15, 12.4
 ```
 ## Notas
 - Requiere `nvidia-smi` en PATH (parte del driver NVIDIA).
 - La columna `cuda_version` en nvidia-smi refleja la version maxima de CUDA soportada por el driver, no la del toolkit instalado.
 - Para comprobar el toolkit CUDA instalado, usar `cuda_toolkit_check_bash_infra`.
 - En maquinas sin GPU NVIDIA retorna `([]GpuInfo{}, nil)` — el caller puede tratar esto como "sin GPU disponible".
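 - Los tests incluidos no necesitan GPU: `TestGetGpuInfoNoGpu` acepta un slice vacio y el parsing se prueba con una replica (`parseCsvNvidiaSmi`) sobre CSV fijo. La ruta real con GPU no queda cubierta en CI; verificarla manualmente o con un mock de `nvidia-smi`.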
@@ -0,0 +1,165 @@
 package infra
 import (
 	"strconv"
 	"strings"
 	"testing"
 )
 // TestGetGpuInfoNoGpu checks the contract GetGpuInfo offers on any machine,
 // with or without an NVIDIA GPU: the call does not return an error and the
 // returned slice is never nil (it may be empty).
 func TestGetGpuInfoNoGpu(t *testing.T) {
 	t.Run("retorna slice vacio y nil cuando no hay GPU NVIDIA", func(t *testing.T) {
 		detected, callErr := GetGpuInfo()
 		if callErr != nil {
 			t.Errorf("GetGpuInfo() error inesperado: %v", callErr)
 		}
 		// Without nvidia-smi the result must be an empty slice, not nil.
 		if detected == nil {
 			t.Error("GetGpuInfo() retorno nil, se esperaba slice vacio []GpuInfo{}")
 		}
 	})
 }
 // parseCsvNvidiaSmi is a test-only stand-in for the parsing step of
 // GetGpuInfo. It takes text in the shape produced by
 // `nvidia-smi --format=csv,noheader,nounits` and returns one GpuInfo per line
 // that has at least 6 comma-separated fields; shorter lines are dropped.
 // Blank input yields an empty, non-nil slice. The error result is always nil.
 func parseCsvNvidiaSmi(output string) ([]GpuInfo, error) {
 	body := strings.TrimSpace(output)
 	if body == "" {
 		return []GpuInfo{}, nil
 	}
 	rawLines := strings.Split(body, "\n")
 	parsed := make([]GpuInfo, 0, len(rawLines))
 	for i := range rawLines {
 		cols := strings.Split(rawLines[i], ",")
 		if len(cols) >= 6 {
 			var gpu GpuInfo
 			// Non-numeric values are left as whatever Atoi returns (error ignored).
 			gpu.Index, _ = strconv.Atoi(strings.TrimSpace(cols[0]))
 			gpu.Name = strings.TrimSpace(cols[1])
 			gpu.VramTotalMb, _ = strconv.Atoi(strings.TrimSpace(cols[2]))
 			gpu.VramFreeMb, _ = strconv.Atoi(strings.TrimSpace(cols[3]))
 			gpu.DriverVersion = strings.TrimSpace(cols[4])
 			gpu.CudaVersion = strings.TrimSpace(cols[5])
 			parsed = append(parsed, gpu)
 		}
 	}
 	return parsed, nil
 }
 // TestParseCsvNvidiaSmi checks the CSV parsing of nvidia-smi output through
 // parseCsvNvidiaSmi, so it needs neither a real GPU nor nvidia-smi.
 // Field values are only compared for cases that expect exactly one GPU; the
 // other cases assert the number of parsed entries.
 func TestParseCsvNvidiaSmi(t *testing.T) {
 	cases := []struct {
 		name          string
 		csvInput      string
 		wantLen       int
 		wantIndex     int
 		wantName      string
 		wantVramTotal int
 		wantVramFree  int
 		wantDriver    string
 		wantCuda      string
 	}{
 		{
 			name:          "linea GPU RTX 3080 tipica",
 			csvInput:      "0, NVIDIA GeForce RTX 3080, 10240, 8192, 550.54.15, 12.4",
 			wantLen:       1,
 			wantIndex:     0,
 			wantName:      "NVIDIA GeForce RTX 3080",
 			wantVramTotal: 10240,
 			wantVramFree:  8192,
 			wantDriver:    "550.54.15",
 			wantCuda:      "12.4",
 		},
 		{
 			name:     "dos GPUs en el CSV",
 			csvInput: "0, GPU A, 8192, 4096, 525.0, 12.0\n1, GPU B, 24576, 20000, 525.0, 12.0",
 			wantLen:  2,
 		},
 		{
 			name:     "CSV vacio retorna slice vacio",
 			csvInput: "",
 			wantLen:  0,
 		},
 		{
 			name:     "linea con menos de 6 campos se ignora",
 			csvInput: "0, GPU, 8192",
 			wantLen:  0,
 		},
 		{
 			name:          "espacios extra en los valores se eliminan",
 			csvInput:      " 1 ,  NVIDIA RTX 4090 ,  24576 ,  20000 ,  545.0 ,  12.6 ",
 			wantLen:       1,
 			wantIndex:     1,
 			wantName:      "NVIDIA RTX 4090",
 			wantVramTotal: 24576,
 			wantVramFree:  20000,
 			wantDriver:    "545.0",
 			wantCuda:      "12.6",
 		},
 	}
 	for i := range cases {
 		c := cases[i]
 		t.Run(c.name, func(t *testing.T) {
 			parsed, parseErr := parseCsvNvidiaSmi(c.csvInput)
 			if parseErr != nil {
 				t.Fatalf("error inesperado: %v", parseErr)
 			}
 			if len(parsed) != c.wantLen {
 				t.Fatalf("len(gpus) = %d, quería %d", len(parsed), c.wantLen)
 			}
 			// Per-field expectations are only defined for single-GPU cases.
 			if c.wantLen != 1 {
 				return
 			}
 			first := parsed[0]
 			if first.Index != c.wantIndex {
 				t.Errorf("Index = %d, quería %d", first.Index, c.wantIndex)
 			}
 			if first.Name != c.wantName {
 				t.Errorf("Name = %q, quería %q", first.Name, c.wantName)
 			}
 			if first.VramTotalMb != c.wantVramTotal {
 				t.Errorf("VramTotalMb = %d, quería %d", first.VramTotalMb, c.wantVramTotal)
 			}
 			if first.VramFreeMb != c.wantVramFree {
 				t.Errorf("VramFreeMb = %d, quería %d", first.VramFreeMb, c.wantVramFree)
 			}
 			if first.DriverVersion != c.wantDriver {
 				t.Errorf("DriverVersion = %q, quería %q", first.DriverVersion, c.wantDriver)
 			}
 			if first.CudaVersion != c.wantCuda {
 				t.Errorf("CudaVersion = %q, quería %q", first.CudaVersion, c.wantCuda)
 			}
 		})
 	}
 }
 // TestGpuInfoStruct fills a GpuInfo value and reads back Index, Name,
 // VramTotalMb and VramFreeMb to confirm the fields hold what was assigned.
 func TestGpuInfoStruct(t *testing.T) {
 	t.Run("campos del struct GpuInfo correctos", func(t *testing.T) {
 		var sample GpuInfo
 		sample.Index = 0
 		sample.Name = "NVIDIA GeForce GTX 1080"
 		sample.VramTotalMb = 8192
 		sample.VramFreeMb = 6144
 		sample.DriverVersion = "470.0"
 		sample.CudaVersion = "11.4"
 		if sample.Index != 0 {
 			t.Errorf("Index = %d", sample.Index)
 		}
 		if sample.Name != "NVIDIA GeForce GTX 1080" {
 			t.Errorf("Name = %q", sample.Name)
 		}
 		if sample.VramTotalMb != 8192 {
 			t.Errorf("VramTotalMb = %d", sample.VramTotalMb)
 		}
 		if sample.VramFreeMb != 6144 {
 			t.Errorf("VramFreeMb = %d", sample.VramFreeMb)
 		}
 	})
 }
@@ -0,0 +1,12 @@
 package infra
 // GpuInfo describes one GPU detected on the system: its index and name, its
 // total and free VRAM, and the driver and CUDA versions reported for it.
 // CudaVersion is omitted from the JSON encoding when empty.
 type GpuInfo struct {
 	Index         int    `json:"index"`
 	Name          string `json:"name"`
 	VramTotalMb   int    `json:"vram_total_mb"`
 	VramFreeMb    int    `json:"vram_free_mb"`
 	DriverVersion string `json:"driver_version"`
 	CudaVersion   string `json:"cuda_version,omitempty"`
 }
@@ -0,0 +1,171 @@
 package infra
 import (
 	"fmt"
 	"os"
 	"path/filepath"
 	"time"
 )
 // AggregateReport summarises the result of a VaultAggregateIndex run.
 // TotalFiles only counts files of vaults that were processed successfully.
 type AggregateReport struct {
 	VaultsProcessed int      // vaults whose rows were replaced and committed
 	VaultsSkipped   int      // vaults without a vault_index.db
 	TotalFiles      int      // file rows copied across all processed vaults
 	Errors          []string // non-fatal per-vault errors
 }
 // VaultAggregateIndex reads all vault manifests from repoRoot, opens each
 // vault_index.db and copies all file records into the central registry.db
 // vault_files table. The table and its indexes are created if they do not
 // exist (idempotent).
 //
 // For each vault the previous rows are deleted and the new ones inserted in a
 // single transaction, so re-running always produces a clean, non-duplicated
 // state. If anything fails for a vault (opening or reading its index, or any
 // step of the transaction) the transaction is rolled back, the vault's
 // existing rows in registry.db are left untouched, one message is appended to
 // report.Errors and processing moves on to the next vault.
 //
 // Vaults without a vault_index.db are counted in VaultsSkipped.
 //
 // Only three failures are fatal and returned as the error value: registry.db
 // cannot be opened, the schema migration fails, or the vault manifest cannot
 // be read.
 func VaultAggregateIndex(repoRoot string) (AggregateReport, error) {
 	var report AggregateReport
 	// 1. Open registry.db
 	registryDB, err := SQLiteOpen(filepath.Join(repoRoot, "registry.db"), "")
 	if err != nil {
 		return report, fmt.Errorf("vault_aggregate_index: open registry.db: %w", err)
 	}
 	defer registryDB.Close()
 	// 2. Idempotent schema migration
 	for _, stmt := range []string{
 		`CREATE TABLE IF NOT EXISTS vault_files (
    vault_id    TEXT NOT NULL,
    vault_name  TEXT NOT NULL,
    rel_path    TEXT NOT NULL,
    size        INTEGER NOT NULL,
    mtime       INTEGER NOT NULL,
    sha256      TEXT NOT NULL,
    mime        TEXT NOT NULL DEFAULT '',
    ext         TEXT NOT NULL DEFAULT '',
    bucket      TEXT NOT NULL DEFAULT '',
    sub_bucket  TEXT NOT NULL DEFAULT '',
    indexed_at  INTEGER NOT NULL,
    PRIMARY KEY (vault_id, rel_path)
 );`,
 		`CREATE INDEX IF NOT EXISTS idx_vault_files_sha256 ON vault_files(sha256);`,
 		`CREATE INDEX IF NOT EXISTS idx_vault_files_vault ON vault_files(vault_id);`,
 	} {
 		if _, err := registryDB.Exec(stmt); err != nil {
 			if !isIdempotentMigrationError(err) {
 				return report, fmt.Errorf("vault_aggregate_index: schema: %w", err)
 			}
 		}
 	}
 	// 3. Read manifest
 	entries, err := VaultManifestRead(repoRoot)
 	if err != nil {
 		return report, fmt.Errorf("vault_aggregate_index: manifest: %w", err)
 	}
 	// One timestamp for the whole run so every row shares the same indexed_at.
 	now := time.Now().UTC().Unix()
 	for _, entry := range entries {
 		vaultID := vaultIDFromEntry(entry)
 		vaultName := entry.Name
 		vaultPath := entry.Path
 		indexPath := filepath.Join(vaultPath, "vault_index.db")
 		if _, statErr := os.Stat(indexPath); statErr != nil {
 			report.VaultsSkipped++
 			continue
 		}
 		vaultDB, openErr := VaultIndexOpen(vaultPath)
 		if openErr != nil {
 			report.Errors = append(report.Errors, fmt.Sprintf("%s: open index: %v", vaultName, openErr))
 			continue
 		}
 		rows, queryErr := vaultDB.Query(
 			`SELECT rel_path, size, mtime, sha256, mime, ext, bucket, sub_bucket FROM files`,
 		)
 		if queryErr != nil {
 			vaultDB.Close()
 			report.Errors = append(report.Errors, fmt.Sprintf("%s: query files: %v", vaultName, queryErr))
 			continue
 		}
 		type fileRow struct {
 			RelPath   string
 			Size      int64
 			Mtime     int64
 			Sha256    string
 			Mime      string
 			Ext       string
 			Bucket    string
 			SubBucket string
 		}
 		// Read the whole vault index into memory first so the vault DB can be
 		// closed before the registry transaction starts.
 		var fileRows []fileRow
 		for rows.Next() {
 			var r fileRow
 			if scanErr := rows.Scan(&r.RelPath, &r.Size, &r.Mtime, &r.Sha256, &r.Mime, &r.Ext, &r.Bucket, &r.SubBucket); scanErr != nil {
 				// Best effort: the unreadable row is skipped, but no longer silently.
 				report.Errors = append(report.Errors, fmt.Sprintf("%s: scan row: %v", vaultName, scanErr))
 				continue
 			}
 			fileRows = append(fileRows, r)
 		}
 		// rows.Next() also returns false when iteration aborts; replacing the
 		// vault's rows with a truncated result would silently lose data.
 		iterErr := rows.Err()
 		rows.Close()
 		vaultDB.Close()
 		if iterErr != nil {
 			report.Errors = append(report.Errors, fmt.Sprintf("%s: read files: %v", vaultName, iterErr))
 			continue
 		}
 		// Atomic replace in registry.db
 		tx, txErr := registryDB.Begin()
 		if txErr != nil {
 			report.Errors = append(report.Errors, fmt.Sprintf("%s: begin tx: %v", vaultName, txErr))
 			continue
 		}
 		if _, delErr := tx.Exec(`DELETE FROM vault_files WHERE vault_id = ?`, vaultID); delErr != nil {
 			tx.Rollback()
 			report.Errors = append(report.Errors, fmt.Sprintf("%s: delete: %v", vaultName, delErr))
 			continue
 		}
 		stmt, prepErr := tx.Prepare(`
 INSERT INTO vault_files
    (vault_id, vault_name, rel_path, size, mtime, sha256, mime, ext, bucket, sub_bucket, indexed_at)
 VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)`)
 		if prepErr != nil {
 			tx.Rollback()
 			report.Errors = append(report.Errors, fmt.Sprintf("%s: prepare: %v", vaultName, prepErr))
 			continue
 		}
 		// Stop at the first failed insert. The failure is remembered and
 		// handled after the loop so that the vault is abandoned exactly once,
 		// instead of reusing a closed statement for the remaining rows.
 		insertFailure := ""
 		for _, r := range fileRows {
 			if _, insErr := stmt.Exec(vaultID, vaultName, r.RelPath, r.Size, r.Mtime, r.Sha256, r.Mime, r.Ext, r.Bucket, r.SubBucket, now); insErr != nil {
 				insertFailure = fmt.Sprintf("%s: insert %s: %v", vaultName, r.RelPath, insErr)
 				break
 			}
 		}
 		stmt.Close()
 		if insertFailure != "" {
 			tx.Rollback()
 			report.Errors = append(report.Errors, insertFailure)
 			continue
 		}
 		if commitErr := tx.Commit(); commitErr != nil {
 			report.Errors = append(report.Errors, fmt.Sprintf("%s: commit: %v", vaultName, commitErr))
 			continue
 		}
 		report.VaultsProcessed++
 		report.TotalFiles += len(fileRows)
 	}
 	return report, nil
 }
 // vaultIDFromEntry builds the vault ID stored in registry.db for a manifest
 // entry: "<vault_name>_<project_id>", or just the vault name when the entry
 // has no project ID.
 func vaultIDFromEntry(e VaultManifestEntry) string {
 	id := e.Name
 	if e.ProjectID != "" {
 		id += "_" + e.ProjectID
 	}
 	return id
 }
@@ -0,0 +1,58 @@
 ---
 name: vault_aggregate_index
 kind: function
 lang: go
 domain: infra
 version: "1.0.0"
 purity: impure
 signature: "func VaultAggregateIndex(repoRoot string) (AggregateReport, error)"
 description: "Agrega los índices de todos los vaults del registry en la tabla vault_files de registry.db. Lee cada vault_index.db (via VaultIndexOpen) y reemplaza las filas de forma atómica. Idempotente: re-ejecutar limpia y reescribe sin duplicar."
 tags: [vault, index, aggregate, registry]
 uses_functions:
  - "vault_manifest_read_go_infra"
  - "vault_index_open_go_infra"
  - "sqlite_open_go_infra"
 uses_types:
  - "vault_file_go_infra"
 returns: []
 returns_optional: false
 error_type: "error_go_core"
 imports:
  - "database/sql"
  - "fmt"
  - "os"
  - "path/filepath"
  - "time"
 tested: true
 tests:
  - "TestVaultAggregateIndex_NoVaults"
  - "TestVaultAggregateIndex_VaultWithoutIndex"
  - "TestVaultAggregateIndex_HappyPath"
  - "TestVaultAggregateIndex_ReRunReplaces"
 test_file_path: "functions/infra/vault_aggregate_index_test.go"
 file_path: "functions/infra/vault_aggregate_index.go"
 params:
  - name: repoRoot
    desc: "Ruta absoluta a la raiz del fn_registry (contiene registry.db y projects/)."
 output: "AggregateReport con VaultsProcessed, VaultsSkipped (sin vault_index.db), TotalFiles y Errors (errores no fatales por vault). Error fatal si registry.db no se puede abrir, si falla la migracion del esquema o si no se puede leer el manifest de vaults."
 ---
 ## Ejemplo
 ```go
 report, err := infra.VaultAggregateIndex("/home/lucas/fn_registry")
 if err != nil {
    log.Fatal(err)
 }
 fmt.Printf("Processed: %d vaults, %d files\n", report.VaultsProcessed, report.TotalFiles)
 for _, e := range report.Errors {
    fmt.Println("warning:", e)
 }
 ```
 ## Notas
 - No depende de que `registry/migrations/012_vault_files.sql` haya sido aplicado previamente (ni de que el indexer lo aplique al arrancar): la función crea la tabla `vault_files` y sus índices de forma idempotente ella misma con `CREATE TABLE IF NOT EXISTS` / `CREATE INDEX IF NOT EXISTS`.
 - Por cada vault: `DELETE WHERE vault_id = ?` + batch `INSERT` dentro de una transacción. Re-run siempre produce el mismo resultado.
 - Vaults sin `vault_index.db` se cuentan en `VaultsSkipped` y se omiten sin error.
 - El `vault_id` sigue el patrón `<vault_name>_<project_id>`, consistente con la tabla `vaults` de registry.db.
@@ -0,0 +1,175 @@
 package infra
 import (
 	"os"
 	"path/filepath"
 	"testing"
 	"time"
 )
 // setupAggregateTestRepo builds a throwaway repository for the aggregate-index
 // tests and returns its root directory. Resulting layout:
 //
 //	<root>/
 //	  registry.db                             (created via SQLiteOpen, then closed)
 //	  projects/<projectID>/vaults/vault.yaml  (manifest declaring one vault)
 //	<vaultPath>/                              (created if missing)
 //
 // When withIndex is true, a vault_index.db containing a single row for
 // data/raw/sample.csv is created inside vaultPath through VaultIndexOpen.
 func setupAggregateTestRepo(t *testing.T, vaultName, projectID, vaultPath string, withIndex bool) string {
 	t.Helper()
 	repo := t.TempDir()
 	// registry.db only has to exist on disk; it is closed again right away.
 	registry, openErr := SQLiteOpen(filepath.Join(repo, "registry.db"), "")
 	if openErr != nil {
 		t.Fatalf("create registry.db: %v", openErr)
 	}
 	registry.Close()
 	// Manifest lives under projects/<projectID>/vaults/vault.yaml.
 	manifestDir := filepath.Join(repo, "projects", projectID, "vaults")
 	if mkErr := os.MkdirAll(manifestDir, 0755); mkErr != nil {
 		t.Fatalf("mkdir projects: %v", mkErr)
 	}
 	manifest := "vaults:\n" +
 		"  - name: " + vaultName + "\n" +
 		"    description: test\n" +
 		"    path: " + vaultPath + "\n" +
 		"    tags: []\n"
 	manifestFile := filepath.Join(manifestDir, "vault.yaml")
 	if wrErr := os.WriteFile(manifestFile, []byte(manifest), 0644); wrErr != nil {
 		t.Fatalf("write vault.yaml: %v", wrErr)
 	}
 	// The vault directory itself.
 	if mkErr := os.MkdirAll(vaultPath, 0755); mkErr != nil {
 		t.Fatalf("mkdir vault: %v", mkErr)
 	}
 	if !withIndex {
 		return repo
 	}
 	// Seed vault_index.db with exactly one file row.
 	index, idxErr := VaultIndexOpen(vaultPath)
 	if idxErr != nil {
 		t.Fatalf("VaultIndexOpen: %v", idxErr)
 	}
 	stamp := time.Now().UTC().Unix()
 	_, insErr := index.Exec(`INSERT INTO files (rel_path, size, mtime, sha256, mime, ext, bucket, sub_bucket, indexed_at)
 VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)`,
 		"data/raw/sample.csv", 1024, stamp, "deadbeef", "text/csv", ".csv", "data", "raw", stamp)
 	if insErr != nil {
 		t.Fatalf("insert test file: %v", insErr)
 	}
 	index.Close()
 	return repo
 }
 // TestVaultAggregateIndex_NoVaults runs the aggregation against a repo that
 // contains a registry.db but no vault manifests, and expects an empty report
 // with no errors.
 func TestVaultAggregateIndex_NoVaults(t *testing.T) {
 	repo := t.TempDir()
 	// Only registry.db exists; there is no projects/ tree at all.
 	registry, openErr := SQLiteOpen(filepath.Join(repo, "registry.db"), "")
 	if openErr != nil {
 		t.Fatalf("create registry.db: %v", openErr)
 	}
 	registry.Close()
 	got, runErr := VaultAggregateIndex(repo)
 	if runErr != nil {
 		t.Fatalf("unexpected error: %v", runErr)
 	}
 	if n := got.VaultsProcessed; n != 0 {
 		t.Errorf("VaultsProcessed: want 0, got %d", n)
 	}
 	if errs := got.Errors; len(errs) != 0 {
 		t.Errorf("Errors: want empty, got %v", errs)
 	}
 }
 // TestVaultAggregateIndex_VaultWithoutIndex declares one vault whose directory
 // holds no vault_index.db and expects it to be counted as skipped rather than
 // processed.
 func TestVaultAggregateIndex_VaultWithoutIndex(t *testing.T) {
 	const withIndex = false // vault directory exists, but no vault_index.db is created
 	repo := setupAggregateTestRepo(t, "my_vault", "my_proj", t.TempDir(), withIndex)
 	got, runErr := VaultAggregateIndex(repo)
 	if runErr != nil {
 		t.Fatalf("unexpected error: %v", runErr)
 	}
 	if n := got.VaultsSkipped; n != 1 {
 		t.Errorf("VaultsSkipped: want 1, got %d", n)
 	}
 	if n := got.VaultsProcessed; n != 0 {
 		t.Errorf("VaultsProcessed: want 0, got %d", n)
 	}
 }
 // TestVaultAggregateIndex_HappyPath aggregates one vault whose index holds a
 // single file and checks both the returned report and the vault_files table
 // in registry.db.
 func TestVaultAggregateIndex_HappyPath(t *testing.T) {
 	repo := setupAggregateTestRepo(t, "my_vault", "my_proj", t.TempDir(), true)
 	got, runErr := VaultAggregateIndex(repo)
 	if runErr != nil {
 		t.Fatalf("unexpected error: %v", runErr)
 	}
 	if n := got.VaultsProcessed; n != 1 {
 		t.Errorf("VaultsProcessed: want 1, got %d", n)
 	}
 	if n := got.TotalFiles; n != 1 {
 		t.Errorf("TotalFiles: want 1, got %d", n)
 	}
 	// The aggregated row must be visible in registry.db.
 	registry, openErr := SQLiteOpen(filepath.Join(repo, "registry.db"), "")
 	if openErr != nil {
 		t.Fatalf("open registry.db: %v", openErr)
 	}
 	defer registry.Close()
 	var rows int
 	if scanErr := registry.QueryRow(`SELECT COUNT(*) FROM vault_files`).Scan(&rows); scanErr != nil {
 		t.Fatalf("count vault_files: %v", scanErr)
 	}
 	if rows != 1 {
 		t.Errorf("vault_files count: want 1, got %d", rows)
 	}
 }
 // TestVaultAggregateIndex_ReRunReplaces aggregates, adds a second file to the
 // vault index, aggregates again, and verifies that the second run rewrites the
 // vault's rows instead of appending duplicates (2 rows, not 3).
 func TestVaultAggregateIndex_ReRunReplaces(t *testing.T) {
 	vault := t.TempDir()
 	repo := setupAggregateTestRepo(t, "my_vault", "my_proj", vault, true)
 	// Run 1: one file indexed.
 	if _, firstErr := VaultAggregateIndex(repo); firstErr != nil {
 		t.Fatalf("first run: %v", firstErr)
 	}
 	// Grow the vault index by one more file between the two runs.
 	index, idxErr := VaultIndexOpen(vault)
 	if idxErr != nil {
 		t.Fatalf("reopen vault index: %v", idxErr)
 	}
 	stamp := time.Now().UTC().Unix()
 	_, insErr := index.Exec(`INSERT INTO files (rel_path, size, mtime, sha256, mime, ext, bucket, sub_bucket, indexed_at)
 VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)`,
 		"data/raw/extra.csv", 512, stamp, "cafebabe", "text/csv", ".csv", "data", "raw", stamp)
 	if insErr != nil {
 		t.Fatalf("insert second file: %v", insErr)
 	}
 	index.Close()
 	// Run 2: must reflect both files.
 	got, secondErr := VaultAggregateIndex(repo)
 	if secondErr != nil {
 		t.Fatalf("second run: %v", secondErr)
 	}
 	if n := got.TotalFiles; n != 2 {
 		t.Errorf("TotalFiles: want 2, got %d", n)
 	}
 	// Exactly two rows in registry.db proves the first run's row was replaced.
 	registry, openErr := SQLiteOpen(filepath.Join(repo, "registry.db"), "")
 	if openErr != nil {
 		t.Fatalf("open registry.db: %v", openErr)
 	}
 	defer registry.Close()
 	var rows int
 	if scanErr := registry.QueryRow(`SELECT COUNT(*) FROM vault_files`).Scan(&rows); scanErr != nil {
 		t.Fatalf("count vault_files: %v", scanErr)
 	}
 	if rows != 2 {
 		t.Errorf("vault_files count after re-run: want 2, got %d", rows)
 	}
 }
@@ -0,0 +1,68 @@
 package infra
 import "sort"
 // VaultFileChange holds the before/after state of a file whose content changed:
 // the same RelPath is present in both snapshots, but the Sha256 values differ.
 type VaultFileChange struct {
 	RelPath string // vault-relative path shared by Prev and Curr
 	Prev    VaultFile // record taken from the previous snapshot
 	Curr    VaultFile // record taken from the current snapshot
 }
 // VaultDiffReport is the result of comparing two VaultFile slices with VaultDiff.
 // Files are matched by RelPath; content equality is decided by Sha256 alone.
 // VaultDiff sorts the three slices by RelPath ascending before returning.
 type VaultDiffReport struct {
 	Added     []VaultFile       // in curr but not in prev (by rel_path)
 	Removed   []VaultFile       // in prev but not in curr
 	Changed   []VaultFileChange // same rel_path, different sha256
 	Unchanged int               // number of curr entries whose rel_path exists in prev with identical sha256
 }
 // VaultDiff compares a previous and a current vault snapshot and reports what
 // was added, removed, changed or left untouched.
 //
 // Files are identified by RelPath. An entry of curr whose path is unknown to
 // prev is Added; one whose path is known but whose Sha256 differs is Changed;
 // otherwise it increments Unchanged. Entries of prev whose path is absent from
 // curr are Removed. Either slice may be nil. Added, Removed and Changed are
 // sorted by RelPath ascending. No I/O is performed.
 func VaultDiff(prev, curr []VaultFile) VaultDiffReport {
 	// Path lookups for both sides; when a path repeats, the last entry wins.
 	before := make(map[string]VaultFile, len(prev))
 	for i := range prev {
 		before[prev[i].RelPath] = prev[i]
 	}
 	after := make(map[string]VaultFile, len(curr))
 	for i := range curr {
 		after[curr[i].RelPath] = curr[i]
 	}
 	var out VaultDiffReport
 	// Classify every current file against the previous snapshot.
 	for _, file := range curr {
 		old, known := before[file.RelPath]
 		switch {
 		case !known:
 			out.Added = append(out.Added, file)
 		case old.Sha256 == file.Sha256:
 			out.Unchanged++
 		default:
 			out.Changed = append(out.Changed, VaultFileChange{
 				RelPath: file.RelPath,
 				Prev:    old,
 				Curr:    file,
 			})
 		}
 	}
 	// Anything that existed before but is gone now was removed.
 	for _, file := range prev {
 		if _, kept := after[file.RelPath]; kept {
 			continue
 		}
 		out.Removed = append(out.Removed, file)
 	}
 	// Order the output by path.
 	added, removed, changed := out.Added, out.Removed, out.Changed
 	sort.Slice(added, func(a, b int) bool { return added[a].RelPath < added[b].RelPath })
 	sort.Slice(removed, func(a, b int) bool { return removed[a].RelPath < removed[b].RelPath })
 	sort.Slice(changed, func(a, b int) bool { return changed[a].RelPath < changed[b].RelPath })
 	return out
 }
@@ -0,0 +1,49 @@
 ---
 name: vault_diff
 kind: function
 lang: go
 domain: infra
 version: "1.0.0"
 purity: pure
 signature: "func VaultDiff(prev, curr []VaultFile) VaultDiffReport"
 description: "Computes the diff between two vault snapshots (slices of VaultFile). Returns the Added, Removed and Changed entries (each sorted by RelPath) plus an Unchanged count. Pure and deterministic — no I/O."
 tags: [vault, diff, comparison, pure]
 uses_functions: []
 uses_types: ["vault_file_go_infra"]
 returns: []
 returns_optional: false
 error_type: ""
 imports: ["sort"]
 tested: true
 tests:
  - "TestVaultDiff_NoChanges"
  - "TestVaultDiff_AllAdded"
  - "TestVaultDiff_AllRemoved"
  - "TestVaultDiff_ContentChanged"
  - "TestVaultDiff_Mixed"
 test_file_path: "functions/infra/vault_diff_test.go"
 file_path: "functions/infra/vault_diff.go"
 params:
  - name: prev
    desc: "Snapshot anterior — slice de VaultFile del estado previo del vault (puede ser nil para diff desde cero)."
  - name: curr
    desc: "Snapshot actual — slice de VaultFile del estado corriente del vault (puede ser nil para diff de borrado total)."
 output: "VaultDiffReport con Added (nuevos), Removed (eliminados), Changed (mismo rel_path, sha256 distinto) y Unchanged (identicos). Todos los slices ordenados por RelPath ASC."
 ---
 ## Ejemplo
 ```go
 prev, _ := infra.VaultInventoryScan(oldPath, "my_vault_proj", "my_vault")
 curr, _ := infra.VaultInventoryScan(newPath, "my_vault_proj", "my_vault")
 report := infra.VaultDiff(prev, curr)
 fmt.Printf("Added: %d, Removed: %d, Changed: %d, Unchanged: %d\n",
    len(report.Added), len(report.Removed), len(report.Changed), report.Unchanged)
 ```
 ## Notas
 - Usa `RelPath` como clave de identidad de archivo (no nombre, no sha256).
 - Dos archivos con mismo `RelPath` pero diferente `Sha256` se consideran Changed.
 - Los slices del report se ordenan por `RelPath` ASC para salida deterministica.
 - Función pura: no toca disco ni BD.
@@ -0,0 +1,126 @@
 package infra
 import (
 	"testing"
 )
 // makeVF returns a VaultFile fixture with the given path and hash. VaultID and
 // VaultName are fixed to "test_vault" / "test"; all other fields stay zero.
 func makeVF(relPath, sha256 string) VaultFile {
 	var vf VaultFile
 	vf.VaultID = "test_vault"
 	vf.VaultName = "test"
 	vf.RelPath = relPath
 	vf.Sha256 = sha256
 	return vf
 }
 // TestVaultDiff_NoChanges diffs a snapshot against itself: nothing may be
 // added, removed or changed, and both files must count as unchanged.
 func TestVaultDiff_NoChanges(t *testing.T) {
 	snapshot := []VaultFile{
 		makeVF("data/a.csv", "aaa"),
 		makeVF("data/b.csv", "bbb"),
 	}
 	got := VaultDiff(snapshot, snapshot)
 	if n := len(got.Added); n != 0 {
 		t.Errorf("Added: want 0, got %d", n)
 	}
 	if n := len(got.Removed); n != 0 {
 		t.Errorf("Removed: want 0, got %d", n)
 	}
 	if n := len(got.Changed); n != 0 {
 		t.Errorf("Changed: want 0, got %d", n)
 	}
 	if n := got.Unchanged; n != 2 {
 		t.Errorf("Unchanged: want 2, got %d", n)
 	}
 }
 // TestVaultDiff_AllAdded diffs from an empty (nil) previous snapshot: every
 // current file must be reported as Added, in RelPath order, and nothing as
 // Removed.
 func TestVaultDiff_AllAdded(t *testing.T) {
 	curr := []VaultFile{
 		makeVF("data/a.csv", "aaa"),
 		makeVF("data/b.csv", "bbb"),
 	}
 	report := VaultDiff(nil, curr)
 	// Fatalf, not Errorf: the assertions below index Added[0] and Added[1] and
 	// would panic with an index-out-of-range if the length were wrong.
 	if len(report.Added) != 2 {
 		t.Fatalf("Added: want 2, got %d", len(report.Added))
 	}
 	if len(report.Removed) != 0 {
 		t.Errorf("Removed: want 0, got %d", len(report.Removed))
 	}
 	if report.Added[0].RelPath != "data/a.csv" {
 		t.Errorf("Added[0]: want data/a.csv, got %s", report.Added[0].RelPath)
 	}
 	if report.Added[1].RelPath != "data/b.csv" {
 		t.Errorf("Added[1]: want data/b.csv, got %s", report.Added[1].RelPath)
 	}
 }
 // TestVaultDiff_AllRemoved diffs against an empty (nil) current snapshot: every
 // previous file must be reported as Removed, in RelPath order, and nothing as
 // Added.
 func TestVaultDiff_AllRemoved(t *testing.T) {
 	prev := []VaultFile{
 		makeVF("data/a.csv", "aaa"),
 		makeVF("data/b.csv", "bbb"),
 	}
 	report := VaultDiff(prev, nil)
 	// Fatalf, not Errorf: the assertions below index into Removed and would
 	// panic with an index-out-of-range if the length were wrong.
 	if len(report.Removed) != 2 {
 		t.Fatalf("Removed: want 2, got %d", len(report.Removed))
 	}
 	if len(report.Added) != 0 {
 		t.Errorf("Added: want 0, got %d", len(report.Added))
 	}
 	if report.Removed[0].RelPath != "data/a.csv" {
 		t.Errorf("Removed[0]: want data/a.csv, got %s", report.Removed[0].RelPath)
 	}
 	// Mirror TestVaultDiff_AllAdded: the second element pins the sort order.
 	if report.Removed[1].RelPath != "data/b.csv" {
 		t.Errorf("Removed[1]: want data/b.csv, got %s", report.Removed[1].RelPath)
 	}
 }
 // TestVaultDiff_ContentChanged keeps the path and changes the hash: the file
 // must show up once in Changed, carrying both the old and the new record, and
 // nowhere else.
 func TestVaultDiff_ContentChanged(t *testing.T) {
 	older := []VaultFile{makeVF("data/a.csv", "old_hash")}
 	newer := []VaultFile{makeVF("data/a.csv", "new_hash")}
 	got := VaultDiff(older, newer)
 	if n := len(got.Changed); n != 1 {
 		t.Fatalf("Changed: want 1, got %d", n)
 	}
 	change := got.Changed[0]
 	if change.RelPath != "data/a.csv" {
 		t.Errorf("Changed[0].RelPath: want data/a.csv, got %s", change.RelPath)
 	}
 	if change.Prev.Sha256 != "old_hash" {
 		t.Errorf("Changed[0].Prev.Sha256: want old_hash, got %s", change.Prev.Sha256)
 	}
 	if change.Curr.Sha256 != "new_hash" {
 		t.Errorf("Changed[0].Curr.Sha256: want new_hash, got %s", change.Curr.Sha256)
 	}
 	if added, removed := len(got.Added), len(got.Removed); added != 0 || removed != 0 {
 		t.Errorf("Expected no added/removed, got %d/%d", added, removed)
 	}
 	if n := got.Unchanged; n != 0 {
 		t.Errorf("Unchanged: want 0, got %d", n)
 	}
 }
 // TestVaultDiff_Mixed exercises all four classifications in a single diff:
 // a.csv is unchanged, b.csv changes hash, c.csv disappears and d.csv is new.
 func TestVaultDiff_Mixed(t *testing.T) {
 	older := []VaultFile{
 		makeVF("data/a.csv", "aaa"),
 		makeVF("data/b.csv", "bbb"),
 		makeVF("data/c.csv", "ccc"), // removed
 	}
 	newer := []VaultFile{
 		makeVF("data/a.csv", "aaa"),     // unchanged
 		makeVF("data/b.csv", "bbb_new"), // changed
 		makeVF("data/d.csv", "ddd"),     // added
 	}
 	got := VaultDiff(older, newer)
 	// Each length check short-circuits, so indexing [0] is safe.
 	if added := got.Added; len(added) != 1 || added[0].RelPath != "data/d.csv" {
 		t.Errorf("Added: want [data/d.csv], got %v", added)
 	}
 	if removed := got.Removed; len(removed) != 1 || removed[0].RelPath != "data/c.csv" {
 		t.Errorf("Removed: want [data/c.csv], got %v", removed)
 	}
 	if changed := got.Changed; len(changed) != 1 || changed[0].RelPath != "data/b.csv" {
 		t.Errorf("Changed: want [data/b.csv], got %v", changed)
 	}
 	if n := got.Unchanged; n != 1 {
 		t.Errorf("Unchanged: want 1, got %d", n)
 	}
 }
@@ -0,0 +1,230 @@
 package infra
 import (
 	"fmt"
 	"os"
 	"path/filepath"
 	"strings"
 	"time"
 )
 // VaultDoctorEntry holds the health report for a single vault, as produced by
 // auditVault. When the vault directory is missing, only the identity fields,
 // Issues and Status are populated; the counters keep their zero values.
 type VaultDoctorEntry struct {
 	VaultName     string   `json:"vault_name"` // Name from the manifest entry
 	VaultPath     string   `json:"vault_path"` // Path as declared in the manifest (symlinks not resolved)
 	ProjectID     string   `json:"project_id"` // ProjectID from the manifest entry
 	Issues        []string `json:"issues"`         // human-readable issues; empty = healthy
 	IndexedFiles  int      `json:"indexed_files"`  // COUNT(*) of the index's files table; 0 if no vault_index.db
 	LastIndexedAt int64    `json:"last_indexed_at"` // MAX(indexed_at) in unix seconds; 0 if N/A
 	DiskFiles     int      `json:"disk_files"`     // count via WalkDir (no hashing)
 	Status        string   `json:"status"`         // "ok" | "warning" | "error"
 }
 // VaultDoctor audits every vault returned by VaultManifestRead(repoRoot) (the
 // vaults declared in projects/*/vaults/vault.yaml) and returns one
 // VaultDoctorEntry per vault, in manifest order. Each vault is checked by
 // auditVault: disk presence, layout, index existence, staleness and drift.
 //
 // The audit is intended to be read-only. NOTE(review): auditVault opens an
 // existing index through VaultIndexOpen, which also applies the embedded
 // migrations, so "never writes" holds only as far as those are no-ops.
 //
 // An error is returned only when VaultManifestRead fails; per-vault problems
 // are reported through the entries' Issues and Status fields.
 func VaultDoctor(repoRoot string) ([]VaultDoctorEntry, error) {
 	manifests, readErr := VaultManifestRead(repoRoot)
 	if readErr != nil {
 		return nil, fmt.Errorf("vault_doctor: read manifests: %w", readErr)
 	}
 	report := make([]VaultDoctorEntry, 0, len(manifests))
 	for _, manifest := range manifests {
 		report = append(report, auditVault(manifest))
 	}
 	return report, nil
 }
 // auditVault runs all health checks against a single manifest entry and
 // returns the resulting report.
 //
 // Checks, in order:
 //  1. directory_missing   - path does not exist or is not a directory (Status
 //     "error"; no further checks run)
 //  2. layout_missing / non_standard_layout - neither data/ nor knowledge/ exists
 //  3. index_missing       - no vault_index.db in the vault root
 //  4. index_stale         - some file on disk is newer than MAX(indexed_at)
 //  5. index_drift         - disk file count differs from the indexed row count
 //  6. empty_vault         - no countable files on disk
 //
 // empty_vault is evaluated on every path past check 1, including the early
 // returns for a missing or unreadable index. Checks 4 and 5 are skipped when
 // the index cannot be opened or queried, because their inputs would be
 // meaningless (an indexed count of 0 would be reported as drift).
 //
 // Status is "error" for a missing directory, otherwise "ok" when Issues is
 // empty and "warning" when it is not.
 func auditVault(e VaultManifestEntry) VaultDoctorEntry {
 	entry := VaultDoctorEntry{
 		VaultName: e.Name,
 		VaultPath: e.Path,
 		ProjectID: e.ProjectID,
 	}
 	// Resolve symlinks for disk checks; fall back to the declared path so that
 	// a broken link is still reported as directory_missing below.
 	realPath, err := filepath.EvalSymlinks(e.Path)
 	if err != nil || realPath == "" {
 		realPath = e.Path
 	}
 	// CHECK 1: directory_missing
 	info, statErr := os.Stat(realPath)
 	if statErr != nil || !info.IsDir() {
 		entry.Issues = append(entry.Issues, "directory_missing")
 		entry.Status = "error"
 		return entry
 	}
 	// COUNT disk files (cheap walk — no hashing, no mime detection)
 	diskCount := countDiskFiles(realPath)
 	entry.DiskFiles = diskCount
 	// finish appends CHECK 6 (empty_vault) and derives the final status. Every
 	// return below goes through it so the check cannot be skipped.
 	finish := func() VaultDoctorEntry {
 		if diskCount == 0 {
 			entry.Issues = append(entry.Issues, "empty_vault")
 		}
 		entry.setFinalStatus()
 		return entry
 	}
 	// CHECK 2: layout_missing / non_standard_layout
 	hasData := dirExists(filepath.Join(realPath, "data"))
 	hasKnowledge := dirExists(filepath.Join(realPath, "knowledge"))
 	if !hasData && !hasKnowledge {
 		// Distinguish an intentional custom layout from no layout at all.
 		if hasNonStandardLayout(realPath) {
 			entry.Issues = append(entry.Issues, "non_standard_layout")
 		} else {
 			entry.Issues = append(entry.Issues, "layout_missing")
 		}
 	}
 	// CHECK 3: index_missing. Stat first: VaultIndexOpen would create the
 	// database if it did not exist.
 	indexPath := filepath.Join(realPath, "vault_index.db")
 	if _, indexStatErr := os.Stat(indexPath); indexStatErr != nil {
 		entry.Issues = append(entry.Issues, "index_missing")
 		return finish()
 	}
 	// NOTE(review): VaultIndexOpen applies the embedded migrations on open, so
 	// this is not a strictly read-only open — confirm that is acceptable here.
 	vdb, openErr := VaultIndexOpen(realPath)
 	if openErr != nil {
 		entry.Issues = append(entry.Issues, fmt.Sprintf("index_open_error: %v", openErr))
 		return finish()
 	}
 	defer vdb.Close()
 	// Query indexed file count and max indexed_at
 	var indexedCount int
 	var maxIndexedAt int64
 	row := vdb.QueryRow(`SELECT COUNT(*), COALESCE(MAX(indexed_at), 0) FROM files`)
 	if scanErr := row.Scan(&indexedCount, &maxIndexedAt); scanErr != nil {
 		// Without a valid count/timestamp the stale and drift checks would
 		// only produce false positives, so stop here.
 		entry.Issues = append(entry.Issues, fmt.Sprintf("index_query_error: %v", scanErr))
 		return finish()
 	}
 	entry.IndexedFiles = indexedCount
 	entry.LastIndexedAt = maxIndexedAt
 	// CHECK 4: index_stale — any file on disk newer than MAX(indexed_at).
 	// Skipped for an empty index (maxIndexedAt == 0); drift covers that case.
 	if maxIndexedAt > 0 && isIndexStale(realPath, time.Unix(maxIndexedAt, 0)) {
 		entry.Issues = append(entry.Issues, "index_stale")
 	}
 	// CHECK 5: index_drift — disk file count != indexed count
 	if indexedCount != diskCount {
 		entry.Issues = append(entry.Issues, fmt.Sprintf("index_drift: disk=%d indexed=%d", diskCount, indexedCount))
 	}
 	// CHECK 6 (empty_vault) and final status.
 	return finish()
 }
 // setWarningStatus downgrades the entry to "warning" unless it is already
 // marked "error", which always takes precedence.
 func (e *VaultDoctorEntry) setWarningStatus() {
 	if e.Status == "error" {
 		return
 	}
 	e.Status = "warning"
 }
 // setFinalStatus computes the definitive Status from the collected Issues:
 // an existing "error" is kept, no issues means "ok", anything else "warning".
 func (e *VaultDoctorEntry) setFinalStatus() {
 	switch {
 	case e.Status == "error":
 		// An error set by an earlier check is never overwritten.
 	case len(e.Issues) == 0:
 		e.Status = "ok"
 	default:
 		e.Status = "warning"
 	}
 }
 // countDiskFiles walks realPath and counts regular files, excluding:
 // vault_index.db*, .git/, hidden files/dirs at any depth.
 func countDiskFiles(realPath string) int {
 	count := 0
 	_ = filepath.WalkDir(realPath, func(path string, d os.DirEntry, err error) error {
 		if err != nil {
 			return nil
 		}
 		name := d.Name()
 		// Skip hidden entries
 		if strings.HasPrefix(name, ".") {
 			if d.IsDir() {
 				return filepath.SkipDir
 			}
 			return nil
 		}
 		// Skip .git
 		if d.IsDir() && name == ".git" {
 			return filepath.SkipDir
 		}
 		// Skip vault_index.db files
 		if !d.IsDir() && (name == "vault_index.db" || name == "vault_index.db-shm" || name == "vault_index.db-wal") {
 			return nil
 		}
 		if !d.IsDir() {
 			count++
 		}
 		return nil
 	})
 	return count
 }
 // isIndexStale returns true if any regular file under realPath has an mtime
 // strictly after maxTime (excluding vault_index.db* and hidden files).
 func isIndexStale(realPath string, maxTime time.Time) bool {
 	stale := false
 	_ = filepath.WalkDir(realPath, func(path string, d os.DirEntry, err error) error {
 		if err != nil || stale {
 			return nil
 		}
 		name := d.Name()
 		if strings.HasPrefix(name, ".") {
 			if d.IsDir() {
 				return filepath.SkipDir
 			}
 			return nil
 		}
 		if d.IsDir() && name == ".git" {
 			return filepath.SkipDir
 		}
 		if !d.IsDir() {
 			if name == "vault_index.db" || name == "vault_index.db-shm" || name == "vault_index.db-wal" {
 				return nil
 			}
 			fi, statErr := d.Info()
 			if statErr == nil && fi.ModTime().After(maxTime) {
 				stale = true
 			}
 		}
 		return nil
 	})
 	return stale
 }
 // hasNonStandardLayout returns true when a vault directory contains
 // subdirectories that are clearly intentional but not data/knowledge.
 // Heuristic: any subdir at the vault root that is not data/knowledge.
 func hasNonStandardLayout(realPath string) bool {
 	entries, err := os.ReadDir(realPath)
 	if err != nil {
 		return false
 	}
 	standardDirs := map[string]bool{"data": true, "knowledge": true, ".git": true}
 	for _, e := range entries {
 		if e.IsDir() && !standardDirs[e.Name()] && !strings.HasPrefix(e.Name(), ".") {
 			return true
 		}
 	}
 	return false
 }
@@ -0,0 +1,66 @@
 ---
 name: vault_doctor
 kind: function
 lang: go
 domain: infra
 version: "1.0.0"
 purity: impure
 signature: "func VaultDoctor(repoRoot string) ([]VaultDoctorEntry, error)"
 description: "Audita la salud de todos los vaults declarados en projects/*/vaults/vault.yaml. Comprueba existencia del directorio, layout estándar, presencia del índice, staleness y drift entre disco e índice. Read-only."
 tags: [vault, doctor, health, audit]
 uses_functions:
  - "vault_manifest_read_go_infra"
  - "vault_index_open_go_infra"
 uses_types: []
 returns: []
 returns_optional: false
 error_type: "error_go_core"
 imports:
  - "fmt"
  - "os"
  - "path/filepath"
  - "strings"
  - "time"
 tested: true
 tests:
  - "TestVaultDoctor_OK"
  - "TestVaultDoctor_MissingDir"
  - "TestVaultDoctor_NoIndex"
  - "TestVaultDoctor_LayoutDrift"
  - "TestVaultDoctor_EmptyVault"
 test_file_path: "functions/infra/vault_doctor_test.go"
 file_path: "functions/infra/vault_doctor.go"
 params:
  - name: repoRoot
    desc: "Ruta absoluta a la raiz del fn_registry (donde están projects/ y registry.db)."
 output: "Slice de VaultDoctorEntry con Status (ok/warning/error), Issues, DiskFiles, IndexedFiles y LastIndexedAt por vault. Error fatal solo si los manifests no se pueden leer."
 ---
 ## Checks aplicados
 | Check | Condición | Severidad |
 |---|---|---|
 | `directory_missing` | `e.Path` no existe en disco o no es un directorio | error |
 | `layout_missing` | no hay `data/` ni `knowledge/` en la raíz del vault | warning |
 | `non_standard_layout` | no hay `data/`/`knowledge/` pero sí otros subdirectorios (ej. imagegen_models) | warning |
 | `index_missing` | no existe `vault_index.db` | warning |
 | `index_stale` | algún archivo en disco tiene mtime > MAX(indexed_at) | warning |
 | `index_drift` | count disco != count en tabla `files` | warning |
 | `empty_vault` | DiskFiles == 0 | warning |
 ## Ejemplo
 ```go
 entries, err := infra.VaultDoctor("/home/lucas/fn_registry")
 for _, e := range entries {
    fmt.Printf("%-30s  %-8s  files=%d  issues=%v\n",
        e.VaultName, e.Status, e.DiskFiles, e.Issues)
 }
 ```
 ## Notas
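 - Función pensada como read-only: no crea ni modifica archivos del vault ni escribe filas en ninguna base de datos.
 - `countDiskFiles` usa `filepath.WalkDir` sin hash (cheap) — excluye `vault_index.db*`, `.git/` y ficheros ocultos.
 - `isIndexStale` también usa WalkDir; compara mtime de archivos con MAX(indexed_at) de la BD.
 - `index_missing` se detecta con `os.Stat` antes de abrir el índice. `VaultIndexOpen` abre *o crea* el DB y aplica las migraciones embebidas, por lo que solo se invoca cuando `vault_index.db` ya existe; aun así, esa apertura no es estrictamente de solo lectura (las migraciones se re-ejecutan de forma idempotente).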
@@ -0,0 +1,211 @@
 package infra
 import (
 	"os"
 	"path/filepath"
 	"testing"
 	"time"
 )
 // setupDoctorRepo creates a temporary repo root containing a single manifest,
 // projects/<projectID>/vaults/vault.yaml, that declares one vault at vaultPath,
 // and returns that root. vaultPath is written into the manifest verbatim and
 // is NOT created here, so tests may point it at a missing directory.
 func setupDoctorRepo(t *testing.T, vaultName, projectID, vaultPath string) string {
 	t.Helper()
 	repo := t.TempDir()
 	manifestDir := filepath.Join(repo, "projects", projectID, "vaults")
 	if mkErr := os.MkdirAll(manifestDir, 0755); mkErr != nil {
 		t.Fatalf("mkdir projects: %v", mkErr)
 	}
 	yamlDoc := "vaults:\n" +
 		"  - name: " + vaultName + "\n" +
 		"    description: test vault\n" +
 		"    path: " + vaultPath + "\n" +
 		"    tags: []\n"
 	manifestFile := filepath.Join(manifestDir, "vault.yaml")
 	if wrErr := os.WriteFile(manifestFile, []byte(yamlDoc), 0644); wrErr != nil {
 		t.Fatalf("write vault.yaml: %v", wrErr)
 	}
 	return repo
 }
 // TestVaultDoctor_OK builds a fully healthy vault (standard layout, one file,
 // and an index that lists that file and is newer than it) and expects status
 // "ok" with no issues.
 func TestVaultDoctor_OK(t *testing.T) {
 	vault := t.TempDir()
 	// Standard layout: data/raw and knowledge.
 	for _, sub := range []string{filepath.Join("data", "raw"), "knowledge"} {
 		if err := os.MkdirAll(filepath.Join(vault, sub), 0755); err != nil {
 			t.Fatal(err)
 		}
 	}
 	// One data file, back-dated by an hour so the index is clearly newer.
 	sample := filepath.Join(vault, "data", "raw", "sample.csv")
 	if err := os.WriteFile(sample, []byte("a,b\n1,2\n"), 0644); err != nil {
 		t.Fatal(err)
 	}
 	anHourAgo := time.Now().Add(-time.Hour)
 	if err := os.Chtimes(sample, anHourAgo, anHourAgo); err != nil {
 		t.Fatal(err)
 	}
 	// Index the file with indexed_at = now, i.e. after the file's mtime.
 	index, idxErr := VaultIndexOpen(vault)
 	if idxErr != nil {
 		t.Fatalf("VaultIndexOpen: %v", idxErr)
 	}
 	indexedAt := time.Now().Unix()
 	_, insErr := index.Exec(`INSERT INTO files (rel_path, size, mtime, sha256, mime, ext, bucket, sub_bucket, indexed_at)
 VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)`,
 		"data/raw/sample.csv", 8, anHourAgo.Unix(), "deadbeef", "text/csv", ".csv", "data", "raw", indexedAt)
 	if insErr != nil {
 		t.Fatalf("insert: %v", insErr)
 	}
 	index.Close()
 	repo := setupDoctorRepo(t, "my_vault", "my_proj", vault)
 	report, runErr := VaultDoctor(repo)
 	if runErr != nil {
 		t.Fatalf("unexpected error: %v", runErr)
 	}
 	if n := len(report); n != 1 {
 		t.Fatalf("expected 1 entry, got %d", n)
 	}
 	got := report[0]
 	if got.Status != "ok" {
 		t.Errorf("Status: want ok, got %s (issues: %v)", got.Status, got.Issues)
 	}
 	if len(got.Issues) != 0 {
 		t.Errorf("Issues: want empty, got %v", got.Issues)
 	}
 	if got.DiskFiles != 1 {
 		t.Errorf("DiskFiles: want 1, got %d", got.DiskFiles)
 	}
 	if got.IndexedFiles != 1 {
 		t.Errorf("IndexedFiles: want 1, got %d", got.IndexedFiles)
 	}
 }
 // TestVaultDoctor_MissingDir points the manifest at a path that does not exist
 // and expects status "error" with a directory_missing issue.
 func TestVaultDoctor_MissingDir(t *testing.T) {
 	absent := filepath.Join(t.TempDir(), "does_not_exist")
 	repo := setupDoctorRepo(t, "missing_vault", "my_proj", absent)
 	report, runErr := VaultDoctor(repo)
 	if runErr != nil {
 		t.Fatalf("unexpected error: %v", runErr)
 	}
 	if n := len(report); n != 1 {
 		t.Fatalf("expected 1 entry, got %d", n)
 	}
 	got := report[0]
 	if got.Status != "error" {
 		t.Errorf("Status: want error, got %s", got.Status)
 	}
 	seen := false
 	for i := 0; i < len(got.Issues) && !seen; i++ {
 		seen = got.Issues[i] == "directory_missing"
 	}
 	if !seen {
 		t.Errorf("Expected directory_missing issue, got %v", got.Issues)
 	}
 }
 // TestVaultDoctor_NoIndex audits a vault with a valid layout and one file but
 // no vault_index.db, and expects status "warning" with an index_missing issue.
 func TestVaultDoctor_NoIndex(t *testing.T) {
 	vault := t.TempDir()
 	// Valid layout with content; the index is deliberately never created.
 	rawDir := filepath.Join(vault, "data", "raw")
 	if err := os.MkdirAll(rawDir, 0755); err != nil {
 		t.Fatal(err)
 	}
 	if err := os.WriteFile(filepath.Join(rawDir, "a.csv"), []byte("x"), 0644); err != nil {
 		t.Fatal(err)
 	}
 	repo := setupDoctorRepo(t, "no_index_vault", "my_proj", vault)
 	report, runErr := VaultDoctor(repo)
 	if runErr != nil {
 		t.Fatalf("unexpected error: %v", runErr)
 	}
 	if n := len(report); n != 1 {
 		t.Fatalf("expected 1 entry, got %d", n)
 	}
 	got := report[0]
 	if got.Status != "warning" {
 		t.Errorf("Status: want warning, got %s", got.Status)
 	}
 	seen := false
 	for i := 0; i < len(got.Issues) && !seen; i++ {
 		seen = got.Issues[i] == "index_missing"
 	}
 	if !seen {
 		t.Errorf("Expected index_missing issue, got %v", got.Issues)
 	}
 }
 // TestVaultDoctor_LayoutDrift audits a vault that has neither data/ nor
 // knowledge/ (only a stray file at the root) and expects status "warning" with
 // a layout issue, either layout_missing or non_standard_layout.
 func TestVaultDoctor_LayoutDrift(t *testing.T) {
 	vault := t.TempDir()
 	// A single file at the root and no standard directories.
 	stray := filepath.Join(vault, "something.txt")
 	if err := os.WriteFile(stray, []byte("hi"), 0644); err != nil {
 		t.Fatal(err)
 	}
 	repo := setupDoctorRepo(t, "layout_vault", "my_proj", vault)
 	report, runErr := VaultDoctor(repo)
 	if runErr != nil {
 		t.Fatalf("unexpected error: %v", runErr)
 	}
 	if n := len(report); n != 1 {
 		t.Fatalf("expected 1 entry, got %d", n)
 	}
 	got := report[0]
 	if got.Status != "warning" {
 		t.Errorf("Status: want warning, got %s", got.Status)
 	}
 	layoutFlagged := false
 	for _, issue := range got.Issues {
 		switch issue {
 		case "layout_missing", "non_standard_layout":
 			layoutFlagged = true
 		}
 	}
 	if !layoutFlagged {
 		t.Errorf("Expected layout_missing or non_standard_layout, got %v", got.Issues)
 	}
 }
 // TestVaultDoctor_EmptyVault audits a vault with the standard directories and
 // an (empty) index but no files, and expects status "warning" with an
 // empty_vault issue.
 func TestVaultDoctor_EmptyVault(t *testing.T) {
 	vault := t.TempDir()
 	// Standard directories exist but contain nothing.
 	for _, sub := range []string{"data", "knowledge"} {
 		if err := os.MkdirAll(filepath.Join(vault, sub), 0755); err != nil {
 			t.Fatal(err)
 		}
 	}
 	// An index with no rows.
 	index, idxErr := VaultIndexOpen(vault)
 	if idxErr != nil {
 		t.Fatalf("VaultIndexOpen: %v", idxErr)
 	}
 	index.Close()
 	repo := setupDoctorRepo(t, "empty_vault", "my_proj", vault)
 	report, runErr := VaultDoctor(repo)
 	if runErr != nil {
 		t.Fatalf("unexpected error: %v", runErr)
 	}
 	if n := len(report); n != 1 {
 		t.Fatalf("expected 1 entry, got %d", n)
 	}
 	got := report[0]
 	if got.Status != "warning" {
 		t.Errorf("Status: want warning, got %s (issues: %v)", got.Status, got.Issues)
 	}
 	seen := false
 	for i := 0; i < len(got.Issues) && !seen; i++ {
 		seen = got.Issues[i] == "empty_vault"
 	}
 	if !seen {
 		t.Errorf("Expected empty_vault issue, got %v", got.Issues)
 	}
 }
@@ -0,0 +1,21 @@
 package infra
 // VaultFile describes a single file inside a vault directory.
 // It carries identity (vault + relative path), content metadata (size, mtime, sha256, mime)
 // and structural classification (bucket, sub-bucket).
 //
 // VaultDiff uses RelPath as the identity key and Sha256 as the only signal
 // that a file's content changed; the remaining fields are descriptive.
 type VaultFile struct {
 	VaultID   string `json:"vault_id"`   // e.g. "turismo_spain_app_turismo"
 	VaultName string `json:"vault_name"` // e.g. "turismo_spain"
 	RelPath   string `json:"rel_path"`   // path relative to vault root, e.g. "data/raw/foo.csv"
 	Size      int64  `json:"size"`       // bytes
 	Mtime     int64  `json:"mtime"`      // unix seconds (UTC)
 	Sha256    string `json:"sha256"`     // hex lowercase
 	Mime      string `json:"mime"`       // e.g. "text/csv"
 	Ext       string `json:"ext"`        // e.g. ".csv"
 	// Bucket is the top-level classification: "data" or "knowledge".
 	Bucket string `json:"bucket"`
 	// SubBucket is the second-level directory within the bucket.
 	// Known values: raw, processed, exports (data); decisions, domains, models,
 	// benchmarks, test_documents (knowledge). Empty string for files at bucket root.
 	SubBucket string `json:"sub_bucket"`
 }
@@ -0,0 +1,49 @@
 -- Initial schema of a per-vault index database (vault_index.db).
 -- Every statement uses IF NOT EXISTS so the script can be re-applied safely.
 --
 -- files: one row per indexed file, keyed by its vault-relative path.
 -- mtime and indexed_at are integers (the Go callers store unix seconds).
 CREATE TABLE IF NOT EXISTS files (
    rel_path    TEXT PRIMARY KEY,
    size        INTEGER NOT NULL,
    mtime       INTEGER NOT NULL,
    sha256      TEXT NOT NULL,
    mime        TEXT NOT NULL DEFAULT '',
    ext         TEXT NOT NULL DEFAULT '',
    bucket      TEXT NOT NULL DEFAULT '',
    sub_bucket  TEXT NOT NULL DEFAULT '',
    indexed_at  INTEGER NOT NULL
 );
 -- Lookup by content hash, and by bucket / sub_bucket classification.
 CREATE INDEX IF NOT EXISTS idx_files_sha256 ON files(sha256);
 CREATE INDEX IF NOT EXISTS idx_files_bucket ON files(bucket, sub_bucket);
 -- Full-text index over path and extracted text.
 -- NOTE(review): an empty content option declares a contentless FTS5 table.
 -- Per the SQLite FTS5 documentation such a table does not store column
 -- values, so a MATCH query can return the rowid but not rel_path. Confirm
 -- that writers and readers agree on how rowid maps back to the files table.
 CREATE VIRTUAL TABLE IF NOT EXISTS files_fts USING fts5(
    rel_path,
    content_text,
    content='',
    tokenize='unicode61 remove_diacritics 2'
 );
 -- csv_profiles: per-file CSV profile (columns as JSON, row count, encoding,
 -- optional date range). Rows are removed when the parent files row is deleted.
 CREATE TABLE IF NOT EXISTS csv_profiles (
    rel_path    TEXT PRIMARY KEY,
    cols_json   TEXT NOT NULL,
    n_rows      INTEGER NOT NULL,
    encoding    TEXT NOT NULL DEFAULT '',
    date_min    TEXT,
    date_max    TEXT,
    profiled_at INTEGER NOT NULL,
    FOREIGN KEY (rel_path) REFERENCES files(rel_path) ON DELETE CASCADE
 );
 -- pdf_extracts: per-file PDF text-extraction metadata. Rows are removed when
 -- the parent files row is deleted.
 CREATE TABLE IF NOT EXISTS pdf_extracts (
    rel_path     TEXT PRIMARY KEY,
    page_count   INTEGER NOT NULL,
    text_len     INTEGER NOT NULL,
    extracted_to TEXT,
    extracted_at INTEGER NOT NULL,
    FOREIGN KEY (rel_path) REFERENCES files(rel_path) ON DELETE CASCADE
 );
 -- knowledge_docs: per-file title, frontmatter and headings stored as JSON
 -- text. Rows are removed when the parent files row is deleted.
 CREATE TABLE IF NOT EXISTS knowledge_docs (
    rel_path         TEXT PRIMARY KEY,
    title            TEXT NOT NULL DEFAULT '',
    frontmatter_json TEXT NOT NULL DEFAULT '{}',
    headings_json    TEXT NOT NULL DEFAULT '[]',
    parsed_at        INTEGER NOT NULL,
    FOREIGN KEY (rel_path) REFERENCES files(rel_path) ON DELETE CASCADE
 );
@@ -0,0 +1,30 @@
 package infra
 import (
 	"database/sql"
 	"embed"
 	"fmt"
 	"path/filepath"
 )
 // vaultIndexMigrationsFS holds the vault_index_migrations/*.sql files embedded
 // at build time. VaultIndexOpen passes it to ApplyMigrations.
 //
 //go:embed vault_index_migrations/*.sql
 var vaultIndexMigrationsFS embed.FS
 // VaultIndexOpen opens <vaultPath>/vault_index.db through SQLiteOpen and then
 // runs the embedded vault_index_migrations/*.sql against it with
 // ApplyMigrations, returning a *sql.DB that the caller must Close.
 //
 // Per the package documentation, SQLiteOpen creates the file when it is
 // missing and enables WAL mode and foreign keys, and ApplyMigrations is
 // idempotent; neither helper is visible here, so treat those as their
 // documented contracts. If the migrations fail, the connection is closed
 // before the wrapped error is returned.
 func VaultIndexOpen(vaultPath string) (*sql.DB, error) {
 	conn, openErr := SQLiteOpen(filepath.Join(vaultPath, "vault_index.db"), "")
 	if openErr != nil {
 		return nil, fmt.Errorf("vault_index_open: %w", openErr)
 	}
 	migErr := ApplyMigrations(conn, vaultIndexMigrationsFS, "vault_index_migrations/*.sql")
 	if migErr == nil {
 		return conn, nil
 	}
 	// Do not leak the handle when the schema could not be brought up to date.
 	conn.Close()
 	return nil, fmt.Errorf("vault_index_open: apply migrations: %w", migErr)
 }
@@ -0,0 +1,54 @@
 ---
 name: vault_index_open
 kind: function
 lang: go
 domain: infra
 version: "1.0.0"
 purity: impure
 signature: "func VaultIndexOpen(vaultPath string) (*sql.DB, error)"
 description: "Abre (o crea) vault_index.db dentro de vaultPath con WAL + FK y aplica las migraciones embebidas idempotentemente. El caller cierra la conexion."
 tags: [vault, sqlite, index, migration, infra]
 uses_functions: ["sqlite_open_go_infra", "sqlite_apply_migrations_go_infra"]
 uses_types: []
 returns: []
 returns_optional: false
 error_type: "error_go_core"
 imports: [database/sql, embed, fmt, path/filepath]
 params:
  - name: vaultPath
    desc: "ruta absoluta o relativa al directorio raiz del vault"
 output: "*sql.DB apuntando a <vaultPath>/vault_index.db con schema completo aplicado; el caller es responsable de cerrar"
 tested: true
 tests:
  - "crea vault_index.db en tmpdir vacio"
  - "segunda apertura no falla (idempotente)"
  - "todas las tablas esperadas existen en sqlite_master"
  - "fts5 INSERT y MATCH funcionan"
 test_file_path: "functions/infra/vault_index_open_test.go"
 file_path: "functions/infra/vault_index_open.go"
 ---
 ## Ejemplo
 ```go
 db, err := VaultIndexOpen("/data/vaults/turismo_spain")
 if err != nil {
    log.Fatal(err)
 }
 defer db.Close()
 ```
 ## Notas
 El archivo de base de datos se crea en `<vaultPath>/vault_index.db`. Las migraciones
 viven en `vault_index_migrations/*.sql` embebidas via `//go:embed` en el mismo paquete.
 Schema creado por `001_init.sql`:
 - `files` — inventario de archivos (PK: rel_path)
 - `files_fts` — tabla FTS5 virtual para busqueda de texto (content_text lo llenan profilers posteriores)
 - `csv_profiles` — perfil de columnas/filas para .csv (FK → files)
 - `pdf_extracts` — metadatos de extraccion de texto para .pdf (FK → files)
 - `knowledge_docs` — headings/frontmatter para .md del bucket knowledge (FK → files)
 `SQLiteOpen` abre con WAL mode + foreign keys. `ApplyMigrations` es idempotente:
 los errores de "already exists" y "duplicate column" se ignoran silenciosamente.
@@ -0,0 +1,107 @@
 package infra
 import (
 	"database/sql"
 	"os"
 	"path/filepath"
 	"testing"
 )
 // TestVaultIndexOpen_CreatesDB checks that opening the index on an empty
 // directory leaves a regular vault_index.db file inside it.
 func TestVaultIndexOpen_CreatesDB(t *testing.T) {
 	t.Run("crea vault_index.db en tmpdir vacio", func(t *testing.T) {
 		dir := t.TempDir()
 		db, err := VaultIndexOpen(dir)
 		if err != nil {
 			t.Fatalf("VaultIndexOpen: %v", err)
 		}
 		defer db.Close()
 		dbPath := filepath.Join(dir, "vault_index.db")
 		info, err := os.Stat(dbPath)
 		if os.IsNotExist(err) {
 			t.Fatalf("vault_index.db no fue creado en %s", dir)
 		}
 		// Any other stat failure (permissions, I/O) must fail the test too
 		// instead of being mistaken for "file exists".
 		if err != nil {
 			t.Fatalf("stat %s: %v", dbPath, err)
 		}
 		if !info.Mode().IsRegular() {
 			t.Fatalf("%s is not a regular file (mode %v)", dbPath, info.Mode())
 		}
 	})
 }
 // TestVaultIndexOpen_Idempotent opens the same vault directory twice in a row,
 // closing the handle after each open, and expects both opens to succeed.
 func TestVaultIndexOpen_Idempotent(t *testing.T) {
 	t.Run("segunda apertura no falla (idempotente)", func(t *testing.T) {
 		vaultDir := t.TempDir()
 		// One failure message per attempt, in the order the attempts run.
 		for _, failFmt := range []string{"primera apertura: %v", "segunda apertura: %v"} {
 			handle, openErr := VaultIndexOpen(vaultDir)
 			if openErr != nil {
 				t.Fatalf(failFmt, openErr)
 			}
 			handle.Close()
 		}
 	})
 }
 // TestVaultIndexOpen_AppliesAllMigrations checks that every table the schema
 // is documented to create is present after VaultIndexOpen, including the
 // files_fts virtual table that the other tests insert into.
 func TestVaultIndexOpen_AppliesAllMigrations(t *testing.T) {
 	t.Run("todas las tablas esperadas existen en sqlite_master", func(t *testing.T) {
 		dir := t.TempDir()
 		db, err := VaultIndexOpen(dir)
 		if err != nil {
 			t.Fatalf("VaultIndexOpen: %v", err)
 		}
 		defer db.Close()
 		expectedTables := []string{
 			"files",
 			"files_fts", // virtual table; listed in sqlite_master like a regular table
 			"csv_profiles",
 			"pdf_extracts",
 			"knowledge_docs",
 		}
 		for _, tbl := range expectedTables {
 			assertTableExists(t, db, tbl)
 		}
 	})
 }
 // TestVaultIndexOpen_FTS5Works inserts one row into files_fts and expects a
 // MATCH query on a word of its content_text to find exactly that row.
 func TestVaultIndexOpen_FTS5Works(t *testing.T) {
 	t.Run("fts5 INSERT y MATCH funcionan", func(t *testing.T) {
 		conn, openErr := VaultIndexOpen(t.TempDir())
 		if openErr != nil {
 			t.Fatalf("VaultIndexOpen: %v", openErr)
 		}
 		defer conn.Close()
 		// The row is written by hand.
 		// NOTE(review): the original comment says files_fts is declared with
 		// content='' — its definition is not visible from here, confirm in the migration.
 		_, insertErr := conn.Exec(`INSERT INTO files_fts(rel_path, content_text) VALUES (?, ?)`,
 			"data/raw/informe_ventas.csv", "ventas trimestrales empresa")
 		if insertErr != nil {
 			t.Fatalf("INSERT files_fts: %v", insertErr)
 		}
 		var hits int
 		match := conn.QueryRow(`SELECT count(*) FROM files_fts WHERE files_fts MATCH 'ventas'`)
 		if scanErr := match.Scan(&hits); scanErr != nil {
 			t.Fatalf("FTS MATCH query: %v", scanErr)
 		}
 		if hits != 1 {
 			t.Errorf("FTS MATCH: got %d rows, want 1", hits)
 		}
 	})
 }
 // assertTableExists reports a test error when sqlite_master has no table named
 // name. Virtual tables are stored in sqlite_master with type 'table', so they
 // are accepted as well; an index, view or trigger that merely shares the name
 // does not count.
 //
 // A failure of the lookup query itself aborts the test with Fatalf.
 func assertTableExists(t *testing.T, db *sql.DB, name string) {
 	t.Helper()
 	var exists int
 	err := db.QueryRow(
 		`SELECT count(*) FROM sqlite_master WHERE type = 'table' AND name = ?`, name,
 	).Scan(&exists)
 	if err != nil {
 		t.Fatalf("sqlite_master query for %q: %v", name, err)
 	}
 	if exists == 0 {
 		t.Errorf("table/vtable %q not found in sqlite_master", name)
 	}
 }
@@ -0,0 +1,154 @@
 package infra
 import (
 	"database/sql"
 	"fmt"
 	"strings"
 	"time"
 )
 // WriteReport summarises the outcome of a VaultIndexWrite call.
 // Every counter refers to the single transaction run by that call.
 type WriteReport struct {
 	Inserted int // input entries whose rel_path was not in files before the call
 	Updated  int // input entries whose rel_path was already in files (row overwritten by the upsert)
 	Pruned   int // rows deleted from files by the prune step (stays 0 when prune=false)
 	FTS      int // rows inserted into files_fts, one per input entry
 }
 // VaultIndexWrite upserts files into the files table of the vault index
 // database db, refreshes the matching files_fts rows and, when prune is true,
 // deletes every files row whose rel_path is not part of the slice.
 //
 // Parameters:
 //   - db: open handle on vault_index.db (typically returned by VaultIndexOpen).
 //   - files: entries to insert or update; may be empty.
 //   - prune: when true, rows absent from files are deleted; with an empty
 //     slice this deletes every row. Deletions cascade to csv_profiles,
 //     pdf_extracts and knowledge_docs through their ON DELETE CASCADE keys.
 //
 // Returns a WriteReport with the Inserted/Updated/Pruned/FTS counters. On error
 // the report holds the counts reached so far and the error is wrapped with a
 // "vault_index_write:" prefix.
 //
 // Transaction: all changes run inside a single transaction, which is rolled
 // back on every error path and committed only at the very end. With an empty
 // slice and prune=false nothing is opened and a zero report is returned.
 //
 // Counting strategy: the rel_paths already stored are read once before the
 // loop. An entry counts as Inserted when its rel_path was absent and Updated
 // otherwise; a rel_path repeated inside files counts as Inserted once and as
 // Updated for every later occurrence. This avoids one lookup query per entry.
 //
 // FTS5: for each entry the files_fts row is deleted by rel_path and re-inserted
 // with an empty content_text. Downstream profilers (csv_profiles, pdf_extracts,
 // knowledge_docs) are responsible for filling content_text with real text.
 func VaultIndexWrite(db *sql.DB, files []VaultFile, prune bool) (WriteReport, error) {
 	var report WriteReport
 	if len(files) == 0 && !prune {
 		return report, nil
 	}
 	tx, err := db.Begin()
 	if err != nil {
 		return report, fmt.Errorf("vault_index_write: begin tx: %w", err)
 	}
 	// Roll back unless Commit succeeded. A flag is used instead of inspecting
 	// err because several error paths below return an error that never lands
 	// in this function-level variable.
 	committed := false
 	defer func() {
 		if !committed {
 			tx.Rollback() //nolint:errcheck
 		}
 	}()
 	// Load existing rel_paths into a set to distinguish insert vs update.
 	existing := make(map[string]struct{})
 	rows, err := tx.Query(`SELECT rel_path FROM files`)
 	if err != nil {
 		return report, fmt.Errorf("vault_index_write: query existing: %w", err)
 	}
 	for rows.Next() {
 		var rp string
 		if err = rows.Scan(&rp); err != nil {
 			rows.Close()
 			return report, fmt.Errorf("vault_index_write: scan existing: %w", err)
 		}
 		existing[rp] = struct{}{}
 	}
 	// Close before running further statements on the same transaction.
 	rows.Close()
 	if err = rows.Err(); err != nil {
 		return report, fmt.Errorf("vault_index_write: rows err: %w", err)
 	}
 	// One timestamp for the whole batch so every row shares the same indexed_at.
 	now := time.Now().Unix()
 	upsertStmt, err := tx.Prepare(`
 		INSERT INTO files (rel_path, size, mtime, sha256, mime, ext, bucket, sub_bucket, indexed_at)
 		VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)
 		ON CONFLICT(rel_path) DO UPDATE SET
 			size       = excluded.size,
 			mtime      = excluded.mtime,
 			sha256     = excluded.sha256,
 			mime       = excluded.mime,
 			ext        = excluded.ext,
 			bucket     = excluded.bucket,
 			sub_bucket = excluded.sub_bucket,
 			indexed_at = excluded.indexed_at
 	`)
 	if err != nil {
 		return report, fmt.Errorf("vault_index_write: prepare upsert: %w", err)
 	}
 	defer upsertStmt.Close()
 	// NOTE(review): if files_fts is declared contentless (content=''), as the
 	// accompanying notes state, its columns read back as NULL and this DELETE
 	// matches no row, so rewrites would accumulate FTS entries — confirm
 	// against the migration that creates files_fts.
 	ftsDeleteStmt, err := tx.Prepare(`DELETE FROM files_fts WHERE rel_path = ?`)
 	if err != nil {
 		return report, fmt.Errorf("vault_index_write: prepare fts delete: %w", err)
 	}
 	defer ftsDeleteStmt.Close()
 	ftsInsertStmt, err := tx.Prepare(`INSERT INTO files_fts(rel_path, content_text) VALUES (?, '')`)
 	if err != nil {
 		return report, fmt.Errorf("vault_index_write: prepare fts insert: %w", err)
 	}
 	defer ftsInsertStmt.Close()
 	for _, f := range files {
 		_, err = upsertStmt.Exec(
 			f.RelPath, f.Size, f.Mtime, f.Sha256,
 			f.Mime, f.Ext, f.Bucket, f.SubBucket, now,
 		)
 		if err != nil {
 			return report, fmt.Errorf("vault_index_write: upsert %q: %w", f.RelPath, err)
 		}
 		if _, wasExisting := existing[f.RelPath]; wasExisting {
 			report.Updated++
 		} else {
 			report.Inserted++
 			// Remember the path so a duplicate later in the slice is counted
 			// as an update of the row that was just inserted.
 			existing[f.RelPath] = struct{}{}
 		}
 		// Refresh FTS row.
 		if _, err = ftsDeleteStmt.Exec(f.RelPath); err != nil {
 			return report, fmt.Errorf("vault_index_write: fts delete %q: %w", f.RelPath, err)
 		}
 		if _, err = ftsInsertStmt.Exec(f.RelPath); err != nil {
 			return report, fmt.Errorf("vault_index_write: fts insert %q: %w", f.RelPath, err)
 		}
 		report.FTS++
 	}
 	// Prune rows not present in the incoming slice.
 	// NOTE(review): only the files table (plus its FK children) is pruned here;
 	// files_fts rows of pruned paths are left in place.
 	if prune && len(files) > 0 {
 		// rel_paths are embedded as SQL string literals; doubling every single
 		// quote is the escaping SQLite defines for such literals.
 		keep := make([]string, len(files))
 		for i, f := range files {
 			keep[i] = "'" + strings.ReplaceAll(f.RelPath, "'", "''") + "'"
 		}
 		inClause := strings.Join(keep, ",")
 		res, execErr := tx.Exec(fmt.Sprintf(
 			`DELETE FROM files WHERE rel_path NOT IN (%s)`, inClause,
 		))
 		if execErr != nil {
 			return report, fmt.Errorf("vault_index_write: prune: %w", execErr)
 		}
 		n, _ := res.RowsAffected()
 		report.Pruned = int(n)
 	} else if prune {
 		// prune=true with empty slice means delete everything.
 		res, execErr := tx.Exec(`DELETE FROM files`)
 		if execErr != nil {
 			return report, fmt.Errorf("vault_index_write: prune all: %w", execErr)
 		}
 		n, _ := res.RowsAffected()
 		report.Pruned = int(n)
 	}
 	if err = tx.Commit(); err != nil {
 		return report, fmt.Errorf("vault_index_write: commit: %w", err)
 	}
 	committed = true
 	return report, nil
 }
@@ -0,0 +1,84 @@
 ---
 name: vault_index_write
 kind: function
 lang: go
 domain: infra
 version: "1.0.0"
 purity: impure
 signature: "func VaultIndexWrite(db *sql.DB, files []VaultFile, prune bool) (WriteReport, error)"
 description: "Upserta un slice de VaultFile en vault_index.db (tabla files + FTS5 files_fts) dentro de una sola transaccion. Cuenta Inserted/Updated/FTS. Con prune=true elimina filas no presentes en el slice."
 tags: [vault, sqlite, index, write, upsert, fts, infra]
 uses_functions: []
 uses_types: ["vault_file_go_infra"]
 returns: []
 returns_optional: false
 error_type: "error_go_core"
 imports: [database/sql, fmt, strings, time]
 params:
  - name: db
    desc: "*sql.DB abierto sobre vault_index.db (tipicamente retornado por VaultIndexOpen)"
  - name: files
    desc: "slice de VaultFile a insertar/actualizar; puede ser vacio"
  - name: prune
    desc: "si true, elimina de 'files' todas las filas cuyo rel_path no este en el slice (sincronizacion destructiva)"
 output: "WriteReport con conteos Inserted/Updated/Pruned/FTS; error si falla la transaccion"
 tested: true
 tests:
  - "N archivos nuevos — Inserted=N"
  - "re-escritura con mtime distinto — Updated=N"
  - "prune elimina filas ausentes"
  - "sin prune, filas previas persisten"
  - "FTS5 MATCH funciona tras escritura"
 test_file_path: "functions/infra/vault_index_write_test.go"
 file_path: "functions/infra/vault_index_write.go"
 ---
 ## Ejemplo
 ```go
 db, _ := VaultIndexOpen("/data/vaults/turismo")
 defer db.Close()
 files, _ := VaultInventoryScan("/data/vaults/turismo", "turismo_v1", "turismo")
 report, err := VaultIndexWrite(db, files, true)
 if err != nil {
    log.Fatal(err)
 }
 fmt.Printf("inserted=%d updated=%d pruned=%d fts=%d\n",
    report.Inserted, report.Updated, report.Pruned, report.FTS)
 ```
 ## Notas
 ### WriteReport
 Struct local al paquete infra:
 ```go
 type WriteReport struct {
    Inserted int
    Updated  int
    Pruned   int
    FTS      int
 }
 ```
 ### Estrategia de conteo Inserted vs Updated
 Se carga el conjunto de rel_paths existentes en un map antes del loop. Un upsert
 se clasifica como Inserted si el rel_path no estaba en el map, Updated si estaba.
 Esto evita N+1 SELECTs y es correcto porque la transaccion serializa los cambios.
 ### FTS5
 `files_fts` usa `content=''` (tabla FTS5 sin contenido, *contentless*). Para cada archivo
 se borra la fila FTS existente y se reinserta con `content_text=''`. Los profilers
 posteriores (csv_profiles, pdf_extracts, knowledge_docs) son responsables de actualizar
 `content_text` con texto indexable real.
 ### Prune
 Con `prune=true` se construye un IN clause con los rel_paths del slice. La FK con
 `ON DELETE CASCADE` propaga el DELETE a csv_profiles, pdf_extracts y knowledge_docs
 automaticamente. Con slice vacio + prune=true se borra todo (DELETE FROM files).
 ### Escapado SQL
 El IN clause se construye escapando las comillas simples en rel_path (duplicandolas).
 Evita inyeccion en rutas con apostrofos. Para entornos con rutas controladas
 (interior de vaults sin apostrofos) esto es suficiente; para entornos adversariales
 usar parametros binding con VALUES multiples via prepared statement.
@@ -0,0 +1,210 @@
 package infra
 import (
 	"testing"
 	"time"
 )
 // makeTestVaultFile builds a minimal VaultFile fixture for the write tests.
 //
 // relPath, mime, bucket and subBucket are copied into the result. Ext is the
 // extension of relPath's last path segment (from its final dot, dot included;
 // empty when that segment has no dot), so the fixture stays coherent for
 // non-CSV paths such as "knowledge/decisions/x.md". The remaining fields are
 // fixed: vault id/name "test_vault"/"test", size 100, a 64-character dummy
 // sha256, and the current time as Mtime.
 func makeTestVaultFile(relPath, mime, bucket, subBucket string) VaultFile {
 	// Hand-rolled equivalent of path.Ext: this test file imports only
 	// "testing" and "time".
 	ext := ""
 	for i := len(relPath) - 1; i >= 0 && relPath[i] != '/'; i-- {
 		if relPath[i] == '.' {
 			ext = relPath[i:]
 			break
 		}
 	}
 	return VaultFile{
 		VaultID:   "test_vault",
 		VaultName: "test",
 		RelPath:   relPath,
 		Size:      100,
 		Mtime:     time.Now().Unix(),
 		Sha256:    "abc123def456abc123def456abc123def456abc123def456abc123def456abc1",
 		Mime:      mime,
 		Ext:       ext,
 		Bucket:    bucket,
 		SubBucket: subBucket,
 	}
 }
 // openInMemoryVaultIndex opens a vault index for a test and returns it as a
 // bare closer; the caller is expected to Close it. Despite the name, the
 // database is file-backed: it is created inside a fresh t.TempDir().
 // The test is aborted with Fatalf when the index cannot be opened.
 func openInMemoryVaultIndex(t *testing.T) interface{ Close() error } {
 	t.Helper()
 	handle, openErr := VaultIndexOpen(t.TempDir())
 	if openErr != nil {
 		t.Fatalf("VaultIndexOpen: %v", openErr)
 	}
 	return handle
 }
 // TestVaultIndexWrite_FreshInsert writes three new entries into an empty index
 // without pruning and expects Inserted=3, Updated=0, Pruned=0 and FTS=3.
 func TestVaultIndexWrite_FreshInsert(t *testing.T) {
 	t.Run("N archivos nuevos — Inserted=N", func(t *testing.T) {
 		index, openErr := VaultIndexOpen(t.TempDir())
 		if openErr != nil {
 			t.Fatal(openErr)
 		}
 		defer index.Close()
 		batch := make([]VaultFile, 0, 3)
 		batch = append(batch,
 			makeTestVaultFile("data/raw/a.csv", "text/csv", "data", "raw"),
 			makeTestVaultFile("data/raw/b.csv", "text/csv", "data", "raw"),
 			makeTestVaultFile("knowledge/decisions/x.md", "text/markdown", "knowledge", "decisions"),
 		)
 		summary, writeErr := VaultIndexWrite(index, batch, false)
 		if writeErr != nil {
 			t.Fatalf("VaultIndexWrite: %v", writeErr)
 		}
 		if n := summary.Inserted; n != 3 {
 			t.Errorf("Inserted = %d, want 3", n)
 		}
 		if n := summary.Updated; n != 0 {
 			t.Errorf("Updated = %d, want 0", n)
 		}
 		if n := summary.Pruned; n != 0 {
 			t.Errorf("Pruned = %d, want 0", n)
 		}
 		if n := summary.FTS; n != 3 {
 			t.Errorf("FTS = %d, want 3", n)
 		}
 	})
 }
 // TestVaultIndexWrite_Upsert writes the same two entries twice, changing only
 // their Mtime in between, and expects the second write to report Inserted=0
 // and Updated=2.
 func TestVaultIndexWrite_Upsert(t *testing.T) {
 	t.Run("re-escritura con mtime distinto — Updated=N", func(t *testing.T) {
 		index, openErr := VaultIndexOpen(t.TempDir())
 		if openErr != nil {
 			t.Fatal(openErr)
 		}
 		defer index.Close()
 		batch := []VaultFile{
 			makeTestVaultFile("data/raw/a.csv", "text/csv", "data", "raw"),
 			makeTestVaultFile("data/raw/b.csv", "text/csv", "data", "raw"),
 		}
 		_, firstErr := VaultIndexWrite(index, batch, false)
 		if firstErr != nil {
 			t.Fatalf("first write: %v", firstErr)
 		}
 		// Push each Mtime into the future to simulate a changed file.
 		for i, bump := range []int64{100, 200} {
 			batch[i].Mtime = time.Now().Unix() + bump
 		}
 		summary, secondErr := VaultIndexWrite(index, batch, false)
 		if secondErr != nil {
 			t.Fatalf("second write: %v", secondErr)
 		}
 		if n := summary.Inserted; n != 0 {
 			t.Errorf("Inserted = %d, want 0", n)
 		}
 		if n := summary.Updated; n != 2 {
 			t.Errorf("Updated = %d, want 2", n)
 		}
 	})
 }
 // TestVaultIndexWrite_Prune stores entries A and B, then writes only A with
 // prune=true and expects Pruned=1 and no files row left for B.
 func TestVaultIndexWrite_Prune(t *testing.T) {
 	t.Run("prune elimina filas ausentes", func(t *testing.T) {
 		index, openErr := VaultIndexOpen(t.TempDir())
 		if openErr != nil {
 			t.Fatal(openErr)
 		}
 		defer index.Close()
 		// Seed the index with both A and B.
 		both := []VaultFile{
 			makeTestVaultFile("data/raw/a.csv", "text/csv", "data", "raw"),
 			makeTestVaultFile("data/raw/b.csv", "text/csv", "data", "raw"),
 		}
 		_, seedErr := VaultIndexWrite(index, both, false)
 		if seedErr != nil {
 			t.Fatalf("first write: %v", seedErr)
 		}
 		// Re-write A alone with pruning enabled: B has to disappear.
 		summary, pruneErr := VaultIndexWrite(index, []VaultFile{both[0]}, true)
 		if pruneErr != nil {
 			t.Fatalf("prune write: %v", pruneErr)
 		}
 		if summary.Pruned != 1 {
 			t.Errorf("Pruned = %d, want 1", summary.Pruned)
 		}
 		// B must no longer be stored.
 		var remaining int
 		lookup := index.QueryRow(`SELECT count(*) FROM files WHERE rel_path = 'data/raw/b.csv'`)
 		if scanErr := lookup.Scan(&remaining); scanErr != nil {
 			t.Fatalf("query: %v", scanErr)
 		}
 		if remaining != 0 {
 			t.Errorf("b.csv still present after prune")
 		}
 	})
 }
 // TestVaultIndexWrite_NoPrune stores entries A and B, then writes only A with
 // prune=false and expects Pruned=0 and both rows still present in files.
 func TestVaultIndexWrite_NoPrune(t *testing.T) {
 	t.Run("sin prune, filas previas persisten", func(t *testing.T) {
 		index, openErr := VaultIndexOpen(t.TempDir())
 		if openErr != nil {
 			t.Fatal(openErr)
 		}
 		defer index.Close()
 		both := []VaultFile{
 			makeTestVaultFile("data/raw/a.csv", "text/csv", "data", "raw"),
 			makeTestVaultFile("data/raw/b.csv", "text/csv", "data", "raw"),
 		}
 		_, seedErr := VaultIndexWrite(index, both, false)
 		if seedErr != nil {
 			t.Fatalf("first write: %v", seedErr)
 		}
 		// Re-write A alone, pruning disabled: B has to stay.
 		summary, rewriteErr := VaultIndexWrite(index, []VaultFile{both[0]}, false)
 		if rewriteErr != nil {
 			t.Fatalf("second write: %v", rewriteErr)
 		}
 		if summary.Pruned != 0 {
 			t.Errorf("Pruned = %d, want 0", summary.Pruned)
 		}
 		var total int
 		if scanErr := index.QueryRow(`SELECT count(*) FROM files`).Scan(&total); scanErr != nil {
 			t.Fatalf("query: %v", scanErr)
 		}
 		if total != 2 {
 			t.Errorf("files count = %d, want 2", total)
 		}
 	})
 }
 // TestVaultIndexWrite_FTSMatch writes two entries and expects a prefix MATCH
 // restricted to the rel_path column ('rel_path:foo*') to hit exactly one row.
 func TestVaultIndexWrite_FTSMatch(t *testing.T) {
 	t.Run("FTS5 MATCH funciona tras escritura", func(t *testing.T) {
 		index, openErr := VaultIndexOpen(t.TempDir())
 		if openErr != nil {
 			t.Fatal(openErr)
 		}
 		defer index.Close()
 		batch := []VaultFile{
 			makeTestVaultFile("data/raw/foo_report.csv", "text/csv", "data", "raw"),
 			makeTestVaultFile("data/raw/bar_data.csv", "text/csv", "data", "raw"),
 		}
 		_, writeErr := VaultIndexWrite(index, batch, false)
 		if writeErr != nil {
 			t.Fatalf("write: %v", writeErr)
 		}
 		// Only foo_report.csv has a rel_path token starting with "foo".
 		var hits int
 		match := index.QueryRow(`SELECT count(*) FROM files_fts WHERE files_fts MATCH 'rel_path:foo*'`)
 		if scanErr := match.Scan(&hits); scanErr != nil {
 			t.Fatalf("FTS MATCH query: %v", scanErr)
 		}
 		if hits != 1 {
 			t.Errorf("FTS MATCH rel_path:foo* = %d rows, want 1", hits)
 		}
 	})
 }
@@ -0,0 +1,174 @@
 package infra
 import (
 	"crypto/sha256"
 	"encoding/hex"
 	"fmt"
 	"io"
 	"net/http"
 	"os"
 	"path/filepath"
 	"sort"
 	"strings"
 )
 // VaultInventoryScan walks vaultPath and returns one VaultFile per file found,
 // sorted by RelPath. The result is never nil: an empty vault yields an empty
 // slice.
 //
 // Parameters:
 //   - vaultPath: absolute or relative path of the vault root directory.
 //   - vaultID, vaultName: copied verbatim into every returned VaultFile.
 //
 // Skipped entries:
 //   - vault_index.db, vault_index.db-shm, vault_index.db-wal
 //   - .git/ directories at any depth
 //   - hidden files/dirs (names starting with ".") directly under the vault root
 //   - entries that are neither regular files nor symlinks (named pipes,
 //     sockets, devices), because opening them for hashing can block or fail
 //
 // The vault root itself is never filtered, so a vault located at "." or inside
 // a dot-named directory is scanned normally.
 //
 // For each file it computes: relative path (forward slashes), size, mtime
 // (unix seconds), sha256 (streamed, lowercase hex), MIME type, lowercased
 // extension, bucket and sub-bucket.
 //
 // MIME detection priority:
 //  1. Extension override: .csv → text/csv, .md → text/markdown, .parquet → application/parquet
 //  2. http.DetectContentType on the first 512 bytes
 //
 // NOTE: file_validate_type_go_infra (FileValidateType) is deliberately not used.
 // According to the original author it requires an allowlist of MIME types and
 // is meant for upload validation, whereas an inventory must accept any MIME.
 //
 // Errors: any walk, stat, hashing or MIME-detection error aborts the scan and
 // is returned wrapped with a "vault_inventory_scan:" prefix; the slice is nil
 // in that case.
 func VaultInventoryScan(vaultPath, vaultID, vaultName string) ([]VaultFile, error) {
 	// Non-nil from the start so an empty vault yields [] rather than nil.
 	files := make([]VaultFile, 0)
 	err := filepath.WalkDir(vaultPath, func(path string, d os.DirEntry, err error) error {
 		if err != nil {
 			return err
 		}
 		name := d.Name()
 		rel, err := filepath.Rel(vaultPath, path)
 		if err != nil {
 			return fmt.Errorf("vault_inventory_scan: rel path for %q: %w", path, err)
 		}
 		rel = filepath.ToSlash(rel)
 		// An entry sits directly under the root when its relative path has no separator.
 		hiddenAtRoot := strings.HasPrefix(name, ".") && !strings.Contains(rel, "/")
 		if d.IsDir() {
 			// Always descend into the walk root: its own name ("." or a
 			// dot-prefixed directory) must not make the whole vault look empty.
 			if rel == "." {
 				return nil
 			}
 			if name == ".git" || hiddenAtRoot {
 				return filepath.SkipDir
 			}
 			return nil
 		}
 		if hiddenAtRoot {
 			return nil
 		}
 		// Skip vault_index.db and its WAL/SHM sidecar files.
 		if name == "vault_index.db" || name == "vault_index.db-shm" || name == "vault_index.db-wal" {
 			return nil
 		}
 		// Skip pipes, sockets and devices; symlinks are still hashed through
 		// os.Open as before.
 		if mode := d.Type(); !mode.IsRegular() && mode&os.ModeSymlink == 0 {
 			return nil
 		}
 		info, err := d.Info()
 		if err != nil {
 			return fmt.Errorf("vault_inventory_scan: stat %q: %w", path, err)
 		}
 		// Compute sha256 by streaming — avoids loading large files into memory.
 		sha, err := fileSha256(path)
 		if err != nil {
 			return fmt.Errorf("vault_inventory_scan: sha256 %q: %w", path, err)
 		}
 		mime, err := detectVaultFileMime(path, name)
 		if err != nil {
 			return fmt.Errorf("vault_inventory_scan: mime %q: %w", path, err)
 		}
 		ext := strings.ToLower(filepath.Ext(name))
 		bucket, subBucket := vaultBucketParts(rel)
 		files = append(files, VaultFile{
 			VaultID:   vaultID,
 			VaultName: vaultName,
 			RelPath:   rel,
 			Size:      info.Size(),
 			Mtime:     info.ModTime().UTC().Unix(),
 			Sha256:    sha,
 			Mime:      mime,
 			Ext:       ext,
 			Bucket:    bucket,
 			SubBucket: subBucket,
 		})
 		return nil
 	})
 	if err != nil {
 		return nil, fmt.Errorf("vault_inventory_scan: walk %q: %w", vaultPath, err)
 	}
 	sort.Slice(files, func(i, j int) bool {
 		return files[i].RelPath < files[j].RelPath
 	})
 	return files, nil
 }
 // fileSha256 computes the hex-lowercase SHA-256 of the file at path by streaming.
 func fileSha256(path string) (string, error) {
 	f, err := os.Open(path)
 	if err != nil {
 		return "", err
 	}
 	defer f.Close()
 	h := sha256.New()
 	if _, err := io.Copy(h, f); err != nil {
 		return "", err
 	}
 	return hex.EncodeToString(h.Sum(nil)), nil
 }
 // detectVaultFileMime returns the MIME type for a vault file.
 // Extension overrides take priority; otherwise http.DetectContentType is used.
 func detectVaultFileMime(path, name string) (string, error) {
 	ext := strings.ToLower(filepath.Ext(name))
 	switch ext {
 	case ".csv":
 		return "text/csv", nil
 	case ".md":
 		return "text/markdown", nil
 	case ".parquet":
 		return "application/parquet", nil
 	}
 	f, err := os.Open(path)
 	if err != nil {
 		return "", err
 	}
 	defer f.Close()
 	buf := make([]byte, 512)
 	n, err := f.Read(buf)
 	if err != nil && err != io.EOF {
 		return "", err
 	}
 	return http.DetectContentType(buf[:n]), nil
 }
 // vaultBucketParts derives the bucket and sub-bucket of a vault file from its
 // forward-slash relative path.
 //
 // Only directory segments are considered; the final segment is the file name
 // and is never returned:
 //   - bucket is the first directory segment (typically "data" or "knowledge"),
 //     or "" for a file sitting directly in the vault root.
 //   - subBucket is the second directory segment, or "" for a file sitting
 //     directly in the bucket directory.
 //
 // Examples: "data/raw/a.csv" → ("data", "raw"); "data/a.csv" → ("data", "");
 // "README.md" → ("", "").
 func vaultBucketParts(relPath string) (bucket, subBucket string) {
 	segments := strings.Split(relPath, "/")
 	// Drop the file name so that only the containing directories remain.
 	dirs := segments[:len(segments)-1]
 	if len(dirs) >= 1 {
 		bucket = dirs[0]
 	}
 	if len(dirs) >= 2 {
 		subBucket = dirs[1]
 	}
 	return bucket, subBucket
 }
@@ -0,0 +1,74 @@
 ---
 name: vault_inventory_scan
 kind: function
 lang: go
 domain: infra
 version: "1.0.0"
 purity: impure
 signature: "func VaultInventoryScan(vaultPath, vaultID, vaultName string) ([]VaultFile, error)"
 description: "Recorre vaultPath con filepath.WalkDir y retorna un slice de VaultFile ordenado por RelPath para cada archivo regular, computando sha256 por streaming, MIME por extension/magic y bucket/sub-bucket por posicion en el arbol."
 tags: [vault, inventory, scan, filesystem, sha256, mime, infra]
 uses_functions: []
 uses_types: ["vault_file_go_infra"]
 returns: []
 returns_optional: false
 error_type: "error_go_core"
 imports: [crypto/sha256, encoding/hex, fmt, io, net/http, os, path/filepath, sort, strings]
 params:
  - name: vaultPath
    desc: "ruta absoluta o relativa al directorio raiz del vault"
  - name: vaultID
    desc: "identificador del vault (ej: turismo_spain_app_turismo) — se copia a cada VaultFile"
  - name: vaultName
    desc: "nombre legible del vault (ej: turismo_spain) — se copia a cada VaultFile"
 output: "slice de VaultFile ordenado lexicograficamente por RelPath; slice vacio (no nil) si el vault esta vacio"
 tested: true
 tests:
  - "tmpdir vacio retorna slice vacio"
  - "data layout — bucket y sub_bucket correctos"
  - "knowledge layout — bucket y sub_bucket correctos"
  - "omite vault_index.db y .git"
  - "sha256 determinista para mismo contenido"
  - "orden lexicografico del resultado"
 test_file_path: "functions/infra/vault_inventory_scan_test.go"
 file_path: "functions/infra/vault_inventory_scan.go"
 ---
 ## Ejemplo
 ```go
 files, err := VaultInventoryScan("/data/vaults/turismo_spain", "turismo_spain_v1", "turismo_spain")
 if err != nil {
    log.Fatal(err)
 }
 for _, f := range files {
    fmt.Printf("%s  %s  %s/%s\n", f.RelPath, f.Mime, f.Bucket, f.SubBucket)
 }
 ```
 ## Notas
 ### Archivos omitidos
 - `vault_index.db`, `vault_index.db-shm`, `vault_index.db-wal` (siempre)
 - `.git/` en cualquier profundidad (SkipDir)
 - Entradas cuyo nombre empieza por `.` solo en la raiz del vault (nivel 0)
 ### Deteccion de MIME
 `file_validate_type_go_infra` (FileValidateType) no se usa porque su firma
 requiere una lista blanca de tipos permitidos y retorna (mime, bool) — esta
 disenada para validacion de uploads, no para escaneo inventarial donde
 cualquier MIME es valido. Se usan en su lugar:
 1. Override por extension (prioridad alta): `.csv` → `text/csv`, `.md` → `text/markdown`,
   `.parquet` → `application/parquet`. Necesario porque `http.DetectContentType`
   clasifica CSV como `text/plain` y no conoce Parquet.
 2. `http.DetectContentType` sobre primeros 512 bytes (magic bytes, stdlib) para el resto.
 ### SHA-256
 Calculado por streaming con `io.Copy` a `sha256.New()` — no carga el archivo completo
 a memoria. Valido para archivos de cualquier tamano.
 ### Bucket / SubBucket
 Derivados de la posicion en el arbol:
 - `bucket` = primer segmento del RelPath (tipicamente "data" o "knowledge")
 - `subBucket` = segundo segmento si existe; vacio si el archivo esta en la raiz del bucket
@@ -0,0 +1,182 @@
 package infra
 import (
 	"os"
 	"path/filepath"
 	"testing"
 )
 // writeTestFile creates the file rel (forward-slash path relative to dir) with
 // the given content, creating missing parent directories first. Directories
 // are created with mode 0o755 and the file with 0o644. Any failure aborts the
 // calling test through Fatalf.
 func writeTestFile(t *testing.T, dir, rel, content string) {
 	t.Helper()
 	target := filepath.Join(dir, filepath.FromSlash(rel))
 	parent := filepath.Dir(target)
 	if mkErr := os.MkdirAll(parent, 0o755); mkErr != nil {
 		t.Fatalf("mkdir %s: %v", parent, mkErr)
 	}
 	if wrErr := os.WriteFile(target, []byte(content), 0o644); wrErr != nil {
 		t.Fatalf("write %s: %v", target, wrErr)
 	}
 }
 // TestVaultInventoryScan_Empty scans a freshly created empty directory and
 // expects no error and no entries.
 func TestVaultInventoryScan_Empty(t *testing.T) {
 	t.Run("tmpdir vacio retorna slice vacio", func(t *testing.T) {
 		found, scanErr := VaultInventoryScan(t.TempDir(), "v1", "test")
 		if scanErr != nil {
 			t.Fatalf("unexpected error: %v", scanErr)
 		}
 		if count := len(found); count != 0 {
 			t.Errorf("expected 0 files, got %d", count)
 		}
 	})
 }
 // TestVaultInventoryScan_DataLayout scans a vault holding one CSV under
 // data/raw and one Parquet file under data/processed, then checks the path,
 // bucket, sub-bucket, MIME type, extension and vault id of the results.
 func TestVaultInventoryScan_DataLayout(t *testing.T) {
 	t.Run("data layout — bucket y sub_bucket correctos", func(t *testing.T) {
 		vaultDir := t.TempDir()
 		writeTestFile(t, vaultDir, "data/raw/a.csv", "col1,col2\n1,2\n")
 		writeTestFile(t, vaultDir, "data/processed/b.parquet", "PAR1fakedata")
 		found, scanErr := VaultInventoryScan(vaultDir, "vid", "vname")
 		if scanErr != nil {
 			t.Fatalf("unexpected error: %v", scanErr)
 		}
 		if len(found) != 2 {
 			t.Fatalf("expected 2 files, got %d", len(found))
 		}
 		// Sorted output: data/processed/b.parquet comes before data/raw/a.csv.
 		parquet, csv := found[0], found[1]
 		if parquet.RelPath != "data/processed/b.parquet" {
 			t.Errorf("files[0].RelPath = %q, want data/processed/b.parquet", parquet.RelPath)
 		}
 		if parquet.Bucket != "data" {
 			t.Errorf("files[0].Bucket = %q, want data", parquet.Bucket)
 		}
 		if parquet.SubBucket != "processed" {
 			t.Errorf("files[0].SubBucket = %q, want processed", parquet.SubBucket)
 		}
 		if parquet.Mime != "application/parquet" {
 			t.Errorf("files[0].Mime = %q, want application/parquet", parquet.Mime)
 		}
 		if parquet.Ext != ".parquet" {
 			t.Errorf("files[0].Ext = %q, want .parquet", parquet.Ext)
 		}
 		if parquet.VaultID != "vid" {
 			t.Errorf("VaultID = %q, want vid", parquet.VaultID)
 		}
 		if csv.RelPath != "data/raw/a.csv" {
 			t.Errorf("files[1].RelPath = %q, want data/raw/a.csv", csv.RelPath)
 		}
 		if csv.Mime != "text/csv" {
 			t.Errorf("files[1].Mime = %q, want text/csv", csv.Mime)
 		}
 		if !(csv.Bucket == "data" && csv.SubBucket == "raw") {
 			t.Errorf("files[1]: bucket=%q subBucket=%q, want data/raw", csv.Bucket, csv.SubBucket)
 		}
 	})
 }
 // TestVaultInventoryScan_KnowledgeLayout scans a vault holding a single
 // markdown file under knowledge/decisions and checks its path, bucket,
 // sub-bucket and MIME type.
 func TestVaultInventoryScan_KnowledgeLayout(t *testing.T) {
 	t.Run("knowledge layout — bucket y sub_bucket correctos", func(t *testing.T) {
 		vaultDir := t.TempDir()
 		writeTestFile(t, vaultDir, "knowledge/decisions/x.md", "# Decision\n\ncontent")
 		found, scanErr := VaultInventoryScan(vaultDir, "vid", "vname")
 		if scanErr != nil {
 			t.Fatalf("unexpected error: %v", scanErr)
 		}
 		if len(found) != 1 {
 			t.Fatalf("expected 1 file, got %d", len(found))
 		}
 		doc := found[0]
 		if doc.RelPath != "knowledge/decisions/x.md" {
 			t.Errorf("RelPath = %q", doc.RelPath)
 		}
 		if doc.Bucket != "knowledge" {
 			t.Errorf("Bucket = %q, want knowledge", doc.Bucket)
 		}
 		if doc.SubBucket != "decisions" {
 			t.Errorf("SubBucket = %q, want decisions", doc.SubBucket)
 		}
 		if doc.Mime != "text/markdown" {
 			t.Errorf("Mime = %q, want text/markdown", doc.Mime)
 		}
 	})
 }
 // TestVaultInventoryScan_SkipsIndexAndGit puts the index database, its WAL
 // sidecar and a .git/HEAD file next to one real CSV and expects the scan to
 // report the CSV only.
 func TestVaultInventoryScan_SkipsIndexAndGit(t *testing.T) {
 	t.Run("omite vault_index.db y .git", func(t *testing.T) {
 		vaultDir := t.TempDir()
 		// Each fixture is a {relative path, content} pair.
 		fixtures := [][2]string{
 			{"vault_index.db", "sqlite data"},
 			{"vault_index.db-wal", "wal data"},
 			{".git/HEAD", "ref: refs/heads/master"},
 			{"data/raw/real.csv", "a,b\n1,2\n"},
 		}
 		for _, fx := range fixtures {
 			writeTestFile(t, vaultDir, fx[0], fx[1])
 		}
 		found, scanErr := VaultInventoryScan(vaultDir, "vid", "vname")
 		if scanErr != nil {
 			t.Fatalf("unexpected error: %v", scanErr)
 		}
 		if len(found) != 1 {
 			t.Fatalf("expected 1 file (real.csv), got %d: %v", len(found), relPaths(found))
 		}
 		if only := found[0].RelPath; only != "data/raw/real.csv" {
 			t.Errorf("unexpected file: %q", only)
 		}
 	})
 }
 // TestVaultInventoryScan_Sha256Deterministic writes the same content into two
 // separate vaults and expects both scans to report the same 64-character
 // sha256 for it.
 func TestVaultInventoryScan_Sha256Deterministic(t *testing.T) {
 	t.Run("sha256 determinista para mismo contenido", func(t *testing.T) {
 		dir1 := t.TempDir()
 		dir2 := t.TempDir()
 		content := "deterministic content 123\n"
 		writeTestFile(t, dir1, "data/raw/f.csv", content)
 		writeTestFile(t, dir2, "data/raw/f.csv", content)
 		files1, err := VaultInventoryScan(dir1, "v1", "vault1")
 		if err != nil {
 			t.Fatal(err)
 		}
 		files2, err := VaultInventoryScan(dir2, "v2", "vault2")
 		if err != nil {
 			t.Fatal(err)
 		}
 		// Guard the [0] accesses below: a scan that misses the file must fail
 		// the test with a message rather than panic on an index out of range.
 		if len(files1) != 1 || len(files2) != 1 {
 			t.Fatalf("expected 1 file per vault, got %d and %d", len(files1), len(files2))
 		}
 		if files1[0].Sha256 != files2[0].Sha256 {
 			t.Errorf("sha256 mismatch: %q vs %q", files1[0].Sha256, files2[0].Sha256)
 		}
 		if len(files1[0].Sha256) != 64 {
 			t.Errorf("sha256 length = %d, want 64", len(files1[0].Sha256))
 		}
 	})
 }
 // TestVaultInventoryScan_Sorted writes four files in non-sorted order and
 // expects the scan to return all four in ascending RelPath order.
 func TestVaultInventoryScan_Sorted(t *testing.T) {
 	t.Run("orden lexicografico del resultado", func(t *testing.T) {
 		dir := t.TempDir()
 		writeTestFile(t, dir, "knowledge/decisions/z.md", "z")
 		writeTestFile(t, dir, "data/raw/a.csv", "a")
 		writeTestFile(t, dir, "data/processed/m.parquet", "m")
 		writeTestFile(t, dir, "knowledge/domains/b.md", "b")
 		files, err := VaultInventoryScan(dir, "v", "v")
 		if err != nil {
 			t.Fatal(err)
 		}
 		// Without this check an empty or truncated result would pass the
 		// ordering loop vacuously.
 		if len(files) != 4 {
 			t.Fatalf("expected 4 files, got %d: %v", len(files), relPaths(files))
 		}
 		for i := 1; i < len(files); i++ {
 			if files[i].RelPath < files[i-1].RelPath {
 				t.Errorf("not sorted at index %d: %q < %q", i, files[i].RelPath, files[i-1].RelPath)
 			}
 		}
 	})
 }
 // relPaths returns the RelPath of every entry in files, in the same order.
 // It is used to make test failure messages readable.
 func relPaths(files []VaultFile) []string {
 	paths := make([]string, 0, len(files))
 	for idx := range files {
 		paths = append(paths, files[idx].RelPath)
 	}
 	return paths
 }
@@ -0,0 +1,252 @@
 package infra
 import (
 	"fmt"
 	"os"
 	"path/filepath"
 	"strings"
 )
 // LayoutReport describes what VaultLayoutEnsure did (or, when DryRun is set,
 // would do) to a vault directory. Entries in Created, Migrated and AlreadyOK
 // are paths relative to VaultPath; Skipped holds bare root-level entry names.
 type LayoutReport struct {
 	VaultPath string   `json:"vault_path"` // absolute vault root after symlink resolution
 	Created   []string `json:"created"`    // dirs created (relative paths)
 	Migrated  []string `json:"migrated"`   // renames executed (or planned in a dry run), format "src -> dst" (relative)
 	AlreadyOK []string `json:"already_ok"` // dirs that already existed at the target location
 	Skipped   []string `json:"skipped"`    // unrecognized root-level entries, left untouched
 	DryRun    bool     `json:"dry_run"`    // echoes the dryRun argument of the call
 }
 // dataBuckets are root-level directory names that VaultLayoutEnsure relocates
 // to data/<name>.
 var dataBuckets = []string{"raw", "processed", "exports"}
 // knowledgeBuckets are root-level directory names that VaultLayoutEnsure
 // relocates to knowledge/<name>.
 var knowledgeBuckets = []string{"decisions", "domains", "models", "benchmarks", "test_documents"}
 // knownRootFiles are root-level file names that VaultLayoutEnsure moves into
 // knowledge/. Every entry maps to the same destination, knowledge/README.md.
 var knownRootFiles = []string{"README.md", "README.txt"}
 // VaultLayoutEnsure ensures a vault directory uses the canonical hybrid layout:
 //
 //	data/{raw,processed,exports}
 //	knowledge/{decisions,domains,models,benchmarks,test_documents}
 //
 // Legacy vaults that have these directories at the root are migrated by renaming
 // (or merging when both src and dst already exist). A root-level README.md or
 // README.txt is moved to knowledge/README.md; if that destination is already
 // taken the call fails with a conflict error. The operation is idempotent:
 // a second run returns everything in AlreadyOK.
 //
 // vaultPath may be relative or a symlink; trailing path separators are ignored.
 // When dryRun is true the function computes the report but does not touch the
 // disk, and it reports the same conflicts a real run would hit.
 //
 // The returned report is always populated up to the point of failure, so it is
 // meaningful even when err is non-nil.
 func VaultLayoutEnsure(vaultPath string, dryRun bool) (LayoutReport, error) {
 	report := LayoutReport{DryRun: dryRun}
 	// --- resolve path ---
 	// Trailing separators are dropped, but a path made only of separators
 	// (e.g. "/") must not collapse to "": filepath.Abs("") is the current
 	// working directory, which would silently retarget the whole migration.
 	input := vaultPath
 	if trimmed := strings.TrimRight(input, "/\\"); trimmed != "" {
 		input = trimmed
 	}
 	// Keep the input in its own variable so the error message can still show it.
 	absPath, err := filepath.Abs(input)
 	if err != nil {
 		return report, fmt.Errorf("vault_layout_ensure: abs(%q): %w", input, err)
 	}
 	vaultPath = absPath
 	// Follow symlinks for the vault root itself.
 	resolved, err := filepath.EvalSymlinks(vaultPath)
 	if err != nil {
 		return report, fmt.Errorf("vault_layout_ensure: eval symlinks %q: %w", vaultPath, err)
 	}
 	vaultPath = resolved
 	report.VaultPath = vaultPath
 	// --- check that vault exists and is a directory ---
 	info, err := os.Stat(vaultPath)
 	if err != nil {
 		return report, fmt.Errorf("vault_layout_ensure: stat %q: %w", vaultPath, err)
 	}
 	if !info.IsDir() {
 		return report, fmt.Errorf("vault_layout_ensure: %q is not a directory", vaultPath)
 	}
 	// --- ensure top-level containers ---
 	for _, container := range []string{"data", "knowledge"} {
 		dst := filepath.Join(vaultPath, container)
 		if err := ensureDir(dst, dryRun, container, &report); err != nil {
 			return report, err
 		}
 	}
 	// --- build migration table: root name -> relative destination ---
 	type migration struct {
 		rootName string // name in vault root (dir or file)
 		dstRel   string // relative destination path inside vault
 		isFile   bool
 	}
 	var migrations []migration
 	for _, b := range dataBuckets {
 		migrations = append(migrations, migration{rootName: b, dstRel: filepath.Join("data", b)})
 	}
 	for _, b := range knowledgeBuckets {
 		migrations = append(migrations, migration{rootName: b, dstRel: filepath.Join("knowledge", b)})
 	}
 	for _, rf := range knownRootFiles {
 		migrations = append(migrations, migration{rootName: rf, dstRel: filepath.Join("knowledge", "README.md"), isFile: true})
 	}
 	// Track which root names are "known" so we can compute Skipped.
 	// NOTE(review): names are compared case-insensitively here, while the
 	// migration below looks up the exact-case name; on a case-sensitive
 	// filesystem an entry such as "Raw" is therefore neither migrated nor
 	// listed in Skipped. Confirm this is intended.
 	knownNames := make(map[string]struct{})
 	for _, m := range migrations {
 		knownNames[strings.ToLower(m.rootName)] = struct{}{}
 	}
 	knownNames["data"] = struct{}{}
 	knownNames["knowledge"] = struct{}{}
 	// --- apply migrations ---
 	// handled records destinations already accounted for during this run.
 	// Several sources can share one destination (README.md and README.txt),
 	// so this (a) lets a dry run see a destination an earlier planned rename
 	// would occupy, and (b) keeps AlreadyOK free of duplicates and of
 	// destinations that were just reported in Migrated.
 	handled := make(map[string]bool)
 	for _, m := range migrations {
 		src := filepath.Join(vaultPath, m.rootName)
 		dst := filepath.Join(vaultPath, m.dstRel)
 		srcRel := m.rootName
 		dstRel := m.dstRel
 		srcExists := pathExists(src)
 		dstExists := handled[dstRel] || pathExists(dst)
 		switch {
 		case srcExists && dstExists:
 			// Both exist: merge if directory, error on file collision.
 			if m.isFile {
 				return report, fmt.Errorf("vault_layout_ensure: conflict: both %q and %q exist", srcRel, dstRel)
 			}
 			if err := mergeDirs(src, dst, srcRel, dstRel, dryRun, &report); err != nil {
 				return report, err
 			}
 		case srcExists && !dstExists:
 			// Only source exists: rename.
 			report.Migrated = append(report.Migrated, fmt.Sprintf("%s -> %s", srcRel, dstRel))
 			handled[dstRel] = true
 			if !dryRun {
 				if err := os.Rename(src, dst); err != nil {
 					return report, fmt.Errorf("vault_layout_ensure: rename %q -> %q: %w", src, dst, err)
 				}
 			}
 		case !srcExists && dstExists:
 			// Already migrated; report each destination at most once.
 			if !handled[dstRel] {
 				report.AlreadyOK = append(report.AlreadyOK, dstRel)
 				handled[dstRel] = true
 			}
 		default:
 			// Neither exists: create empty destination directory (skip for files).
 			if !m.isFile {
 				report.Created = append(report.Created, dstRel)
 				if !dryRun {
 					if err := os.MkdirAll(dst, 0o755); err != nil {
 						return report, fmt.Errorf("vault_layout_ensure: mkdir %q: %w", dst, err)
 					}
 				}
 			}
 		}
 	}
 	// --- collect skipped (unrecognized root entries) ---
 	entries, err := os.ReadDir(vaultPath)
 	if err != nil {
 		return report, fmt.Errorf("vault_layout_ensure: readdir %q: %w", vaultPath, err)
 	}
 	for _, e := range entries {
 		if _, known := knownNames[strings.ToLower(e.Name())]; !known {
 			report.Skipped = append(report.Skipped, e.Name())
 		}
 	}
 	return report, nil
 }
 // ensureDir records rel in report.Created (and creates path unless dryRun)
 // when path does not exist, or in report.AlreadyOK when path is an existing
 // directory. Used for top-level containers "data" and "knowledge".
 //
 // It returns an error when path exists but is not a usable directory (a
 // regular file, a dangling symlink, ...): reporting such an entry as
 // AlreadyOK would only defer the failure to a later rename into it.
 func ensureDir(path string, dryRun bool, rel string, report *LayoutReport) error {
 	if pathExists(path) {
 		// os.Stat follows symlinks, so a symlink to a directory is accepted.
 		info, err := os.Stat(path)
 		if err != nil {
 			return fmt.Errorf("vault_layout_ensure: stat %q: %w", path, err)
 		}
 		if !info.IsDir() {
 			return fmt.Errorf("vault_layout_ensure: %q exists but is not a directory", path)
 		}
 		report.AlreadyOK = append(report.AlreadyOK, rel)
 		return nil
 	}
 	report.Created = append(report.Created, rel)
 	if dryRun {
 		return nil
 	}
 	if err := os.MkdirAll(path, 0o755); err != nil {
 		return fmt.Errorf("vault_layout_ensure: mkdir %q: %w", path, err)
 	}
 	return nil
 }
 // mergeDirs moves the direct children of src into dst, then removes src if
 // it is empty. Each move is appended to report.Migrated as "src -> dst" using
 // the relative names srcRel/dstRel; with dryRun set nothing on disk changes.
 //
 // No-overwrite policy: if any child of src already exists in dst an error is
 // returned. All children are checked before the first rename, so a conflict
 // never leaves the directory half merged and adds nothing to report.Migrated.
 func mergeDirs(src, dst, srcRel, dstRel string, dryRun bool, report *LayoutReport) error {
 	children, err := os.ReadDir(src)
 	if err != nil {
 		return fmt.Errorf("vault_layout_ensure: readdir %q: %w", src, err)
 	}
 	// Pass 1: detect every collision up front, before touching anything.
 	for _, child := range children {
 		if pathExists(filepath.Join(dst, child.Name())) {
 			return fmt.Errorf("vault_layout_ensure: merge conflict: %q already exists in %q (cannot overwrite %q)",
 				child.Name(), dstRel, filepath.Join(srcRel, child.Name()))
 		}
 	}
 	// Pass 2: record and perform the moves.
 	for _, child := range children {
 		childSrc := filepath.Join(src, child.Name())
 		childDst := filepath.Join(dst, child.Name())
 		childSrcRel := filepath.Join(srcRel, child.Name())
 		childDstRel := filepath.Join(dstRel, child.Name())
 		report.Migrated = append(report.Migrated, fmt.Sprintf("%s -> %s", childSrcRel, childDstRel))
 		if !dryRun {
 			if err := os.Rename(childSrc, childDst); err != nil {
 				return fmt.Errorf("vault_layout_ensure: rename %q -> %q: %w", childSrc, childDst, err)
 			}
 		}
 	}
 	if dryRun {
 		return nil
 	}
 	// Re-read src after the renames; it is removed only when truly empty, so
 	// anything that appeared in the meantime is left in place.
 	remaining, err := os.ReadDir(src)
 	if err != nil {
 		return fmt.Errorf("vault_layout_ensure: readdir %q: %w", src, err)
 	}
 	if len(remaining) == 0 {
 		if err := os.Remove(src); err != nil {
 			return fmt.Errorf("vault_layout_ensure: remove empty src %q: %w", src, err)
 		}
 	}
 	return nil
 }
 // pathExists reports whether os.Lstat succeeds for path. Lstat does not
 // follow symlinks, so a dangling symlink still counts as existing; any Lstat
 // error (not only "not found") is reported as false.
 func pathExists(path string) bool {
 	if _, statErr := os.Lstat(path); statErr != nil {
 		return false
 	}
 	return true
 }
 // dirIsEmpty reports whether path can be listed as a directory and holds no
 // entries. Any os.ReadDir failure (missing path, not a directory, ...) yields
 // false.
 func dirIsEmpty(path string) bool {
 	if listing, readErr := os.ReadDir(path); readErr == nil {
 		return len(listing) == 0
 	}
 	return false
 }
 // Reference dirIsEmpty so the compiler/linters do not flag it as unused when
 // only tests call it.
 var _ = dirIsEmpty
 // vaultLayoutKnownNames returns the set of root-level names managed by the
 // vault layout: the two containers ("data", "knowledge"), every data and
 // knowledge bucket name, and the lower-cased known root file names.
 // It is unexported; the original note says it exists for use in tests.
 func vaultLayoutKnownNames() map[string]struct{} {
 	names := map[string]struct{}{
 		"data":      {},
 		"knowledge": {},
 	}
 	for _, group := range [][]string{dataBuckets, knowledgeBuckets} {
 		for _, bucket := range group {
 			names[bucket] = struct{}{}
 		}
 	}
 	// Only the file names are lower-cased; bucket names are stored as declared.
 	for _, file := range knownRootFiles {
 		names[strings.ToLower(file)] = struct{}{}
 	}
 	return names
 }
@@ -0,0 +1,95 @@
 ---
 name: vault_layout_ensure
 kind: function
 lang: go
 domain: infra
 version: "1.0.0"
 purity: impure
 signature: "func VaultLayoutEnsure(vaultPath string, dryRun bool) (LayoutReport, error)"
 description: "Normaliza el layout de un vault al esquema hibrido canónico data/{raw,processed,exports} + knowledge/{decisions,domains,models,benchmarks,test_documents}. Migra directorios legacy en la raíz del vault a su ubicación correcta; idempotente."
 tags: [vault, layout, migration, infra, filesystem, idempotent]
 uses_functions: []
 uses_types: []
 returns: []
 returns_optional: false
 error_type: "error_go_core"
 imports:
  - "fmt"
  - "os"
  - "path/filepath"
  - "strings"
 params:
  - name: vault_path
    desc: "Ruta al directorio raíz del vault. Puede ser absoluta, relativa o un symlink — se resuelve con filepath.Abs + filepath.EvalSymlinks. Trailing slashes se ignoran."
  - name: dry_run
    desc: "Si true, calcula el reporte completo (qué se crearía, migraría, etc.) pero no modifica el disco. Útil para previsualizar antes de ejecutar."
 output: "LayoutReport con: VaultPath (ruta resuelta), Created (dirs creados), Migrated (renombres ejecutados, formato 'src -> dst'), AlreadyOK (destinos que ya existían), Skipped (entradas en raíz no reconocidas, no tocadas), DryRun (flag). Error si el path no existe, no es directorio, o hay conflicto de merge (mismo nombre de archivo en src y dst)."
 tested: true
 tests:
  - "TestVaultLayoutEnsure_DryRun_NoChange"
  - "TestVaultLayoutEnsure_FreshDir_CreatesLayout"
  - "TestVaultLayoutEnsure_LegacyDataLayout_Migrates"
  - "TestVaultLayoutEnsure_LegacyKnowledgeLayout_Migrates"
  - "TestVaultLayoutEnsure_AlreadyMigrated_Idempotent"
  - "TestVaultLayoutEnsure_Mixed_PartialMigration"
  - "TestVaultLayoutEnsure_MergeConflict_Errors"
  - "TestVaultLayoutEnsure_UnknownFiles_Skipped"
  - "TestVaultLayoutEnsure_NotADir_Errors"
 test_file_path: "functions/infra/vault_layout_ensure_test.go"
 file_path: "functions/infra/vault_layout_ensure.go"
 ---
 ## Ejemplo
 ```go
 // Previsualizar sin tocar disco:
 report, err := VaultLayoutEnsure("/home/lucas/vaults/turismo_spain", true)
 if err != nil {
    log.Fatal(err)
 }
 fmt.Printf("Would migrate: %v\n", report.Migrated)
 fmt.Printf("Would create:  %v\n", report.Created)
 // Ejecutar la migración:
 report, err = VaultLayoutEnsure("/home/lucas/vaults/turismo_spain", false)
 if err != nil {
    log.Fatalf("migration failed: %v", err)
 }
 fmt.Printf("Migrated: %v\n", report.Migrated)
 fmt.Printf("Created:  %v\n", report.Created)
 fmt.Printf("Skipped:  %v\n", report.Skipped)
 ```
 ## Comportamiento detallado
 **Directorios gestionados:**
 | Raíz (legacy) | Destino canónico |
 |---|---|
 | `raw/` | `data/raw/` |
 | `processed/` | `data/processed/` |
 | `exports/` | `data/exports/` |
 | `decisions/` | `knowledge/decisions/` |
 | `domains/` | `knowledge/domains/` |
 | `models/` | `knowledge/models/` |
 | `benchmarks/` | `knowledge/benchmarks/` |
 | `test_documents/` | `knowledge/test_documents/` |
 | `README.md` / `README.txt` | `knowledge/README.md` |
 **Lógica de migración (por cada entrada conocida):**
 - Solo `src` existe → rename atómico `src` → `dst`, registrado en `Migrated`.
 - Solo `dst` existe → ya migrado, registrado en `AlreadyOK`.
 - Ambos existen (dir) → merge: mueve cada hijo de `src/` a `dst/`; error si mismo nombre. Registrado en `Migrated` por hijo.
 - Ambos existen (archivo README) → error inmediato con paths concretos.
 - Ninguno existe → crea `dst` vacío (solo para directorios), registrado en `Created`; para los archivos README no se crea nada.
 **Archivos/dirs no reconocidos** en la raíz (`.git`, `vault_index.db`, archivos custom) se registran en `Skipped` y no se tocan.
 **Idempotencia:** segunda ejecución sobre un vault ya migrado reporta todo en `AlreadyOK` y no toca disco.
 ## Notas
 `LayoutReport` es un tipo local de esta función (no un tipo del registry). El struct exportado vive en `functions/infra/vault_layout_ensure.go` junto con la función.
 Para aplicar la migración a múltiples vaults en batch, invocar desde un pipeline que lea los paths de `vault.yaml` (ver `vault_manifest_read_go_infra`) y llame a `VaultLayoutEnsure` en cada uno.
@@ -0,0 +1,394 @@
 package infra
 import (
 	"os"
 	"path/filepath"
 	"testing"
 )
 // mkVaultDir builds a throw-away directory tree under t.TempDir() and returns
 // its root. Each entry is a slash-separated relative path: an entry ending in
 // "/" becomes a directory, any other entry becomes a file containing "test\n"
 // (its parent directories are created as needed).
 func mkVaultDir(t *testing.T, entries []string) string {
 	t.Helper()
 	root := t.TempDir()
 	for _, entry := range entries {
 		target := filepath.Join(root, filepath.FromSlash(entry))
 		isDir := entry[len(entry)-1] == '/'
 		if isDir {
 			if err := os.MkdirAll(target, 0o755); err != nil {
 				t.Fatalf("mkVaultDir: mkdir %q: %v", target, err)
 			}
 			continue
 		}
 		if err := os.MkdirAll(filepath.Dir(target), 0o755); err != nil {
 			t.Fatalf("mkVaultDir: mkdir parent %q: %v", target, err)
 		}
 		if err := os.WriteFile(target, []byte("test\n"), 0o644); err != nil {
 			t.Fatalf("mkVaultDir: write %q: %v", target, err)
 		}
 	}
 	return root
 }
 // TestVaultLayoutEnsure_DryRun_NoChange verifies that a dry run leaves the
 // directory tree untouched while still reporting planned migrations.
 func TestVaultLayoutEnsure_DryRun_NoChange(t *testing.T) {
 	legacy := []string{"raw/", "raw/file1.csv", "processed/"}
 	root := mkVaultDir(t, legacy)
 	before := snapshotDir(t, root)
 	report, err := VaultLayoutEnsure(root, true)
 	if err != nil {
 		t.Fatalf("unexpected error: %v", err)
 	}
 	if !report.DryRun {
 		t.Error("DryRun flag not set in report")
 	}
 	if after := snapshotDir(t, root); !mapEqual(before, after) {
 		t.Errorf("dry-run modified disk: before=%v after=%v", before, after)
 	}
 	// The legacy raw/ and processed/ dirs must show up as planned renames.
 	if len(report.Migrated) == 0 {
 		t.Error("expected Migrated to be non-empty in dry-run plan")
 	}
 }
 // TestVaultLayoutEnsure_FreshDir_CreatesLayout verifies that an empty vault
 // gets every canonical directory, both in the report and on disk.
 func TestVaultLayoutEnsure_FreshDir_CreatesLayout(t *testing.T) {
 	root := mkVaultDir(t, nil) // empty vault
 	report, err := VaultLayoutEnsure(root, false)
 	if err != nil {
 		t.Fatalf("unexpected error: %v", err)
 	}
 	// Containers first, then each bucket under its container.
 	wantCreated := []string{"data", "knowledge"}
 	for _, bucket := range []string{"raw", "processed", "exports"} {
 		wantCreated = append(wantCreated, filepath.Join("data", bucket))
 	}
 	for _, bucket := range []string{"decisions", "domains", "models", "benchmarks", "test_documents"} {
 		wantCreated = append(wantCreated, filepath.Join("knowledge", bucket))
 	}
 	reported := toSet(report.Created)
 	for _, rel := range wantCreated {
 		if _, found := reported[rel]; !found {
 			t.Errorf("expected Created to contain %q, got %v", rel, report.Created)
 		}
 	}
 	// The report alone is not enough: each directory must exist on disk.
 	for _, rel := range wantCreated {
 		full := filepath.Join(root, rel)
 		switch info, statErr := os.Stat(full); {
 		case statErr != nil:
 			t.Errorf("expected %q to exist: %v", full, statErr)
 		case !info.IsDir():
 			t.Errorf("%q should be a directory", full)
 		}
 	}
 }
 // TestVaultLayoutEnsure_LegacyDataLayout_Migrates verifies that root-level
 // raw/ and processed/ are renamed under data/ together with their files.
 func TestVaultLayoutEnsure_LegacyDataLayout_Migrates(t *testing.T) {
 	root := mkVaultDir(t, []string{
 		"raw/",
 		"raw/file1.parquet",
 		"raw/file2.parquet",
 		"processed/",
 		"processed/clean.csv",
 		"exports/",
 	})
 	report, err := VaultLayoutEnsure(root, false)
 	if err != nil {
 		t.Fatalf("unexpected error: %v", err)
 	}
 	// Each legacy dir is renamed wholesale, so it appears as one top-level pair.
 	wantPairs := []string{
 		"raw -> " + filepath.Join("data", "raw"),
 		"processed -> " + filepath.Join("data", "processed"),
 	}
 	migrated := toSet(report.Migrated)
 	for _, pair := range wantPairs {
 		if _, found := migrated[pair]; !found {
 			t.Errorf("expected Migrated to contain %q, got %v", pair, report.Migrated)
 		}
 	}
 	// The files must now live at the new location.
 	wantFiles := []string{
 		filepath.Join("data", "raw", "file1.parquet"),
 		filepath.Join("data", "raw", "file2.parquet"),
 		filepath.Join("data", "processed", "clean.csv"),
 	}
 	for _, rel := range wantFiles {
 		if _, statErr := os.Stat(filepath.Join(root, rel)); statErr != nil {
 			t.Errorf("expected %q to exist after migration: %v", rel, statErr)
 		}
 	}
 	// The legacy directories must no longer exist at the root.
 	for _, legacy := range []string{"raw", "processed"} {
 		if pathExists(filepath.Join(root, legacy)) {
 			t.Errorf("expected legacy dir %q to be removed", legacy)
 		}
 	}
 }
 // TestVaultLayoutEnsure_LegacyKnowledgeLayout_Migrates verifies that
 // root-level decisions/, models/ and README.md are moved under knowledge/.
 func TestVaultLayoutEnsure_LegacyKnowledgeLayout_Migrates(t *testing.T) {
 	root := mkVaultDir(t, []string{
 		"decisions/",
 		"decisions/2024-01.md",
 		"models/",
 		"models/ner_v1.pkl",
 		"README.md",
 	})
 	report, err := VaultLayoutEnsure(root, false)
 	if err != nil {
 		t.Fatalf("unexpected error: %v", err)
 	}
 	// Both directories and the README must be reported as renames.
 	wantPairs := []string{
 		"decisions -> " + filepath.Join("knowledge", "decisions"),
 		"models -> " + filepath.Join("knowledge", "models"),
 		"README.md -> " + filepath.Join("knowledge", "README.md"),
 	}
 	migrated := toSet(report.Migrated)
 	for _, pair := range wantPairs {
 		if _, found := migrated[pair]; !found {
 			t.Errorf("expected Migrated to contain %q, got %v", pair, report.Migrated)
 		}
 	}
 	// The content must be reachable at its new location.
 	wantFiles := []string{
 		filepath.Join("knowledge", "decisions", "2024-01.md"),
 		filepath.Join("knowledge", "models", "ner_v1.pkl"),
 		filepath.Join("knowledge", "README.md"),
 	}
 	for _, rel := range wantFiles {
 		if _, statErr := os.Stat(filepath.Join(root, rel)); statErr != nil {
 			t.Errorf("expected %q to exist after migration: %v", rel, statErr)
 		}
 	}
 }
 // TestVaultLayoutEnsure_AlreadyMigrated_Idempotent verifies that running on a
 // vault already in canonical layout migrates nothing, and that a second run
 // leaves the disk unchanged and reports the existing dirs as AlreadyOK.
 func TestVaultLayoutEnsure_AlreadyMigrated_Idempotent(t *testing.T) {
 	canonical := []string{
 		"data/",
 		"data/raw/",
 		"data/raw/file.csv",
 		"data/processed/",
 		"data/exports/",
 		"knowledge/",
 		"knowledge/decisions/",
 		"knowledge/domains/",
 		"knowledge/models/",
 		"knowledge/benchmarks/",
 		"knowledge/test_documents/",
 	}
 	root := mkVaultDir(t, canonical)
 	first, err := VaultLayoutEnsure(root, false)
 	if err != nil {
 		t.Fatalf("first run error: %v", err)
 	}
 	if len(first.Migrated) != 0 {
 		t.Errorf("first run on fully-migrated vault should have no migrations, got %v", first.Migrated)
 	}
 	// Snapshot around the second run to prove it does not touch the disk.
 	before := snapshotDir(t, root)
 	second, err := VaultLayoutEnsure(root, false)
 	if err != nil {
 		t.Fatalf("second run error: %v", err)
 	}
 	if after := snapshotDir(t, root); !mapEqual(before, after) {
 		t.Error("second run modified disk (not idempotent)")
 	}
 	if len(second.Migrated) != 0 {
 		t.Errorf("second run should produce no migrations, got %v", second.Migrated)
 	}
 	if len(second.AlreadyOK) == 0 {
 		t.Error("second run should report existing dirs as AlreadyOK")
 	}
 }
 // TestVaultLayoutEnsure_Mixed_PartialMigration covers a half-migrated vault:
 // data/raw is already in place, while exports/ and decisions/ still sit at
 // the root and must be migrated.
 func TestVaultLayoutEnsure_Mixed_PartialMigration(t *testing.T) {
 	root := mkVaultDir(t, []string{
 		"data/",
 		"data/raw/",
 		"data/raw/already_here.csv",
 		"exports/",
 		"exports/report.pdf",
 		"decisions/",
 		"decisions/2023-note.md",
 	})
 	report, err := VaultLayoutEnsure(root, false)
 	if err != nil {
 		t.Fatalf("unexpected error: %v", err)
 	}
 	// data/raw was already at its destination.
 	if want := filepath.Join("data", "raw"); !sliceContains(report.AlreadyOK, want) {
 		t.Errorf("data/raw should be AlreadyOK, got AlreadyOK=%v", report.AlreadyOK)
 	}
 	// exports must be reported as a rename into data/.
 	if !sliceContains(report.Migrated, "exports -> "+filepath.Join("data", "exports")) {
 		t.Errorf("exports should be migrated, Migrated=%v", report.Migrated)
 	}
 	// decisions must be reported as a rename into knowledge/.
 	if !sliceContains(report.Migrated, "decisions -> "+filepath.Join("knowledge", "decisions")) {
 		t.Errorf("decisions should be migrated, Migrated=%v", report.Migrated)
 	}
 }
 // TestVaultLayoutEnsure_MergeConflict_Errors verifies that merging raw/ into
 // an existing data/raw/ fails when both hold a file with the same name.
 func TestVaultLayoutEnsure_MergeConflict_Errors(t *testing.T) {
 	conflicting := []string{
 		"raw/",
 		"raw/collision.csv",
 		"data/",
 		"data/raw/",
 		"data/raw/collision.csv", // same name -> conflict
 	}
 	root := mkVaultDir(t, conflicting)
 	_, err := VaultLayoutEnsure(root, false)
 	if err == nil {
 		t.Fatal("expected error for merge conflict, got nil")
 	}
 	// Either hint in the message is accepted.
 	if !(contains(err.Error(), "conflict") || contains(err.Error(), "collision.csv")) {
 		t.Errorf("error should mention conflict or the file name, got: %v", err)
 	}
 }
 // TestVaultLayoutEnsure_UnknownFiles_Skipped verifies that unrecognized
 // root-level entries are listed in Skipped and that known buckets are not.
 func TestVaultLayoutEnsure_UnknownFiles_Skipped(t *testing.T) {
 	root := mkVaultDir(t, []string{
 		".git/",
 		"vault_index.db",
 		"my_custom_notes.txt",
 		"raw/",
 	})
 	report, err := VaultLayoutEnsure(root, false)
 	if err != nil {
 		t.Fatalf("unexpected error: %v", err)
 	}
 	skipped := toSet(report.Skipped)
 	unknown := []string{".git", "vault_index.db", "my_custom_notes.txt"}
 	for _, name := range unknown {
 		if _, found := skipped[name]; !found {
 			t.Errorf("expected %q in Skipped, got %v", name, report.Skipped)
 		}
 	}
 	// raw is a known bucket, so it must never be listed as skipped.
 	if _, found := skipped["raw"]; found {
 		t.Error("raw should not appear in Skipped — it is a known bucket")
 	}
 }
 // TestVaultLayoutEnsure_NotADir_Errors verifies that VaultLayoutEnsure rejects
 // a vault path that does not exist or that points at a regular file.
 // Both fixtures live under t.TempDir(), so the test does not depend on a
 // hard-coded /tmp path and leaves nothing behind.
 func TestVaultLayoutEnsure_NotADir_Errors(t *testing.T) {
 	t.Run("non-existent path", func(t *testing.T) {
 		// A child of a freshly created temp dir cannot already exist.
 		missing := filepath.Join(t.TempDir(), "does_not_exist")
 		_, err := VaultLayoutEnsure(missing, false)
 		if err == nil {
 			t.Fatal("expected error for non-existent path")
 		}
 	})
 	t.Run("path is a file", func(t *testing.T) {
 		file := filepath.Join(t.TempDir(), "vault_layout.txt")
 		if err := os.WriteFile(file, nil, 0o644); err != nil {
 			t.Fatal(err)
 		}
 		_, err := VaultLayoutEnsure(file, false)
 		if err == nil {
 			t.Fatal("expected error when vaultPath is a file, not a dir")
 		}
 		if !contains(err.Error(), "not a directory") {
 			t.Errorf("error should mention 'not a directory', got: %v", err)
 		}
 	})
 }
 // --- helpers ---
 // snapshotDir walks root and returns a set (path relative to root -> true)
 // of everything found, including root itself. Any walk error fails the test.
 func snapshotDir(t *testing.T, root string) map[string]bool {
 	t.Helper()
 	seen := map[string]bool{}
 	visit := func(path string, _ os.DirEntry, walkErr error) error {
 		if walkErr != nil {
 			return walkErr
 		}
 		// The Rel error is deliberately ignored, as every visited path is
 		// produced by walking root.
 		rel, _ := filepath.Rel(root, path)
 		seen[rel] = true
 		return nil
 	}
 	if err := filepath.WalkDir(root, visit); err != nil {
 		t.Fatalf("snapshotDir: %v", err)
 	}
 	return seen
 }
 // mapEqual reports whether a and b have the same number of keys and every key
 // of a maps to true in b. Values stored in a are not inspected.
 func mapEqual(a, b map[string]bool) bool {
 	if len(a) == len(b) {
 		for key := range a {
 			if present := b[key]; !present {
 				return false
 			}
 		}
 		return true
 	}
 	return false
 }
 // toSet returns the distinct strings of ss as the keys of a map.
 func toSet(ss []string) map[string]struct{} {
 	set := make(map[string]struct{}, len(ss))
 	for idx := range ss {
 		set[ss[idx]] = struct{}{}
 	}
 	return set
 }
 // sliceContains reports whether target equals any element of ss.
 func sliceContains(ss []string, target string) bool {
 	for idx := 0; idx < len(ss); idx++ {
 		if ss[idx] == target {
 			return true
 		}
 	}
 	return false
 }
 // contains reports whether sub occurs anywhere in s. An empty sub is always
 // found. It is a hand-rolled substring search that needs no imports.
 func contains(s, sub string) bool {
 	width := len(sub)
 	if width > len(s) {
 		return false
 	}
 	// Slide a window of len(sub) bytes across s.
 	for start := 0; start+width <= len(s); start++ {
 		if s[start:start+width] == sub {
 			return true
 		}
 	}
 	return false
 }
@@ -0,0 +1,96 @@
 package infra
 import (
 	"fmt"
 	"os"
 	"path/filepath"
 	"strings"
 	"gopkg.in/yaml.v3"
 )
 // VaultManifestEntry is a single vault entry parsed from a projects/<proj>/vaults/vault.yaml.
 // Name, Description, Path and Tags are copied from the manifest without
 // validation or normalization.
 type VaultManifestEntry struct {
 	ProjectID    string   // basename of projects/<proj>/, inferred from manifest path
 	Name         string   // vault name as declared in vault.yaml
 	Description  string   // human description
 	Path         string   // vault directory path exactly as written in vault.yaml (not resolved or made absolute here)
 	Tags         []string // tags declared in vault.yaml
 	ManifestFile string   // path of the vault.yaml this entry came from, as returned by the glob (absolute when repoRoot is absolute)
 }
 // vaultYAML mirrors the vault.yaml schema (only the fields we care about):
 // a top-level "vaults" list whose items carry name, description, path and tags.
 type vaultYAML struct {
 	Vaults []struct {
 		Name        string   `yaml:"name"`
 		Description string   `yaml:"description"`
 		Path        string   `yaml:"path"`
 		Tags        []string `yaml:"tags"`
 	} `yaml:"vaults"`
 }
 // VaultManifestRead collects the vault entries of every manifest matching
 // <repoRoot>/projects/*/vaults/vault.yaml into one flat slice, in the order
 // the glob returns the files.
 //
 // Contract:
 //   - The first manifest that cannot be read or parsed aborts the call; the
 //     returned error names the offending file and no entries are returned.
 //   - With no matching manifests the result has length 0 and the error is nil.
 //   - ProjectID of each entry is inferred from the manifest's path.
 func VaultManifestRead(repoRoot string) ([]VaultManifestEntry, error) {
 	pattern := filepath.Join(repoRoot, "projects", "*", "vaults", "vault.yaml")
 	manifestPaths, globErr := filepath.Glob(pattern)
 	if globErr != nil {
 		return nil, fmt.Errorf("vault_manifest_read: glob %q: %w", pattern, globErr)
 	}
 	var all []VaultManifestEntry
 	for idx := range manifestPaths {
 		parsed, parseErr := parseVaultManifest(manifestPaths[idx])
 		if parseErr != nil {
 			return nil, parseErr
 		}
 		all = append(all, parsed...)
 	}
 	return all, nil
 }
 // parseVaultManifest reads and decodes one vault.yaml and converts each item
 // of its "vaults" list into a VaultManifestEntry tagged with the project
 // inferred from manifestPath. Read and YAML errors are wrapped with the path.
 func parseVaultManifest(manifestPath string) ([]VaultManifestEntry, error) {
 	content, readErr := os.ReadFile(manifestPath)
 	if readErr != nil {
 		return nil, fmt.Errorf("vault_manifest_read: read %q: %w", manifestPath, readErr)
 	}
 	var doc vaultYAML
 	if yamlErr := yaml.Unmarshal(content, &doc); yamlErr != nil {
 		return nil, fmt.Errorf("vault_manifest_read: parse %q: %w", manifestPath, yamlErr)
 	}
 	project := inferProjectID(manifestPath)
 	// One output entry per declared vault, in declaration order.
 	result := make([]VaultManifestEntry, len(doc.Vaults))
 	for idx := range doc.Vaults {
 		decl := doc.Vaults[idx]
 		result[idx] = VaultManifestEntry{
 			ProjectID:    project,
 			Name:         decl.Name,
 			Description:  decl.Description,
 			Path:         decl.Path,
 			Tags:         decl.Tags,
 			ManifestFile: manifestPath,
 		}
 	}
 	return result, nil
 }
 // inferProjectID extracts the project basename from a path of the form
 // .../projects/<proj>/vaults/vault.yaml.
 //
 // The match is anchored at the end of the path, so a repository that itself
 // lives below another directory named "projects"
 // (e.g. /home/u/projects/repo/projects/<proj>/vaults/vault.yaml) still yields
 // <proj>. For paths that do not have the canonical shape it falls back to the
 // component following the last "projects" segment, and returns "" when there
 // is none.
 func inferProjectID(manifestPath string) string {
 	// Normalize separators and split.
 	parts := strings.Split(filepath.ToSlash(manifestPath), "/")
 	n := len(parts)
 	// Canonical shape: [..., "projects", <proj>, "vaults", <file>].
 	if n >= 4 && parts[n-4] == "projects" && parts[n-2] == "vaults" {
 		return parts[n-3]
 	}
 	// Fallback: walk backwards to the "projects" segment nearest the file.
 	// Starting at n-2 guarantees parts[i+1] exists.
 	for i := n - 2; i >= 0; i-- {
 		if parts[i] == "projects" {
 			return parts[i+1]
 		}
 	}
 	return ""
 }
@@ -0,0 +1,59 @@
 ---
 name: vault_manifest_read
 kind: function
 lang: go
 domain: infra
 version: "1.0.0"
 purity: impure
 signature: "func VaultManifestRead(repoRoot string) ([]VaultManifestEntry, error)"
 description: "Lee todos los manifests vault.yaml bajo projects/*/vaults/ del repo y devuelve una lista plana de entradas de vault con su ProjectID inferido del path."
 tags: [vault, manifest, yaml, infra, projects, storage]
 uses_functions: []
 uses_types: []
 returns: []
 returns_optional: false
 error_type: "error_go_core"
 imports:
  - "fmt"
  - "os"
  - "path/filepath"
  - "strings"
  - "gopkg.in/yaml.v3"
 params:
  - name: repoRoot
    desc: "Ruta absoluta a la raiz del repositorio fn_registry. Se usa como base para el glob projects/*/vaults/vault.yaml."
 output: "Slice plano de VaultManifestEntry (ProjectID, Name, Description, Path, Tags, ManifestFile). Vacio si no hay manifests. Error si un yaml no parsea, con el path concreto en el mensaje."
 tested: true
 tests:
  - "TestVaultManifestRead_HappyPath"
  - "TestVaultManifestRead_MalformedYAML"
  - "TestVaultManifestRead_EmptyDir"
 test_file_path: "functions/infra/vault_manifest_read_test.go"
 file_path: "functions/infra/vault_manifest_read.go"
 ---
 ## Ejemplo
 ```go
 entries, err := VaultManifestRead("/home/lucas/fn_registry")
 if err != nil {
    log.Fatal(err)
 }
 for _, e := range entries {
    fmt.Printf("%s/%s -> %s\n", e.ProjectID, e.Name, e.Path)
 }
 // app_turismo/turismo_spain -> /home/lucas/vaults/turismo_spain
 // app_finance/finance_data  -> /home/lucas/vaults/finance_data
 ```
 ## Notas
 `VaultManifestEntry` es un tipo local de esta funcion (no un tipo del registry). Contiene:
 - `ProjectID` — basename del directorio `projects/<proj>/`, inferido del path del manifest.
 - `Name`, `Description`, `Path`, `Tags` — copiados del yaml tal cual.
 - `ManifestFile` — path absoluto al vault.yaml de origen, util para mensajes de error y trazabilidad.
 El parseo usa `gopkg.in/yaml.v3` (ya en go.mod). Si un manifest falla, la funcion devuelve
 error inmediatamente con el path del fichero problemático. Los manifests sin entradas
 `vaults:` contribuyen cero entries (no es error). Si no existe ningun `projects/*/vaults/vault.yaml`
 el resultado es slice vacio sin error.
@@ -0,0 +1,113 @@
 package infra
 import (
 	"os"
 	"path/filepath"
 	"testing"
 )
 // TestVaultManifestRead_HappyPath writes manifests for two projects (three
 // vaults in total) and checks the flattened result and the inferred ProjectID.
 func TestVaultManifestRead_HappyPath(t *testing.T) {
 	const turismoManifest = `
 vaults:
  - name: turismo_spain
    description: "Datos de turismo en Espana"
    path: "/home/lucas/vaults/turismo_spain"
    tags: [turismo, espana]
  - name: turismo_raw
    description: "Datos brutos sin procesar"
    path: "/home/lucas/vaults/turismo_raw"
    tags: [raw]
 `
 	const financeManifest = `
 vaults:
  - name: finance_data
    description: "Datos financieros"
    path: "/home/lucas/vaults/finance_data"
    tags: [finance]
 `
 	root := t.TempDir()
 	writeManifest(t, root, "app_turismo", turismoManifest)
 	writeManifest(t, root, "app_finance", financeManifest)
 	entries, err := VaultManifestRead(root)
 	if err != nil {
 		t.Fatalf("unexpected error: %v", err)
 	}
 	if got := len(entries); got != 3 {
 		t.Fatalf("got %d entries, want 3", got)
 	}
 	// Index by vault name so the assertions do not depend on glob order.
 	index := make(map[string]VaultManifestEntry, len(entries))
 	for i := range entries {
 		index[entries[i].Name] = entries[i]
 	}
 	// turismo_spain: every copied field plus the inferred project.
 	spain, found := index["turismo_spain"]
 	if !found {
 		t.Fatal("missing entry 'turismo_spain'")
 	}
 	if spain.ProjectID != "app_turismo" {
 		t.Errorf("turismo_spain.ProjectID = %q, want %q", spain.ProjectID, "app_turismo")
 	}
 	if spain.Path != "/home/lucas/vaults/turismo_spain" {
 		t.Errorf("turismo_spain.Path = %q, want %q", spain.Path, "/home/lucas/vaults/turismo_spain")
 	}
 	if len(spain.Tags) != 2 || spain.Tags[0] != "turismo" {
 		t.Errorf("turismo_spain.Tags = %v, want [turismo espana]", spain.Tags)
 	}
 	if spain.ManifestFile == "" {
 		t.Error("turismo_spain.ManifestFile is empty")
 	}
 	// finance_data must be attributed to the other project.
 	finance, found := index["finance_data"]
 	if !found {
 		t.Fatal("missing entry 'finance_data'")
 	}
 	if finance.ProjectID != "app_finance" {
 		t.Errorf("finance_data.ProjectID = %q, want %q", finance.ProjectID, "app_finance")
 	}
 }
 // TestVaultManifestRead_MalformedYAML verifies that a manifest with invalid
 // YAML makes VaultManifestRead return an error.
 func TestVaultManifestRead_MalformedYAML(t *testing.T) {
 	const brokenManifest = `
 vaults:
  - name: [invalid yaml
    path: missing_bracket
 `
 	root := t.TempDir()
 	writeManifest(t, root, "bad_project", brokenManifest)
 	if _, err := VaultManifestRead(root); err == nil {
 		t.Fatal("expected error for malformed YAML, got nil")
 	}
 }
 // TestVaultManifestRead_EmptyDir verifies that a repo root without any
 // projects/ directory yields zero entries and no error.
 func TestVaultManifestRead_EmptyDir(t *testing.T) {
 	// Nothing is written under the temp root, so the glob has no matches.
 	entries, err := VaultManifestRead(t.TempDir())
 	if err != nil {
 		t.Fatalf("unexpected error for empty dir: %v", err)
 	}
 	if got := len(entries); got != 0 {
 		t.Fatalf("got %d entries, want 0", got)
 	}
 }
 // writeManifest writes content to <root>/projects/<proj>/vaults/vault.yaml,
 // creating the directories on the way, and fails the test on any I/O error.
 func writeManifest(t *testing.T, root, proj, content string) {
 	t.Helper()
 	vaultsDir := filepath.Join(root, "projects", proj, "vaults")
 	if mkErr := os.MkdirAll(vaultsDir, 0o755); mkErr != nil {
 		t.Fatalf("mkdir %s: %v", vaultsDir, mkErr)
 	}
 	manifest := filepath.Join(vaultsDir, "vault.yaml")
 	if wrErr := os.WriteFile(manifest, []byte(content), 0o644); wrErr != nil {
 		t.Fatalf("write %s: %v", manifest, wrErr)
 	}
 }
@@ -0,0 +1,265 @@
 package infra
 import (
 	"database/sql"
 	"fmt"
 	"path/filepath"
 	"strings"
 )
 // VaultSearchHit is a single result returned by VaultSearch.
 //
 // RelPath, Size, Mtime, Mime, Bucket and SubBucket are copied from the matching
 // row of the files table; VaultPath and VaultName identify the vault that was
 // searched and are the same for every hit of one VaultSearch call.
 type VaultSearchHit struct {
 	VaultPath string `json:"vault_path"`
 	VaultName string `json:"vault_name"` // basename of VaultPath (after resolving symlinks)
 	RelPath   string `json:"rel_path"`
 	Size      int64  `json:"size"`
 	Mtime     int64  `json:"mtime"` // presumably Unix seconds (the tests seed it with time.Now().Unix()) — confirm against the indexer
 	Mime      string `json:"mime"`
 	Bucket    string `json:"bucket"`
 	SubBucket string `json:"sub_bucket"`
 	Snippet   string `json:"snippet"` // reserved for an FTS5 snippet; both search strategies in this file currently leave it empty
 }
 // VaultSearch looks up files matching query in the index of the vault rooted at
 // vaultPath and returns at most limit hits.
 //
 // Behaviour:
 //   - The index is opened with VaultIndexOpen and closed before returning.
 //   - A limit <= 0 is replaced by the default of 50.
 //   - Hits come from two strategies merged by vaultSearchCombined: an FTS5 MATCH
 //     on files_fts first, then a LIKE on files.rel_path for the remaining slots.
 //     Results are deduplicated by rel_path.
 //   - VaultName on every hit is the basename of vaultPath after resolving
 //     symlinks (see resolveVaultName).
 //
 // An error is returned when the index cannot be opened or when the combined
 // search fails; both are wrapped with a "vault_search:" prefix. When nothing
 // matches, the result is an empty, non-nil slice.
 func VaultSearch(vaultPath, query string, limit int) ([]VaultSearchHit, error) {
 	const defaultLimit = 50
 	if limit < 1 {
 		limit = defaultLimit
 	}
 	index, openErr := VaultIndexOpen(vaultPath)
 	if openErr != nil {
 		return nil, fmt.Errorf("vault_search: open index: %w", openErr)
 	}
 	defer index.Close()
 	results, searchErr := vaultSearchCombined(index, vaultPath, resolveVaultName(vaultPath), query, limit)
 	if searchErr != nil {
 		return nil, fmt.Errorf("vault_search: %w", searchErr)
 	}
 	return results, nil
 }
 // vaultSearchCombined runs two search strategies against db and merges their
 // results, deduplicated by rel_path and capped at limit entries:
 //
 //  1. FTS5 MATCH on files_fts, using the query as prepared by safeFTSQuery.
 //     A failure here (for instance a query the FTS5 parser rejects) is not
 //     fatal on its own; the function simply moves on to strategy 2.
 //  2. LIKE on files.rel_path, using the first word-like token of the query
 //     (see simplifyForLike). It is skipped when the limit is already reached
 //     or when the query contains no such token.
 //
 // FTS hits are placed before LIKE hits. An error is returned only when both
 // strategies ran and both failed. The returned slice is never nil on success.
 func vaultSearchCombined(db *sql.DB, vaultPath, vaultName, query string, limit int) ([]VaultSearchHit, error) {
 	merged := []VaultSearchHit{}
 	dedup := make(map[string]struct{})
 	// absorb appends the not-yet-seen hits of batch to merged, stopping as soon
 	// as limit is reached.
 	absorb := func(batch []VaultSearchHit) {
 		for _, hit := range batch {
 			if len(merged) >= limit {
 				return
 			}
 			if _, dup := dedup[hit.RelPath]; dup {
 				continue
 			}
 			dedup[hit.RelPath] = struct{}{}
 			merged = append(merged, hit)
 		}
 	}
 	// Strategy 1: FTS5 MATCH. Its error is remembered so that a double failure
 	// can be reported below.
 	ftsHits, ftsErr := vaultSearchFTSContent(db, vaultPath, vaultName, safeFTSQuery(query), limit)
 	if ftsErr == nil {
 		absorb(ftsHits)
 	}
 	// Strategy 2: LIKE on rel_path with the simplified token.
 	pathToken := simplifyForLike(query)
 	if pathToken == "" || len(merged) >= limit {
 		return merged, nil
 	}
 	// Ask for extra rows to compensate for those that will be dropped as
 	// duplicates of hits already collected.
 	likeHits, likeErr := vaultSearchLike(db, vaultPath, vaultName, pathToken, limit-len(merged)+len(dedup))
 	if ftsErr != nil && likeErr != nil {
 		// Both failed — return a combined error.
 		return nil, fmt.Errorf("fts: %v; like: %v", ftsErr, likeErr)
 	}
 	absorb(likeHits)
 	return merged, nil
 }
 // vaultSearchFTSContent runs safeQuery as an FTS5 MATCH against files_fts and
 // maps each matching rowid back to a row of the files table.
 //
 // The lookup is done in two steps: first up to limit rowids are collected,
 // ordered by rank; then each rowid is resolved with a separate query on
 // files. A rowid for which that lookup fails for any reason is skipped
 // silently, so the result may contain fewer entries than FTS5 matched.
 //
 // NOTE(review): the correlation assumes files_fts rowids and files rowids stay
 // aligned; the original author notes this holds as long as profilers update
 // content_text in place rather than delete and reinsert — confirm against
 // VaultIndexWrite.
 //
 // Returns (nil, nil) when nothing matches. Snippet is left empty on every hit.
 func vaultSearchFTSContent(db *sql.DB, vaultPath, vaultName, safeQuery string, limit int) ([]VaultSearchHit, error) {
 	// Step 1: collect matching rowids from FTS5.
 	const qRowids = `
 		SELECT rowid
 		FROM files_fts
 		WHERE files_fts MATCH ?
 		ORDER BY rank
 		LIMIT ?`
 	cursor, queryErr := db.Query(qRowids, safeQuery, limit)
 	if queryErr != nil {
 		return nil, queryErr
 	}
 	defer cursor.Close()
 	var matched []int64
 	for cursor.Next() {
 		var id int64
 		if scanErr := cursor.Scan(&id); scanErr != nil {
 			return nil, scanErr
 		}
 		matched = append(matched, id)
 	}
 	if iterErr := cursor.Err(); iterErr != nil {
 		return nil, iterErr
 	}
 	if len(matched) == 0 {
 		return nil, nil
 	}
 	// Step 2: resolve each rowid against files.
 	var found []VaultSearchHit
 	for _, id := range matched {
 		hit := VaultSearchHit{VaultPath: vaultPath, VaultName: vaultName}
 		row := db.QueryRow(`
 			SELECT rel_path, size, mtime, mime, bucket, sub_bucket
 			FROM files WHERE rowid = ?`, id,
 		)
 		if row.Scan(&hit.RelPath, &hit.Size, &hit.Mtime, &hit.Mime, &hit.Bucket, &hit.SubBucket) != nil {
 			// No usable files row for this rowid — skip it.
 			continue
 		}
 		found = append(found, hit)
 	}
 	return found, nil
 }
 // vaultSearchLike returns up to limit rows of files whose rel_path contains
 // query as a substring, newest mtime first. Snippet is left empty on every hit.
 //
 // NOTE(review): query is bound as a parameter but LIKE wildcards inside it
 // ('%' and '_') are not escaped, so an underscore matches any single character.
 func vaultSearchLike(db *sql.DB, vaultPath, vaultName, query string, limit int) ([]VaultSearchHit, error) {
 	const qLike = `
 		SELECT rel_path, size, mtime, mime, bucket, sub_bucket
 		FROM files
 		WHERE rel_path LIKE '%' || ? || '%'
 		ORDER BY mtime DESC
 		LIMIT ?`
 	cursor, queryErr := db.Query(qLike, query, limit)
 	if queryErr != nil {
 		return nil, queryErr
 	}
 	defer cursor.Close()
 	var found []VaultSearchHit
 	for cursor.Next() {
 		hit := VaultSearchHit{VaultPath: vaultPath, VaultName: vaultName}
 		scanErr := cursor.Scan(&hit.RelPath, &hit.Size, &hit.Mtime, &hit.Mime, &hit.Bucket, &hit.SubBucket)
 		if scanErr != nil {
 			return nil, scanErr
 		}
 		found = append(found, hit)
 	}
 	// An iteration error is returned together with whatever was read so far.
 	return found, cursor.Err()
 }
 // resolveVaultName returns the last path element of vaultPath once symlinks
 // have been resolved. If the path cannot be resolved, the last element of
 // vaultPath itself is used.
 func resolveVaultName(vaultPath string) string {
 	if target, err := filepath.EvalSymlinks(vaultPath); err == nil {
 		return filepath.Base(target)
 	}
 	return filepath.Base(vaultPath)
 }
 // safeFTSQuery prepares a user query for use in an FTS5 MATCH expression.
 //
 // The query is trimmed of surrounding whitespace and then:
 //   - returned as-is when empty;
 //   - returned as-is when it contains ":" (treated as an explicit column
 //     filter) or one of the FTS5 boolean operators " AND ", " OR ", " NOT ".
 //     FTS5 only recognises these operators in upper case, so the check is
 //     case-sensitive: ordinary lowercase words such as "and" or "not" do not
 //     count as operators;
 //   - otherwise wrapped in double quotes, with embedded double quotes doubled,
 //     so FTS5 treats it as a single phrase. This avoids parser errors on
 //     tokens such as "hello-world".
 //
 // Queries that are passed through unchanged (for example "foo:bar:") may still
 // be rejected by the FTS5 parser; callers are expected to handle that error.
 func safeFTSQuery(query string) string {
 	q := strings.TrimSpace(query)
 	if q == "" {
 		return q
 	}
 	// If the user already uses a column prefix or explicit operators, pass through.
 	if strings.Contains(q, ":") {
 		return q
 	}
 	for _, op := range []string{" AND ", " OR ", " NOT "} {
 		if strings.Contains(q, op) {
 			return q
 		}
 	}
 	// Escape any double-quotes in the query before wrapping.
 	return `"` + strings.ReplaceAll(q, `"`, `""`) + `"`
 }
 // isFTSSyntaxError reports whether err looks like an FTS5 query parser error,
 // judged by a case-insensitive search of the error message for known markers.
 // A nil error is never a syntax error.
 func isFTSSyntaxError(err error) bool {
 	if err == nil {
 		return false
 	}
 	lowered := strings.ToLower(err.Error())
 	for _, marker := range []string{"syntax error", "no such column", "fts5: syntax error"} {
 		if strings.Contains(lowered, marker) {
 			return true
 		}
 	}
 	return false
 }
 // simplifyForLike returns the first word-like token of query for use in a LIKE
 // pattern. A token is a maximal run of ASCII letters, ASCII digits, '_' or '-';
 // everything else (whitespace, colons, quotes, non-ASCII characters, ...) acts
 // as a separator. Only the first token is kept, so "foo:bar:" yields "foo" and
 // "hello world" yields "hello". The result is empty when query has no token.
 func simplifyForLike(query string) string {
 	isSeparator := func(r rune) bool {
 		switch {
 		case 'a' <= r && r <= 'z', 'A' <= r && r <= 'Z', '0' <= r && r <= '9', r == '_', r == '-':
 			return false
 		}
 		return true
 	}
 	if words := strings.FieldsFunc(query, isSeparator); len(words) > 0 {
 		return words[0]
 	}
 	return ""
 }
@@ -0,0 +1,61 @@
 ---
 name: vault_search
 kind: function
 lang: go
 domain: infra
 version: "1.0.0"
 purity: impure
 signature: "func VaultSearch(vaultPath, query string, limit int) ([]VaultSearchHit, error)"
 description: "Busca en vault_index.db de un vault usando FTS5 sobre files_fts. Si el query rompe el parser FTS5, hace fallback a LIKE sobre rel_path. Retorna hits con snippet de contexto."
 tags: [vault, search, fts5, sqlite, infra]
 uses_functions: ["vault_index_open_go_infra"]
 uses_types: ["vault_file_go_infra"]
 returns: []
 returns_optional: false
 error_type: "error_go_core"
 imports: [database/sql, fmt, path/filepath, strings]
 params:
  - name: vaultPath
    desc: "ruta absoluta al directorio raiz del vault (puede ser symlink)"
  - name: query
    desc: "termino o frase de busqueda; se escapa automaticamente para FTS5 salvo que ya incluya operadores booleanos o prefijos de columna"
  - name: limit
    desc: "maximo de resultados; si es <= 0 se usa 50"
 output: "slice de VaultSearchHit ordenado por rank FTS5 (o mtime DESC en fallback LIKE); slice vacio si no hay resultados"
 tested: true
 tests:
  - "FTS match devuelve hit con snippet"
  - "query sin resultados retorna slice vacio"
  - "limit se respeta"
  - "query FTS invalida activa fallback LIKE"
  - "limit cero usa 50 por defecto"
 test_file_path: "functions/infra/vault_search_test.go"
 file_path: "functions/infra/vault_search.go"
 ---
 ## Ejemplo
 ```go
 hits, err := infra.VaultSearch("/home/lucas/vaults/turismo_spain", "hoteles", 20)
 if err != nil {
    log.Fatal(err)
 }
 for _, h := range hits {
    fmt.Printf("[%s] %s  %s\n", h.VaultName, h.RelPath, h.Snippet)
 }
 ```
 ## Notas
 `VaultSearchHit` es un struct local definido en este archivo (no en `vault_file.go`)
 porque combina campos de `files` + metadatos de contexto de busqueda (Snippet, VaultPath, VaultName).
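 **FTS5 safety:** el helper `safeFTSQuery` envuelve la query en comillas dobles
 cuando no contiene operadores booleanos ni prefijos de columna. Esto evita errores
 del parser en tokens como `hello-world`. Las queries que contienen `:` (p. ej.
 `foo:bar:`) se pasan sin modificar y pueden ser rechazadas por el parser FTS5; en
 ese caso los resultados provienen unicamente de la busqueda LIKE.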
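 **Fallback LIKE:** ademas del MATCH (falle o no), mientras no se haya alcanzado `limit` se ejecuta
 `WHERE rel_path LIKE '%' || token || '%'`, donde `token` es la primera secuencia de letras, digitos,
 `_` o `-` de la query. Los resultados de ambas busquedas se fusionan sin duplicar `rel_path`.
 Todos los hits tienen `Snippet=""` en la implementacion actual.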
 **VaultName:** se deriva del `filepath.Base(filepath.EvalSymlinks(vaultPath))`.
 Si `EvalSymlinks` falla (e.g. symlink roto), usa `filepath.Base(vaultPath)`.
@@ -0,0 +1,147 @@
 package infra
 import (
 	"testing"
 	"time"
 )
 // openTestVaultDir returns a fresh temp dir on which VaultIndexOpen has been
 // called once (presumably creating vault_index.db inside it — confirm against
 // VaultIndexOpen). The handle is closed again before returning, so callers
 // reopen the index themselves.
 func openTestVaultDir(t *testing.T) string {
 	t.Helper()
 	vaultDir := t.TempDir()
 	handle, openErr := VaultIndexOpen(vaultDir)
 	if openErr != nil {
 		t.Fatalf("VaultIndexOpen: %v", openErr)
 	}
 	handle.Close()
 	return vaultDir
 }
 // seedVaultFile inserts one file into the index stored in dir: a row in files
 // (with a fixed dummy sha256, empty ext, and the current Unix time as both
 // mtime and indexed_at) and a matching row in files_fts carrying contentText.
 // Any failure aborts the calling test.
 func seedVaultFile(t *testing.T, dir, relPath, mime, bucket, subBucket, contentText string, size int64) {
 	t.Helper()
 	index, openErr := VaultIndexOpen(dir)
 	if openErr != nil {
 		t.Fatalf("VaultIndexOpen seed: %v", openErr)
 	}
 	defer index.Close()
 	stamp := time.Now().Unix()
 	if _, execErr := index.Exec(`
 		INSERT INTO files (rel_path, size, mtime, sha256, mime, ext, bucket, sub_bucket, indexed_at)
 		VALUES (?, ?, ?, 'aabbccdd', ?, '', ?, ?, ?)`,
 		relPath, size, stamp, mime, bucket, subBucket, stamp,
 	); execErr != nil {
 		t.Fatalf("seed files: %v", execErr)
 	}
 	if _, execErr := index.Exec(`INSERT INTO files_fts(rel_path, content_text) VALUES (?, ?)`, relPath, contentText); execErr != nil {
 		t.Fatalf("seed files_fts: %v", execErr)
 	}
 }
 // --- Tests ---
 // TestVaultSearch_FTSMatch checks that a word present only in one file's
 // content_text returns exactly that file, with VaultName filled in.
 func TestVaultSearch_FTSMatch(t *testing.T) {
 	t.Run("FTS match devuelve hit con snippet", func(t *testing.T) {
 		vault := openTestVaultDir(t)
 		seedVaultFile(t, vault, "data/raw/informe.csv", "text/csv", "data", "raw",
 			"ventas trimestrales empresa iberica", 1024)
 		seedVaultFile(t, vault, "data/raw/other.csv", "text/csv", "data", "raw",
 			"productos inventario almacen", 512)
 		got, err := VaultSearch(vault, "ventas", 10)
 		if err != nil {
 			t.Fatalf("VaultSearch: %v", err)
 		}
 		if n := len(got); n != 1 {
 			t.Fatalf("got %d hits, want 1", n)
 		}
 		only := got[0]
 		if only.RelPath != "data/raw/informe.csv" {
 			t.Errorf("RelPath = %q, want data/raw/informe.csv", only.RelPath)
 		}
 		if only.VaultName == "" {
 			t.Errorf("VaultName should not be empty")
 		}
 	})
 }
 // TestVaultSearch_NoMatch checks that a query matching neither content nor
 // path returns zero hits and no error.
 func TestVaultSearch_NoMatch(t *testing.T) {
 	t.Run("query sin resultados retorna slice vacio", func(t *testing.T) {
 		vault := openTestVaultDir(t)
 		seedVaultFile(t, vault, "data/raw/file.csv", "text/csv", "data", "raw", "some content", 100)
 		got, err := VaultSearch(vault, "zzznomatch", 10)
 		if err != nil {
 			t.Fatalf("VaultSearch: %v", err)
 		}
 		if n := len(got); n != 0 {
 			t.Errorf("got %d hits, want 0", n)
 		}
 	})
 }
 // TestVaultSearch_LimitRespected seeds ten files sharing a keyword and checks
 // that a limit of 3 returns exactly three hits.
 func TestVaultSearch_LimitRespected(t *testing.T) {
 	t.Run("limit se respeta", func(t *testing.T) {
 		vault := openTestVaultDir(t)
 		// filea.csv .. filej.csv
 		for suffix := 'a'; suffix < 'a'+10; suffix++ {
 			relPath := "data/raw/file" + string(suffix) + ".csv"
 			seedVaultFile(t, vault, relPath, "text/csv", "data", "raw", "common keyword everywhere", 100)
 		}
 		got, err := VaultSearch(vault, "common", 3)
 		if err != nil {
 			t.Fatalf("VaultSearch: %v", err)
 		}
 		if n := len(got); n != 3 {
 			t.Errorf("got %d hits, want 3", n)
 		}
 	})
 }
 // TestVaultSearch_BadFTSQuery_FallbackLike checks that a query the FTS5 parser
 // is expected to reject still produces hits through the LIKE search on
 // rel_path, and that those hits carry an empty Snippet.
 func TestVaultSearch_BadFTSQuery_FallbackLike(t *testing.T) {
 	t.Run("query FTS invalida activa fallback LIKE", func(t *testing.T) {
 		vault := openTestVaultDir(t)
 		// The rel_path contains "foo", the token LIKE will search for.
 		seedVaultFile(t, vault, "data/raw/foobar_report.csv", "text/csv", "data", "raw", "", 200)
 		// "foo:bar:" contains ":" and is therefore handed to FTS5 unquoted by
 		// safeFTSQuery; the MATCH is expected to fail ("no such column"), which
 		// leaves only the LIKE search on rel_path.
 		got, err := VaultSearch(vault, "foo:bar:", 10)
 		if err != nil {
 			t.Fatalf("VaultSearch: %v", err)
 		}
 		if len(got) == 0 {
 			t.Errorf("expected fallback LIKE to find foobar_report.csv, got 0 hits")
 		}
 		for i := range got {
 			if snippet := got[i].Snippet; snippet != "" {
 				t.Errorf("fallback hits should have empty Snippet, got %q", snippet)
 			}
 		}
 	})
 }
 // TestVaultSearch_LimitZeroDefaults seeds 55 files sharing a keyword and checks
 // that limit 0 falls back to the default of 50 hits.
 func TestVaultSearch_LimitZeroDefaults(t *testing.T) {
 	t.Run("limit cero usa 50 por defecto", func(t *testing.T) {
 		vault := openTestVaultDir(t)
 		// Unique names doc<letter><digit>.csv: the letter encodes the tens
 		// ('a' for 0-9, 'b' for 10-19, ...), the digit encodes the units.
 		for i := 0; i < 55; i++ {
 			tens := string(rune('a' + i/10))
 			units := string(rune('0' + i%10))
 			relPath := "data/raw/doc" + tens + units + ".csv"
 			seedVaultFile(t, vault, relPath, "text/csv", "data", "raw", "keyword alpha beta", 100)
 		}
 		got, err := VaultSearch(vault, "keyword", 0)
 		if err != nil {
 			t.Fatalf("VaultSearch: %v", err)
 		}
 		if n := len(got); n != 50 {
 			t.Errorf("got %d hits, want 50 (default limit)", n)
 		}
 	})
 }
@@ -0,0 +1,20 @@
 package ml
 import "encoding/json"
 // GenconfigMarshal serialises cfg to JSON indented with two spaces and no line
 // prefix. Keys follow the struct's declaration order and json tags
 // (snake_case); fields tagged omitempty are left out when they hold their zero
 // value.
 //
 // NOTE(review): the format is intended to match Python's
 // json.dumps(indent=2, sort_keys=False). encoding/json escapes '<', '>' and '&'
 // and writes whole-number floats without a decimal point, so byte-for-byte
 // equality with Python output is not guaranteed — confirm where that matters.
 func GenconfigMarshal(cfg GenerationConfig) ([]byte, error) {
 	const (
 		linePrefix = ""
 		indentUnit = "  "
 	)
 	return json.MarshalIndent(cfg, linePrefix, indentUnit)
 }
 // GenconfigUnmarshal parses JSON (compact or indented) into a GenerationConfig.
 // Keys must use the snake_case names of the json tags: negative_prompt,
 // cfg_scale, model_type, etc. On a parse error the zero GenerationConfig is
 // returned together with the error.
 func GenconfigUnmarshal(data []byte) (GenerationConfig, error) {
 	parsed := GenerationConfig{}
 	if err := json.Unmarshal(data, &parsed); err == nil {
 		return parsed, nil
 	} else {
 		return GenerationConfig{}, err
 	}
 }
@@ -0,0 +1,84 @@
 ---
 name: genconfig_json_marshal
 kind: function
 lang: go
 domain: ml
 version: "1.0.0"
 purity: impure
 signature: "func GenconfigMarshal(cfg GenerationConfig) ([]byte, error)\nfunc GenconfigUnmarshal(data []byte) (GenerationConfig, error)"
 description: "Wrappers json.Marshal/Unmarshal para GenerationConfig con formato canonico (MarshalIndent 2 espacios). Garantiza roundtrip identico al Python: json.dumps(indent=2, sort_keys=False). Campos JSON en snake_case."
 tags: [ml, json, marshal, unmarshal, serialization, generation, canonical]
 uses_functions: []
 uses_types: [generation_config_go_ml]
 returns: []
 returns_optional: false
 error_type: "error_go_core"
 imports: ["encoding/json"]
 params:
  - name: cfg
    desc: "GenerationConfig a serializar. Campos omitempty (negative_prompt, loras, clip_skip) se omiten si son zero/nil/empty."
  - name: data
    desc: "JSON bytes a deserializar. Acepta formato compacto o con indent. Keys deben ser snake_case (negative_prompt, cfg_scale, model_type, etc.)."
 output: "GenconfigMarshal: bytes JSON con indent 2 espacios, orden de campos segun declaracion del struct (prompt, negative_prompt, seed, steps, cfg_scale, sampler, width, height, model, loras, clip_skip). GenconfigUnmarshal: GenerationConfig poblado o error de parsing."
 tested: true
 tests:
  - "roundtrip marshal unmarshal produce config igual"
  - "json cross-language snake_case keys se deserializan correctamente"
 test_file_path: "functions/ml/genconfig_test.go"
 file_path: "functions/ml/genconfig_json_marshal.go"
 ---
 ## Ejemplo
 ```go
 cfg := ml.GenerationConfig{
    Prompt:   "a mountain at sunset",
    Seed:     1234,
    Steps:    30,
    CfgScale: 7.0,
    Sampler:  "euler",
    Width:    768,
    Height:   512,
    Model:    ml.ModelRef{Name: "sdxl-base", ModelType: "sdxl", Quantization: "fp16"},
 }
 b, err := ml.GenconfigMarshal(cfg)
 // b == {
 //   "prompt": "a mountain at sunset",
 //   "seed": 1234,
 //   ...
 // }
 cfg2, err := ml.GenconfigUnmarshal(b)
 // cfg2 == cfg  (DeepEqual)
 ```
 ## Notas
 ### Formato canonico y compatibilidad con Python
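 `GenconfigMarshal` usa `json.MarshalIndent(cfg, "", "  ")`. El formato resultante es estructuralmente equivalente al que produce Python con `model.model_dump_json()` o `json.dumps(data, indent=2)` cuando `sort_keys=False` (ojo: `encoding/json` escapa `<`, `>` y `&` como `\u003c`, `\u003e`, `\u0026` y escribe los floats enteros sin decimal, p. ej. `7` en vez de `7.0`, por lo que la salida no es necesariamente identica byte a byte):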
 - Keys en orden de declaracion del struct (no alfabetico).
 - Indent de 2 espacios, sin trailing whitespace.
 - Campos omitempty ausentes si zero: `negative_prompt` ausente si `""`, `loras` ausente si `[]`, `clip_skip` ausente si `nil`.
 ### Keys JSON (snake_case obligatorio)
 | Campo Go | Key JSON |
 |---|---|
 | `Prompt` | `"prompt"` |
 | `NegativePrompt` | `"negative_prompt"` |
 | `Seed` | `"seed"` |
 | `Steps` | `"steps"` |
 | `CfgScale` | `"cfg_scale"` |
 | `Sampler` | `"sampler"` |
 | `Width` | `"width"` |
 | `Height` | `"height"` |
 | `Model.ModelType` | `"model_type"` |
 | `Model.Quantization` | `"quantization"` |
 | `ClipSkip` | `"clip_skip"` |
 ### Por que impure
 Los errores de `json.Unmarshal` son errores de parsing del input externo, no de I/O, pero se modelan como `(T, error)` para forzar manejo explicito en el caller. Marcado `impure` con `error_type: error_go_core` por convencion del registry.
@@ -0,0 +1,260 @@
 package ml
 import (
 	"reflect"
 	"strings"
 	"testing"
 )
 // ---------------------------------------------------------------------------
 // TestGenconfigToSdcliArgs
 // ---------------------------------------------------------------------------
 func TestGenconfigToSdcliArgs(t *testing.T) {
 	clipSkip := 2
 	t.Run("config basico sin loras ni clip_skip", func(t *testing.T) {
 		cfg := GenerationConfig{
 			Prompt:   "a cat",
 			Seed:     42,
 			Steps:    20,
 			CfgScale: 7.5,
 			Sampler:  "euler",
 			Width:    512,
 			Height:   512,
 			Model:    ModelRef{Name: "v1-5", ModelType: "sd15", Quantization: "fp16"},
 		}
 		args := GenconfigToSdcliArgs(cfg)
 		want := []string{
 			"--prompt", "a cat",
 			"--seed", "42",
 			"--steps", "20",
 			"--cfg-scale", "7.5",
 			"--width", "512",
 			"--height", "512",
 			"--sampling-method", "euler",
 		}
 		if !reflect.DeepEqual(args, want) {
 			t.Errorf("got  %v\nwant %v", args, want)
 		}
 	})
 	t.Run("loras se emiten como pares path:weight", func(t *testing.T) {
 		cfg := GenerationConfig{
 			Prompt:   "portrait",
 			Seed:     1,
 			Steps:    10,
 			CfgScale: 7.0,
 			Sampler:  "euler",
 			Width:    512,
 			Height:   512,
 			Model:    ModelRef{Name: "v1-5", ModelType: "sd15", Quantization: "fp16", Path: "/models/v1.safetensors"},
 			Loras: []LoraRef{
 				{Path: "/loras/detail.safetensors", Weight: 0.8},
 				{Path: "/loras/style.safetensors", Weight: 0.5},
 			},
 			ClipSkip: &clipSkip,
 		}
 		args := GenconfigToSdcliArgs(cfg)
 		// Verificar que existen los pares --lora para ambas loras
 		loraIdx := indexAll(args, "--lora")
 		if len(loraIdx) != 2 {
 			t.Fatalf("esperaba 2 flags --lora, got %d en %v", len(loraIdx), args)
 		}
 		wantLoras := []string{
 			"/loras/detail.safetensors:0.8",
 			"/loras/style.safetensors:0.5",
 		}
 		for i, idx := range loraIdx {
 			if idx+1 >= len(args) {
 				t.Fatalf("--lora[%d] sin valor siguiente", i)
 			}
 			if args[idx+1] != wantLoras[i] {
 				t.Errorf("lora[%d]: got %q, want %q", i, args[idx+1], wantLoras[i])
 			}
 		}
 		// Verificar --model y --clip-skip presentes
 		if !containsPair(args, "--model", "/models/v1.safetensors") {
 			t.Errorf("--model no encontrado en %v", args)
 		}
 		if !containsPair(args, "--clip-skip", "2") {
 			t.Errorf("--clip-skip no encontrado en %v", args)
 		}
 	})
 	t.Run("sampler dpm++2m se traduce a dpmpp2m", func(t *testing.T) {
 		cfg := GenerationConfig{
 			Prompt:   "x",
 			Seed:     0,
 			Steps:    1,
 			CfgScale: 1.0,
 			Sampler:  "dpm++2m",
 			Width:    64,
 			Height:   64,
 			Model:    ModelRef{Name: "m", ModelType: "sd15", Quantization: "fp16"},
 		}
 		args := GenconfigToSdcliArgs(cfg)
 		if !containsPair(args, "--sampling-method", "dpmpp2m") {
 			t.Errorf("sampler no traducido; args=%v", args)
 		}
 	})
 	t.Run("negative_prompt vacio no genera flag", func(t *testing.T) {
 		cfg := GenerationConfig{
 			Prompt:         "x",
 			NegativePrompt: "",
 			Seed:           0,
 			Steps:          1,
 			CfgScale:       1.0,
 			Sampler:        "euler",
 			Width:          64,
 			Height:         64,
 			Model:          ModelRef{Name: "m", ModelType: "sd15", Quantization: "fp16"},
 		}
 		args := GenconfigToSdcliArgs(cfg)
 		for _, a := range args {
 			if a == "--negative-prompt" {
 				t.Errorf("flag --negative-prompt presente aunque NegativePrompt es vacio")
 			}
 		}
 	})
 }
 // ---------------------------------------------------------------------------
 // TestGenconfigMarshalRoundtrip
 // ---------------------------------------------------------------------------
 // TestGenconfigMarshalRoundtrip checks that a fully populated config survives
 // GenconfigMarshal followed by GenconfigUnmarshal unchanged.
 func TestGenconfigMarshalRoundtrip(t *testing.T) {
 	t.Run("roundtrip marshal unmarshal produce config igual", func(t *testing.T) {
 		clip := 2
 		original := GenerationConfig{
 			Prompt:         "sunset over the mountains",
 			NegativePrompt: "blurry, low quality",
 			Seed:           99,
 			Steps:          30,
 			CfgScale:       7.5,
 			Sampler:        "dpm++2m",
 			Width:          768,
 			Height:         512,
 			Model: ModelRef{
 				Name:         "sdxl-base",
 				ModelType:    "sdxl",
 				Quantization: "fp16",
 				Path:         "/models/sdxl.safetensors",
 			},
 			Loras: []LoraRef{
 				{Path: "/loras/detail.safetensors", Weight: 0.8},
 			},
 			ClipSkip: &clip,
 		}
 		encoded, marshalErr := GenconfigMarshal(original)
 		if marshalErr != nil {
 			t.Fatalf("GenconfigMarshal: %v", marshalErr)
 		}
 		decoded, unmarshalErr := GenconfigUnmarshal(encoded)
 		if unmarshalErr != nil {
 			t.Fatalf("GenconfigUnmarshal: %v", unmarshalErr)
 		}
 		if same := reflect.DeepEqual(original, decoded); !same {
 			t.Errorf("roundtrip diverge\norig: %+v\ngot:  %+v", original, decoded)
 		}
 	})
 }
 // ---------------------------------------------------------------------------
 // TestGenconfigCrossLanguageJSON
 // ---------------------------------------------------------------------------
 // TestGenconfigCrossLanguageJSON parses a hand-written fixture shaped like the
 // JSON the Python side is expected to produce and checks that the snake_case
 // keys land in the right fields and survive re-serialisation.
 func TestGenconfigCrossLanguageJSON(t *testing.T) {
 	// Hand-written fixture replicating what Python would generate with
 	//   json.dumps(config.model_dump(), indent=2)
 	// snake_case keys, in the declaration order of the Python model.
 	const fixture = `{
  "prompt": "a dragon",
  "negative_prompt": "ugly",
  "seed": 1234,
  "steps": 25,
  "cfg_scale": 7.0,
  "sampler": "euler_a",
  "width": 512,
  "height": 512,
  "model": {
    "name": "v1-5",
    "model_type": "sd15",
    "quantization": "fp16"
  },
  "loras": [
    {
      "path": "/loras/dragon.safetensors",
      "weight": 0.9
    }
  ]
 }`
 	t.Run("json cross-language snake_case keys se deserializan correctamente", func(t *testing.T) {
 		parsed, parseErr := GenconfigUnmarshal([]byte(fixture))
 		if parseErr != nil {
 			t.Fatalf("GenconfigUnmarshal fixture: %v", parseErr)
 		}
 		// Spot-check the key fields.
 		if got := parsed.Prompt; got != "a dragon" {
 			t.Errorf("Prompt: got %q", got)
 		}
 		if got := parsed.NegativePrompt; got != "ugly" {
 			t.Errorf("NegativePrompt: got %q", got)
 		}
 		if got := parsed.CfgScale; got != 7.0 {
 			t.Errorf("CfgScale: got %v", got)
 		}
 		if got := parsed.Model.ModelType; got != "sd15" {
 			t.Errorf("Model.ModelType: got %q", got)
 		}
 		if len(parsed.Loras) != 1 || parsed.Loras[0].Weight != 0.9 {
 			t.Errorf("Loras: got %+v", parsed.Loras)
 		}
 		// Re-marshal and make sure the snake_case keys are still present.
 		encoded, marshalErr := GenconfigMarshal(parsed)
 		if marshalErr != nil {
 			t.Fatalf("GenconfigMarshal: %v", marshalErr)
 		}
 		text := string(encoded)
 		for _, key := range []string{"negative_prompt", "cfg_scale", "model_type", "quantization"} {
 			if quoted := `"` + key + `"`; !strings.Contains(text, quoted) {
 				t.Errorf("key %q ausente en JSON re-serializado:\n%s", key, text)
 			}
 		}
 	})
 }
 // ---------------------------------------------------------------------------
 // helpers
 // ---------------------------------------------------------------------------
 // indexAll returns, in ascending order, every index at which val occurs in
 // slice. The result is nil when val does not occur.
 func indexAll(slice []string, val string) []int {
 	var positions []int
 	for i := 0; i < len(slice); i++ {
 		if slice[i] != val {
 			continue
 		}
 		positions = append(positions, i)
 	}
 	return positions
 }
 // containsPair reports whether flag appears in slice immediately followed by
 // value.
 func containsPair(slice []string, flag, value string) bool {
 	for next := 1; next < len(slice); next++ {
 		if slice[next-1] == flag && slice[next] == value {
 			return true
 		}
 	}
 	return false
 }
@@ -0,0 +1,59 @@
 package ml
 import (
 	"fmt"
 	"strconv"
 )
 // samplerMap translates the canonical sampler names of the ml domain into the
 // identifiers passed to stable-diffusion.cpp via --sampling-method. Names that
 // are not listed here are passed through unchanged by GenconfigToSdcliArgs.
 var samplerMap = map[string]string{
 	"dpm++2m":    "dpmpp2m",
 	"dpm++2m_v2": "dpmpp2mv2",
 	"dpm2":       "dpm2",
 	"euler":      "euler",
 	"euler_a":    "euler_a",
 	"heun":       "heun",
 	"lcm":        "lcm",
 }
 // GenconfigToSdcliArgs converts cfg into a command-line argument list for
 // stable-diffusion.cpp. It is the Go counterpart of genconfig_to_sdcpp_args_py_ml.
 //
 // Arguments are emitted in this order:
 //
 //	--prompt, --seed, --steps, --cfg-scale, --width, --height,
 //	[--negative-prompt], --sampling-method, [--model], [--clip-skip],
 //	then one "--lora" "path:weight" pair per entry of cfg.Loras.
 //
 // The bracketed flags are only emitted when NegativePrompt is non-empty,
 // Model.Path is non-empty, and ClipSkip is non-nil, respectively. The sampler
 // is translated through samplerMap; a name missing from the map is used
 // verbatim. The function performs no I/O and does not modify any state.
 func GenconfigToSdcliArgs(cfg GenerationConfig) []string {
 	// Resolve the sampler up front; unknown names fall back to the literal.
 	sdSampler, known := samplerMap[cfg.Sampler]
 	if !known {
 		sdSampler = cfg.Sampler
 	}
 	var out []string
 	emit := func(flag, value string) {
 		out = append(out, flag, value)
 	}
 	emit("--prompt", cfg.Prompt)
 	emit("--seed", strconv.FormatInt(cfg.Seed, 10))
 	emit("--steps", strconv.Itoa(cfg.Steps))
 	// 'f' with precision -1 prints the shortest decimal form, e.g. 7.5.
 	emit("--cfg-scale", strconv.FormatFloat(cfg.CfgScale, 'f', -1, 64))
 	emit("--width", strconv.Itoa(cfg.Width))
 	emit("--height", strconv.Itoa(cfg.Height))
 	if cfg.NegativePrompt != "" {
 		emit("--negative-prompt", cfg.NegativePrompt)
 	}
 	emit("--sampling-method", sdSampler)
 	if cfg.Model.Path != "" {
 		emit("--model", cfg.Model.Path)
 	}
 	if cfg.ClipSkip != nil {
 		emit("--clip-skip", strconv.Itoa(*cfg.ClipSkip))
 	}
 	for i := range cfg.Loras {
 		// %g drops insignificant zeros from the weight: 0.800000 -> 0.8.
 		emit("--lora", fmt.Sprintf("%s:%g", cfg.Loras[i].Path, cfg.Loras[i].Weight))
 	}
 	return out
 }
@@ -0,0 +1,59 @@
 ---
 name: genconfig_to_sdcli_args
 kind: function
 lang: go
 domain: ml
 version: "1.0.0"
 purity: pure
 signature: "func GenconfigToSdcliArgs(cfg GenerationConfig) []string"
 description: "Convierte un GenerationConfig en argumentos CLI para stable-diffusion.cpp. Espejo Go de genconfig_to_sdcpp_args_py_ml. Loras se emiten como pares repetidos --lora path:weight. Sampler traducido via samplerMap canonico."
 tags: [ml, stable-diffusion, cli, args, generation, pure]
 uses_functions: []
 uses_types: [generation_config_go_ml]
 returns: []
 returns_optional: false
 error_type: ""
 imports: ["fmt", "strconv"]
 params:
  - name: cfg
    desc: "Parametros completos de generacion de imagen. Sampler debe ser uno de los valores de SamplerName. Model.Path se emite como --model si no esta vacio."
 output: "Slice de strings listos para pasar a exec.Command o similar. Incluye --prompt, --seed, --steps, --cfg-scale, --width, --height, --sampling-method, opcionales --negative-prompt / --model / --clip-skip, y pares --lora path:weight por cada LoraRef."
 tested: true
 tests:
  - "config basico sin loras ni clip_skip"
  - "loras se emiten como pares path:weight"
  - "sampler dpm++2m se traduce a dpmpp2m"
  - "negative_prompt vacio no genera flag"
 test_file_path: "functions/ml/genconfig_test.go"
 file_path: "functions/ml/genconfig_to_sdcli_args.go"
 ---
 ## Ejemplo
 ```go
 clip := 2
 cfg := ml.GenerationConfig{
    Prompt:   "a cat",
    Seed:     42,
    Steps:    20,
    CfgScale: 7.5,
    Sampler:  "dpm++2m",
    Width:    512,
    Height:   512,
    Model:    ml.ModelRef{Name: "v1-5", ModelType: "sd15", Quantization: "fp16", Path: "/models/v1-5.safetensors"},
    Loras:    []ml.LoraRef{{Path: "/loras/detail.safetensors", Weight: 0.8}},
    ClipSkip: &clip,
 }
 args := ml.GenconfigToSdcliArgs(cfg)
 // args == ["--prompt","a cat","--seed","42","--steps","20",
 //          "--cfg-scale","7.5","--width","512","--height","512",
 //          "--sampling-method","dpmpp2m","--model","/models/v1-5.safetensors",
 //          "--clip-skip","2","--lora","/loras/detail.safetensors:0.8"]
 ```
 ## Notas
 - `samplerMap` traduce nombres canonicos del dominio ml a los identificadores que acepta stable-diffusion.cpp. Si el sampler no esta en el mapa se usa el valor literal.
 - El flag de modelo (`--model`) solo se emite si `cfg.Model.Path != ""`.
 - `%g` en `fmt.Sprintf` para el peso de la lora elimina ceros insignificantes: `0.800000` → `0.8`.
 - Funcion pura: misma entrada, misma salida. Sin I/O ni estado global.
@@ -0,0 +1,18 @@
 package ml
 // GenerationConfig parameterizes an image generation request.
 // It is a JSON-compatible mirror of GenerationConfig_py_ml: the json tags
 // match the snake_case fields of the Python dataclass so values round-trip
 // without loss.
 //
 // NegativePrompt, Loras and ClipSkip are tagged omitempty, so they are left
 // out of the encoded JSON when empty (for ClipSkip: when the pointer is nil).
 type GenerationConfig struct {
 	Prompt         string    `json:"prompt"`
 	NegativePrompt string    `json:"negative_prompt,omitempty"`
 	Seed           int64     `json:"seed"`
 	Steps          int       `json:"steps"`
 	CfgScale       float64   `json:"cfg_scale"`
 	Sampler        string    `json:"sampler"`
 	Width          int       `json:"width"`
 	Height         int       `json:"height"`
 	Model          ModelRef  `json:"model"`
 	Loras          []LoraRef `json:"loras,omitempty"`
 	ClipSkip       *int      `json:"clip_skip,omitempty"`
 }
@@ -0,0 +1,12 @@
 package ml
 // ImageGenResult holds the generated image and its execution metadata.
 // ImageBytes carries the raw PNG bytes and is excluded from JSON
 // (json:"-" tag) because it travels over a separate binary channel.
 // VramPeakMb is a pointer tagged omitempty, so a nil value is omitted
 // from the encoded JSON.
 type ImageGenResult struct {
 	ImageBytes  []byte         `json:"-"`
 	Format      string         `json:"format"`
 	Meta        map[string]any `json:"meta"`
 	DurationMs  int64          `json:"duration_ms"`
 	VramPeakMb  *int           `json:"vram_peak_mb,omitempty"`
 }
@@ -0,0 +1,9 @@
 package ml
 import "context"
 // ImageGenerator defines the contract for any image generation backend.
 // Implementations may be local (ComfyUI, diffusers) or remote (API).
 type ImageGenerator interface {
 	// Generate takes a context and a GenerationConfig and returns an
 	// ImageGenResult or an error.
 	Generate(ctx context.Context, cfg GenerationConfig) (ImageGenResult, error)
 }
@@ -0,0 +1,8 @@
 package ml
 // LoraRef references a LoRA adapter together with its merge weight and an
 // optional scale. Scale is a pointer tagged omitempty, so a nil value is
 // omitted from the encoded JSON.
 type LoraRef struct {
 	Path   string   `json:"path"`
 	Weight float64  `json:"weight"`
 	Scale  *float64 `json:"scale,omitempty"`
 }
@@ -0,0 +1,10 @@
 package ml
 // ModelRef identifies an image generation model by name, type,
 // quantization and an optional on-disk path. Path is tagged omitempty,
 // so an empty path is omitted from the encoded JSON.
 type ModelRef struct {
 	Name         string `json:"name"`
 	ModelType    string `json:"model_type"`   // sd15|sdxl|flux_dev|...
 	Quantization string `json:"quantization"` // fp16|q8_0|...
 	Path         string `json:"path,omitempty"`
 }
@@ -0,0 +1,78 @@
 package ml
 import (
 	"regexp"
 	"strconv"
 )
 // SdcliProgress holds the progress state parsed from one stderr line of
 // sd-cli: the current step, the total number of steps, the reported
 // iterations per second and the completion percentage.
 type SdcliProgress struct {
 	Step       int     `json:"step"`
 	TotalSteps int     `json:"total_steps"`
 	ItPerSec   float64 `json:"it_per_sec"`
 	Percent    float64 `json:"percent"`
 }
 // reProgress1 parses the compact format: "  3/30 |  0.84it/s |  10%".
 // Capture groups, in order: step, total, iterations per second, percent.
 var reProgress1 = regexp.MustCompile(`\s*(\d+)\s*/\s*(\d+)\s*\|[^|]*?([\d.]+)\s*it/s[^|]*?\|\s*([\d.]+)\s*%`)
 // reProgress2 parses the verbose format: "sampling: step 3 of 30 (0.84 it/s)".
 // Capture groups, in order: step, total, iterations per second.
 var reProgress2 = regexp.MustCompile(`step\s+(\d+)\s+of\s+(\d+)\s*\(\s*([\d.]+)\s*it/s\)`)
 // reProgress3 parses the minimal format: "step 3/30" or "progress: 3/30".
 // Capture groups, in order: step, total.
 // NOTE: the "progress" prefix is optional and the pattern is unanchored, so
 // any "<digits>/<digits>" pair anywhere in a line matches; the word "step"
 // is not part of the pattern.
 var reProgress3 = regexp.MustCompile(`(?:progress[:\s]+)?(\d+)\s*/\s*(\d+)`)
 // SdcliParseProgress extracts progress information from a single stderr line
 // of stable-diffusion.cpp / sd-cli.
 //
 // Three layouts are tried in order: compact ("3/30 | 0.84it/s | 10%"),
 // verbose ("sampling: step 3 of 30 (0.84 it/s)") and minimal ("progress: 3/30").
 // The first layout whose regexp matches and whose captures convert cleanly
 // wins; a layout that matches but fails its checks falls through to the next
 // one. When no layout applies the result is a zero SdcliProgress and false.
 //
 // The function performs no I/O and only reads the precompiled regexps.
 func SdcliParseProgress(line string) (SdcliProgress, bool) {
 	// ratio is the percentage used by the layouts that do not print one.
 	ratio := func(cur, all int) float64 {
 		return 100.0 * float64(cur) / float64(all)
 	}

 	// Compact layout: the percent is taken verbatim from the line.
 	if caps := reProgress1.FindStringSubmatch(line); caps != nil {
 		cur, curErr := strconv.Atoi(caps[1])
 		all, allErr := strconv.Atoi(caps[2])
 		rate, rateErr := strconv.ParseFloat(caps[3], 64)
 		percent, percentErr := strconv.ParseFloat(caps[4], 64)
 		failed := curErr != nil || allErr != nil || rateErr != nil || percentErr != nil
 		if !failed {
 			var out SdcliProgress
 			out.Step, out.TotalSteps = cur, all
 			out.ItPerSec, out.Percent = rate, percent
 			return out, true
 		}
 	}

 	// Verbose layout: speed is printed, percent is derived from step/total.
 	if caps := reProgress2.FindStringSubmatch(line); caps != nil {
 		cur, curErr := strconv.Atoi(caps[1])
 		all, allErr := strconv.Atoi(caps[2])
 		rate, rateErr := strconv.ParseFloat(caps[3], 64)
 		failed := curErr != nil || allErr != nil || rateErr != nil
 		// A non-positive total is rejected so the ratio never divides by zero.
 		if !failed && all > 0 {
 			var out SdcliProgress
 			out.Step, out.TotalSteps = cur, all
 			out.ItPerSec, out.Percent = rate, ratio(cur, all)
 			return out, true
 		}
 	}

 	// Minimal layout: only step/total is available, speed is reported as 0.
 	if caps := reProgress3.FindStringSubmatch(line); caps != nil {
 		cur, curErr := strconv.Atoi(caps[1])
 		all, allErr := strconv.Atoi(caps[2])
 		failed := curErr != nil || allErr != nil
 		if !failed && all > 0 {
 			var out SdcliProgress
 			out.Step, out.TotalSteps = cur, all
 			out.ItPerSec, out.Percent = 0, ratio(cur, all)
 			return out, true
 		}
 	}

 	return SdcliProgress{}, false
 }
@@ -0,0 +1,50 @@
 ---
 name: sdcli_parse_progress
 kind: function
 lang: go
 domain: ml
 version: "1.0.0"
 purity: pure
 signature: "func SdcliParseProgress(line string) (SdcliProgress, bool)"
 description: "Parsea una linea de stderr de stable-diffusion.cpp / sd-cli y extrae el estado de progreso. Soporta el formato compacto '3/30 | 0.84it/s | 10%', el formato verbose 'sampling: step 3 of 30 (0.84 it/s)', y el formato minimal 'progress: 3/30'. Retorna (zero, false) si la linea no contiene informacion de progreso reconocible."
 tags: [ml, stable-diffusion, sdcli, progress, parser, stderr, pure]
 uses_functions: []
 uses_types: []
 returns: []
 returns_optional: false
 error_type: ""
 imports: ["regexp", "strconv"]
 params:
  - name: line
    desc: "Una linea de stderr emitida por sd-cli / stable-diffusion.cpp durante la fase de sampling. Puede contener espacios al inicio o final."
 output: "Par (SdcliProgress, bool). bool=true si se reconocio un patron de progreso; SdcliProgress contiene Step (paso actual), TotalSteps (pasos totales), ItPerSec (iteraciones por segundo, 0 si no disponible) y Percent (porcentaje 0-100 calculado o leido de la linea). bool=false y struct zero si la linea no contiene progreso."
 tested: true
 tests:
  - "formato estandar compacto step/total/itpersec/percent"
  - "linea sin patron retorna false"
  - "formato sampling verbose con velocidad"
 file_path: "functions/ml/sdcli_parse_progress.go"
 test_file_path: "functions/ml/sdcli_parse_progress_test.go"
 ---
 ## Ejemplo
 ```go
 p, ok := ml.SdcliParseProgress("  3/30 |  0.84it/s |  10%")
 // ok = true
 // p = SdcliProgress{Step:3, TotalSteps:30, ItPerSec:0.84, Percent:10.0}
 p2, ok2 := ml.SdcliParseProgress("sampling: step 15 of 30 (1.2 it/s)")
 // ok2 = true
 // p2 = SdcliProgress{Step:15, TotalSteps:30, ItPerSec:1.2, Percent:50.0}
 _, ok3 := ml.SdcliParseProgress("loading model...")
 // ok3 = false
 ```
 ## Notas
 - Regexps precompiladas como vars de paquete (se compilan una sola vez al init del paquete).
 - Tolerante a variaciones de espaciado gracias a `\s*` en los patrones.
 - El campo `Percent` en los formatos verbose y minimal se calcula como `100 * step / total` (no se lee de la linea porque esos formatos no lo emiten); en el formato compacto se toma literalmente del porcentaje impreso en la linea.
 - Funcion pura: sin I/O, sin estado mutable, determinista.
@@ -0,0 +1,103 @@
 package ml
 import (
 	"math"
 	"testing"
 )
 // TestSdcliParseProgress_StandardFormat feeds one compact-format line
 // ("step/total | it/s | percent") and checks every parsed field.
 func TestSdcliParseProgress_StandardFormat(t *testing.T) {
 	const tolerance = 1e-9
 	progress, matched := SdcliParseProgress("  3/30 |  0.84it/s |  10%")
 	if !matched {
 		t.Fatalf("expected match, got false")
 	}
 	if progress.Step != 3 {
 		t.Errorf("Step: got %d, want 3", progress.Step)
 	}
 	if progress.TotalSteps != 30 {
 		t.Errorf("TotalSteps: got %d, want 30", progress.TotalSteps)
 	}
 	if delta := math.Abs(progress.ItPerSec - 0.84); delta > tolerance {
 		t.Errorf("ItPerSec: got %v, want 0.84", progress.ItPerSec)
 	}
 	if delta := math.Abs(progress.Percent - 10.0); delta > tolerance {
 		t.Errorf("Percent: got %v, want 10.0", progress.Percent)
 	}
 }
 // TestSdcliParseProgress_NoMatch checks that lines carrying no progress
 // information (including the empty string) are reported as not matched.
 func TestSdcliParseProgress_NoMatch(t *testing.T) {
 	for _, input := range []string{
 		"loading model...",
 		"",
 		"error: out of memory",
 		"clip model loaded",
 		"generating image...",
 	} {
 		if _, matched := SdcliParseProgress(input); matched {
 			t.Errorf("expected no match for %q, but got match", input)
 		}
 	}
 }
 // TestSdcliParseProgress_AltFormat covers the layouts beyond the basic
 // compact case: the verbose layout, the minimal layout without speed, and a
 // compact line whose printed percent differs from 100*step/total.
 func TestSdcliParseProgress_AltFormat(t *testing.T) {
 	t.Run("formato sampling verbose", func(t *testing.T) {
 		line := "sampling: step 3 of 30 (0.84 it/s)"
 		got, ok := SdcliParseProgress(line)
 		if !ok {
 			t.Fatalf("expected match, got false")
 		}
 		if got.Step != 3 {
 			t.Errorf("Step: got %d, want 3", got.Step)
 		}
 		if got.TotalSteps != 30 {
 			t.Errorf("TotalSteps: got %d, want 30", got.TotalSteps)
 		}
 		if math.Abs(got.ItPerSec-0.84) > 1e-9 {
 			t.Errorf("ItPerSec: got %v, want 0.84", got.ItPerSec)
 		}
 		// The verbose layout prints no percent; it is derived from step/total.
 		expectedPct := 100.0 * 3.0 / 30.0
 		if math.Abs(got.Percent-expectedPct) > 1e-6 {
 			t.Errorf("Percent: got %v, want %v", got.Percent, expectedPct)
 		}
 	})
 	t.Run("formato step/total sin velocidad", func(t *testing.T) {
 		line := "progress: 15/20"
 		got, ok := SdcliParseProgress(line)
 		if !ok {
 			t.Fatalf("expected match, got false")
 		}
 		if got.Step != 15 {
 			t.Errorf("Step: got %d, want 15", got.Step)
 		}
 		if got.TotalSteps != 20 {
 			t.Errorf("TotalSteps: got %d, want 20", got.TotalSteps)
 		}
 		// No speed is present in this layout, so it is reported as 0.
 		if got.ItPerSec != 0 {
 			t.Errorf("ItPerSec: got %v, want 0", got.ItPerSec)
 		}
 		expectedPct := 75.0
 		if math.Abs(got.Percent-expectedPct) > 1e-6 {
 			t.Errorf("Percent: got %v, want %v", got.Percent, expectedPct)
 		}
 	})
 	t.Run("formato con espacios variables y mayor velocidad", func(t *testing.T) {
 		line := "  20/30 | 12.50it/s |  66%"
 		got, ok := SdcliParseProgress(line)
 		if !ok {
 			t.Fatalf("expected match, got false")
 		}
 		if got.Step != 20 {
 			t.Errorf("Step: got %d, want 20", got.Step)
 		}
 		if got.TotalSteps != 30 {
 			t.Errorf("TotalSteps: got %d, want 30", got.TotalSteps)
 		}
 		if math.Abs(got.ItPerSec-12.5) > 1e-9 {
 			t.Errorf("ItPerSec: got %v, want 12.5", got.ItPerSec)
 		}
 		// 20/30 would compute to 66.67; the compact layout must report the
 		// 66 printed in the line, which pins "read from the line" behavior.
 		if math.Abs(got.Percent-66.0) > 1e-9 {
 			t.Errorf("Percent: got %v, want 66", got.Percent)
 		}
 	})
 }
@@ -0,0 +1,161 @@
 """Tests para vault_csv_profile."""
 from __future__ import annotations
 import os
 import sqlite3
 import sys
 import tempfile
 from pathlib import Path
 import pytest
 sys.path.insert(0, os.path.join(os.path.dirname(__file__), ".."))
 from vault_csv_profile import vault_csv_profile
 def _make_vault(tmp: Path) -> tuple[Path, Path]:
    """Crea un vault mínimo con vault_index.db y tabla files + files_fts + csv_profiles."""
    db = tmp / "vault_index.db"
    conn = sqlite3.connect(str(db))
    conn.executescript(
        """
        CREATE TABLE IF NOT EXISTS files (
            rowid INTEGER PRIMARY KEY AUTOINCREMENT,
            rel_path TEXT UNIQUE NOT NULL,
            size_bytes INTEGER,
            ext TEXT
        );
        CREATE VIRTUAL TABLE IF NOT EXISTS files_fts
            USING fts5(rel_path, content_text, content='', contentless_delete=1);
        CREATE TABLE IF NOT EXISTS csv_profiles (
            rel_path TEXT PRIMARY KEY,
            cols_json TEXT,
            n_rows INTEGER,
            encoding TEXT,
            date_min TEXT,
            date_max TEXT,
            profiled_at INTEGER
        );
        """
    )
    conn.commit()
    conn.close()
    return tmp, db
 def _insert_file_entry(db: Path, rel_path: str):
    """Inserta entrada en files para que files_fts tenga rowid válido."""
    conn = sqlite3.connect(str(db))
    conn.execute(
        "INSERT OR IGNORE INTO files(rel_path, size_bytes, ext) VALUES (?, 0, '.csv')",
        (rel_path,),
    )
    conn.commit()
    conn.close()
 def test_csv_basic(tmp_path):
    """Profile a three-row UTF-8 CSV and check the result and its persistence.

    Verifies the returned rel_path, row count and column names, then reads
    ``csv_profiles`` back to confirm the stored row count. The verification
    connection is closed even if the query raises.
    """
    vault, db = _make_vault(tmp_path)
    rel = "data/basic.csv"
    csv_file = vault / rel
    csv_file.parent.mkdir(parents=True, exist_ok=True)
    csv_file.write_text("nombre,edad,score\nAna,30,9.5\nBob,25,8.0\nCarla,35,7.5\n", encoding="utf-8")
    _insert_file_entry(db, rel)
    result = vault_csv_profile(str(vault), rel, db_path=str(db))
    assert result["rel_path"] == rel
    assert result["n_rows"] == 3
    assert len(result["cols"]) == 3
    col_names = [c["name"] for c in result["cols"]]
    assert "nombre" in col_names
    assert "edad" in col_names
    assert "score" in col_names
    assert result["persisted"] is True
    # Check that the profile was persisted in csv_profiles.
    conn = sqlite3.connect(str(db))
    try:
        row = conn.execute("SELECT n_rows FROM csv_profiles WHERE rel_path = ?", (rel,)).fetchone()
    finally:
        conn.close()
    assert row is not None
    assert row[0] == 3
 def test_csv_date_detection(tmp_path):
    """Profile a CSV with a date column and check the reported date range.

    The bounds are compared against ISO date string literals with ``<=`` and
    ``>=`` rather than strict equality.
    """
    vault_root, index_db = _make_vault(tmp_path)
    relative = "data/fechas.csv"
    target = vault_root / relative
    target.parent.mkdir(parents=True, exist_ok=True)
    content = "fecha,valor\n2023-01-01,100\n2023-06-15,200\n2023-12-31,300\n"
    target.write_text(content, encoding="utf-8")
    _insert_file_entry(index_db, relative)

    profile = vault_csv_profile(str(vault_root), relative, db_path=str(index_db))

    earliest = profile["date_min"]
    assert earliest is not None
    latest = profile["date_max"]
    assert latest is not None
    assert earliest <= "2023-01-01"
    assert latest >= "2023-12-31"
 def test_csv_encoding_latin1(tmp_path):
    """Profile a Latin-1 encoded CSV containing accented characters.

    Checks the row count, that the reported encoding is non-empty and is not
    the literal "utf-8?", and that the profile was persisted.
    """
    vault_root, index_db = _make_vault(tmp_path)
    relative = "data/tildes.csv"
    target = vault_root / relative
    target.parent.mkdir(parents=True, exist_ok=True)
    payload = "ciudad,poblacion\nMálaga,500000\nCórdoba,320000\n".encode("latin-1")
    target.write_bytes(payload)
    _insert_file_entry(index_db, relative)

    profile = vault_csv_profile(str(vault_root), relative, db_path=str(index_db))

    assert profile["n_rows"] == 2
    assert profile["encoding"] != "utf-8?"
    # Some encoding was detected (any non-empty value).
    assert profile["encoding"]
    assert profile["persisted"] is True
 def test_csv_empty(tmp_path):
    """Profile a zero-byte CSV: no rows, no columns and no date range."""
    vault_root, index_db = _make_vault(tmp_path)
    relative = "data/empty.csv"
    target = vault_root / relative
    target.parent.mkdir(parents=True, exist_ok=True)
    target.write_text("", encoding="utf-8")
    _insert_file_entry(index_db, relative)

    profile = vault_csv_profile(str(vault_root), relative, db_path=str(index_db))

    assert profile["n_rows"] == 0
    assert profile["cols"] == []
    assert profile["date_min"] is None
    assert profile["date_max"] is None
 def test_csv_persists_fts(tmp_path):
    """Contentless FTS5: check that the CSV column names are searchable via MATCH.

    After profiling, each header name must be found in ``files_fts``. The
    verification connection is closed even if a MATCH query raises.
    """
    vault, db = _make_vault(tmp_path)
    rel = "data/fts_test.csv"
    csv_file = vault / rel
    csv_file.parent.mkdir(parents=True, exist_ok=True)
    csv_file.write_text("producto,precio\nManzana,1.5\nPera,2.0\n", encoding="utf-8")
    _insert_file_entry(db, rel)
    vault_csv_profile(str(vault), rel, db_path=str(db))
    conn = sqlite3.connect(str(db))
    try:
        # A contentless FTS5 table cannot be read with a plain SELECT of its
        # columns, so indexing is verified through MATCH queries.
        row_prod = conn.execute(
            "SELECT rowid FROM files_fts WHERE files_fts MATCH 'producto'",
        ).fetchone()
        row_prec = conn.execute(
            "SELECT rowid FROM files_fts WHERE files_fts MATCH 'precio'",
        ).fetchone()
    finally:
        conn.close()
    assert row_prod is not None, "FTS no encontró 'producto'"
    assert row_prec is not None, "FTS no encontró 'precio'"
--- a/Show More
+++ b/Show More