chore: auto-commit (95 archivos)

- cmd/fn/doctor.go - cmd/fn/main.go - cpp/apps/primitives_gallery/playground/tables/CMakeLists.txt - cpp/apps/primitives_gallery/playground/tables/data_table.cpp - cpp/apps/primitives_gallery/playground/tables/data_table_logic.cpp - cpp/apps/primitives_gallery/playground/tables/data_table_logic.h - cpp/apps/primitives_gallery/playground/tables/self_test.cpp - cpp/apps/primitives_gallery/playground/tables/tql.cpp - cpp/apps/primitives_gallery/playground/tables/viz.cpp - cpp/apps/primitives_gallery/playground/tables/viz.h - ... Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-13 00:50:34 +02:00
parent a2bbf23374
commit e3c8979e8d
189 changed files with 18964 additions and 330 deletions
@@ -0,0 +1,73 @@
+---
+name: cuda_toolkit_check
+kind: function
+lang: bash
+domain: infra
+version: "1.0.0"
+purity: impure
+signature: "cuda_toolkit_check() -> void"
+description: "Detecta componentes CUDA instalados en el sistema y emite pares key=value a stdout: nvcc (version o missing), nvidia_smi (present/missing), driver_version, cuda_libs (path o missing) y overall (ok|partial|missing). Exit code 0 siempre — funcion informativa, no fatal."
+tags: [cuda, nvidia, gpu, hardware, probe, infra, toolkit]
+uses_functions: []
+uses_types: []
+returns: []
+returns_optional: false
+error_type: "error_go_core"
+imports: []
+params:
+  - name: (ninguno)
+    desc: "No toma parametros. Lee el estado del sistema via nvcc, nvidia-smi y busqueda en rutas canonicas de CUDA."
+output: "Cinco pares key=value en stdout: nvcc, nvidia_smi, driver_version, cuda_libs, overall. overall=ok si los tres componentes principales estan presentes; partial si algunos; missing si ninguno."
+tested: false
+tests: []
+test_file_path: ""
+file_path: "bash/functions/infra/cuda_toolkit_check.sh"
+---
+
+## Ejemplo
+
+```bash
+source bash/functions/infra/cuda_toolkit_check.sh
+cuda_toolkit_check
+```
+
+Salida en maquina con CUDA completo:
+```
+nvcc=12.4
+nvidia_smi=present
+driver_version=550.54.15
+cuda_libs=/usr/local/cuda
+overall=ok
+```
+
+Salida en maquina sin CUDA:
+```
+nvcc=missing
+nvidia_smi=missing
+driver_version=missing
+cuda_libs=missing
+overall=missing
+```
+
+Invocar directamente:
+```bash
+bash bash/functions/infra/cuda_toolkit_check.sh
+```
+
+Parsear desde otro script:
+```bash
+eval "$(cuda_toolkit_check)"
+echo "CUDA overall: $overall"
+if [[ "$overall" == "ok" ]]; then
+    echo "CUDA completo: nvcc=$nvcc driver=$driver_version libs=$cuda_libs"
+fi
+```
+
+## Notas
+
+- Idempotente: no instala, no modifica nada, solo consulta.
+- Exit code 0 siempre — ausencia de CUDA es informacion, no fallo.
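+- `cuda_libs`: primero comprueba si existen los directorios `/usr/local/cuda*` y `/opt/cuda*`; si no, busca `libcuda.so*` en `/usr/lib/x86_64-linux-gnu` y `/usr/lib/aarch64-linux-gnu`; como ultimo recurso consulta `ldconfig -p`.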
+- `driver_version` refleja el driver NVIDIA del kernel, reportado por nvidia-smi.
+- `nvcc` reporta la version del compilador CUDA toolkit (puede diferir de la version soportada por el driver).
+- Para obtener la version CUDA maxima soportada por el driver, usar `get_gpu_info_go_infra` (campo CudaVersion del struct GpuInfo).
@@ -0,0 +1,99 @@
+#!/usr/bin/env bash
+# cuda_toolkit_check — Detecta componentes CUDA instalados en el sistema.
+#
+# Emite pares key=value a stdout:
+#   nvcc=<version|missing>
+#   nvidia_smi=<present|missing>
+#   driver_version=<version|missing>
+#   cuda_libs=<path|missing>
+#   overall=<ok|partial|missing>
+#
+# Exit code 0 siempre (funcion informativa, no fatal).
+# Idempotente: se puede invocar multiples veces sin efectos secundarios.
+
+cuda_toolkit_check() {
+    local nvcc_ver="missing"
+    local nvidia_smi_status="missing"
+    local driver_version="missing"
+    local cuda_libs_path="missing"
+
+    # --- nvcc ---
+    if command -v nvcc &>/dev/null; then
+        # nvcc --version imprime algo como:
+        #   Cuda compilation tools, release 12.4, V12.4.131
+        local raw
+        raw="$(nvcc --version 2>&1)"
+        # Extraer "12.4" de "release 12.4,"
+        local ver
+        ver="$(echo "$raw" | grep -oP 'release \K[0-9]+\.[0-9]+')"
+        nvcc_ver="${ver:-present}"
+    fi
+
+    # --- nvidia-smi + driver_version ---
+    if command -v nvidia-smi &>/dev/null; then
+        nvidia_smi_status="present"
+        # nvidia-smi --query-gpu=driver_version --format=csv,noheader retorna la version
+        local drv
+        drv="$(nvidia-smi --query-gpu=driver_version --format=csv,noheader 2>/dev/null | head -n1 | tr -d ' ')"
+        if [[ -n "$drv" ]]; then
+            driver_version="$drv"
+        fi
+    fi
+
+    # --- cuda_libs: buscar en rutas canonicas ---
+    local search_dirs=(
+        "/usr/local/cuda"
+        "/usr/local/cuda-"*
+        "/opt/cuda"
+        "/opt/cuda-"*
+        "/usr/lib/x86_64-linux-gnu/libcuda.so"*
+        "/usr/lib/aarch64-linux-gnu/libcuda.so"*
+    )
+
+    for candidate in "${search_dirs[@]}"; do
+        # shellcheck disable=SC2206
+        # Expandir globs: si el candidato no existe el glob no expande
+        for path in $candidate; do
+            if [[ -e "$path" ]]; then
+                # Normalizar: tomar solo el directorio raiz /usr/local/cuda*
+                local base
+                base="${path%%/lib*}"
+                cuda_libs_path="$base"
+                break 2
+            fi
+        done
+    done
+
+    # Si no encontramos directorio CUDA pero si libcuda.so en rutas de lib estandar
+    if [[ "$cuda_libs_path" == "missing" ]]; then
+        local libcuda
+        libcuda="$(ldconfig -p 2>/dev/null | grep 'libcuda\.so' | head -n1 | awk '{print $NF}')"
+        if [[ -n "$libcuda" ]]; then
+            cuda_libs_path="$(dirname "$libcuda")"
+        fi
+    fi
+
+    # --- overall ---
+    local found_count=0
+    [[ "$nvcc_ver"          != "missing" ]] && ((found_count++))
+    [[ "$nvidia_smi_status" != "missing" ]] && ((found_count++))
+    [[ "$cuda_libs_path"    != "missing" ]] && ((found_count++))
+
+    local overall
+    if   [[ $found_count -eq 0 ]]; then overall="missing"
+    elif [[ $found_count -eq 3 ]]; then overall="ok"
+    else                                  overall="partial"
+    fi
+
+    # --- emitir resultados ---
+    echo "nvcc=${nvcc_ver}"
+    echo "nvidia_smi=${nvidia_smi_status}"
+    echo "driver_version=${driver_version}"
+    echo "cuda_libs=${cuda_libs_path}"
+    echo "overall=${overall}"
+}
+
+# Run the probe only when this file is executed as a script; when it is
+# sourced, just the function definition is loaded.
+if [[ "${0}" == "${BASH_SOURCE[0]}" ]]; then
+    cuda_toolkit_check "$@"
+fi
@@ -0,0 +1,111 @@
+#!/usr/bin/env bash
+# Tests para cuda_toolkit_check
+# Smoke: verifica que stdout contiene todas las keys requeridas y exit code 0.
+set -uo pipefail
+# Nota: set -e NO se usa para que los asserts fallen de forma acumulativa
+# en lugar de abortar el script al primer fallo.
+
+SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+source "$SCRIPT_DIR/../cuda_toolkit_check.sh"
+
+PASS=0
+FAIL=0
+
+# assert_eq NAME EXPECTED ACTUAL
+# Prints PASS/FAIL for an exact string comparison and bumps the matching counter.
+assert_eq() {
+    local label="$1" want="$2" have="$3"
+    if [[ "$want" != "$have" ]]; then
+        echo "FAIL: $label — expected '$want', got '$have'"
+        # `|| true`: the post-increment evaluates to 0 (status 1) on first use.
+        ((FAIL++)) || true
+        return
+    fi
+    echo "PASS: $label"
+    ((PASS++)) || true
+}
+
+assert_contains() {
+    local test_name="$1" needle="$2" haystack="$3"
+    if echo "$haystack" | grep -qF "$needle"; then
+        echo "PASS: $test_name"
+        ((PASS++)) || true
+    else
+        echo "FAIL: $test_name — '$needle' not found in output"
+        ((FAIL++)) || true
+    fi
+}
+
+assert_matches_pattern() {
+    local test_name="$1" pattern="$2" value="$3"
+    if echo "$value" | grep -qE "$pattern"; then
+        echo "PASS: $test_name"
+        ((PASS++)) || true
+    else
+        echo "FAIL: $test_name — '$value' does not match pattern '$pattern'"
+        ((FAIL++)) || true
+    fi
+}
+
+# assert_nonempty NAME VALUE
+# Passes when VALUE is not the empty string.
+assert_nonempty() {
+    local label="$1" subject="$2"
+    if [[ -z "$subject" ]]; then
+        echo "FAIL: $label — valor vacio"
+        ((FAIL++)) || true
+        return
+    fi
+    echo "PASS: $label"
+    ((PASS++)) || true
+}
+
+# --- Capturar salida ---
+OUTPUT="$(cuda_toolkit_check)"
+EXIT_CODE=$?
+
+# --- Test: exit code 0 ---
+assert_eq "exit code es 0" "0" "$EXIT_CODE"
+
+# --- Test: stdout contiene clave nvcc= ---
+assert_contains "stdout contiene clave nvcc=" "nvcc=" "$OUTPUT"
+
+# --- Test: stdout contiene clave nvidia_smi= ---
+assert_contains "stdout contiene clave nvidia_smi=" "nvidia_smi=" "$OUTPUT"
+
+# --- Test: stdout contiene clave driver_version= ---
+assert_contains "stdout contiene clave driver_version=" "driver_version=" "$OUTPUT"
+
+# --- Test: stdout contiene clave cuda_libs= ---
+assert_contains "stdout contiene clave cuda_libs=" "cuda_libs=" "$OUTPUT"
+
+# --- Test: stdout contiene clave overall= ---
+assert_contains "stdout contiene clave overall=" "overall=" "$OUTPUT"
+
+# --- Test: overall tiene valor valido (ok|partial|missing) ---
+OVERALL_VAL="$(echo "$OUTPUT" | grep '^overall=' | cut -d= -f2)"
+assert_matches_pattern "overall tiene valor valido ok|partial|missing" "^(ok|partial|missing)$" "$OVERALL_VAL"
+
+# --- Test: nvcc tiene valor no vacio ---
+NVCC_VAL="$(echo "$OUTPUT" | grep '^nvcc=' | cut -d= -f2)"
+assert_nonempty "nvcc tiene valor no vacio" "$NVCC_VAL"
+
+# --- Test: nvidia_smi tiene valor valido (present|missing) ---
+SMI_VAL="$(echo "$OUTPUT" | grep '^nvidia_smi=' | cut -d= -f2)"
+assert_matches_pattern "nvidia_smi tiene valor valido present|missing" "^(present|missing)$" "$SMI_VAL"
+
+# --- Test: driver_version tiene valor no vacio ---
+DRV_VAL="$(echo "$OUTPUT" | grep '^driver_version=' | cut -d= -f2)"
+assert_nonempty "driver_version tiene valor no vacio" "$DRV_VAL"
+
+# --- Test: cuda_libs tiene valor no vacio ---
+LIBS_VAL="$(echo "$OUTPUT" | grep '^cuda_libs=' | cut -d= -f2)"
+assert_nonempty "cuda_libs tiene valor no vacio" "$LIBS_VAL"
+
+# --- Test: exactamente 5 lineas en la salida ---
+LINE_COUNT="$(echo "$OUTPUT" | wc -l | tr -d ' ')"
+assert_eq "salida tiene exactamente 5 lineas" "5" "$LINE_COUNT"
+
+# --- Test: segunda invocacion idempotente (mismo resultado) ---
+OUTPUT2="$(cuda_toolkit_check)"
+assert_eq "segunda invocacion produce mismo resultado (idempotente)" "$OUTPUT" "$OUTPUT2"
+
+# --- Resumen ---
+echo "---"
+echo "Results: $PASS passed, $FAIL failed"
+[[ $FAIL -eq 0 ]] || exit 1
@@ -0,0 +1,90 @@
+---
+name: vault_audit
+kind: pipeline
+lang: bash
+domain: pipelines
+version: "1.0.0"
+purity: impure
+signature: "vault_audit(<vault_name> | --all) [--skip-profilers] [--dry-run-layout] -> void"
+description: "Pipeline completo de auditoria para uno o todos los vaults declarados: layout-ensure, index, profile (csv/pdf/md), dedupe, aggregate y doctor. Produce tabla resumen con estado por vault y codigo de salida 4 si hay warnings."
+tags: [vault, audit, pipeline, launcher, infra, bash]
+uses_functions:
+  - vault_layout_ensure_go_infra
+  - vault_inventory_scan_go_infra
+  - vault_index_open_go_infra
+  - vault_index_write_go_infra
+  - vault_csv_profile_py_datascience
+  - vault_pdf_extract_py_datascience
+  - vault_knowledge_parse_py_infra
+  - vault_dedupe_report_py_infra
+  - vault_aggregate_index_go_infra
+  - vault_doctor_go_infra
+uses_types: []
+returns: []
+returns_optional: false
+error_type: "error_go_core"
+imports: []
+params:
+  - name: vault_name
+    desc: "Nombre del vault a auditar (como aparece en registry.db tabla vaults). Usar --all para todos."
+  - name: --all
+    desc: "Audita todos los vaults declarados en registry.db. Mutuamente excluyente con vault_name."
+  - name: --skip-profilers
+    desc: "Omite el paso de profiling CSV/PDF/MD. Util para auditorias rapidas de inventario."
+  - name: --dry-run-layout
+    desc: "Pasa --dry-run a vault layout-ensure: calcula cambios sin tocar el disco."
+output: "Tabla de resumen por vault con status ok/warn. Codigo de salida 0=exito, 1=root no localizable, 4=uno o mas vaults con warnings."
+tested: false
+tests: []
+test_file_path: ""
+file_path: "bash/functions/pipelines/vault_audit.sh"
+---
+
+## Ejemplo
+
+```bash
+# Auditar un vault especifico
+FN_REGISTRY_ROOT=/home/lucas/fn_registry \
+  bash bash/functions/pipelines/vault_audit.sh turismo_spain
+
+# Auditar todos los vaults
+FN_REGISTRY_ROOT=/home/lucas/fn_registry \
+  bash bash/functions/pipelines/vault_audit.sh --all
+
+# Solo layout + index + aggregate (sin profilers, mas rapido)
+bash bash/functions/pipelines/vault_audit.sh turismo_spain --skip-profilers
+
+# Ver que haria layout-ensure sin tocar disco
+bash bash/functions/pipelines/vault_audit.sh turismo_spain --dry-run-layout
+
+# Equivalente via fn run (desde la raiz del registry)
+./fn run vault_audit_bash_pipelines turismo_spain
+```
+
+## Pasos del pipeline
+
+1. **layout-ensure** — `fn vault layout-ensure <name>` asegura `data/{raw,processed,exports}` y `knowledge/{...}`.
+2. **index** — `fn vault index <name>` escanea archivos y persiste en `vault_index.db`.
+3. **profile** — `fn vault profile <name>` llama `vault_profile_dispatch.py` para CSV/PDF/MD.
+4. **dedupe** — `fn vault dedupe <name>` detecta duplicados por sha256 (informacional, no fatal).
+5. **aggregate** — `fn vault aggregate` copia todo a `registry.db` tabla `vault_files` (una sola vez al final).
+6. **doctor** — `fn vault doctor` muestra estado de salud de todos los vaults.
+
+## Codigos de salida
+
+| Codigo | Significado |
+|--------|-------------|
+| 0 | Todos los vaults procesados sin errores |
+| 1 | FN_REGISTRY_ROOT no localizable, fn binary no encontrado, argumentos invalidos (flag desconocido o ningun vault indicado) o `--all` sin vaults registrados |
+| 4 | Uno o mas vaults con warnings (layout o index fallaron) |
+
+## Variables de entorno
+
+- `FN_REGISTRY_ROOT` — raiz del registry (auto-detectada si no esta seteada).
+- `FN_BIN` — path al binario `fn` (default: `$FN_REGISTRY_ROOT/fn`).
+
+## Notas
+
+Requiere `sqlite3` en PATH para resolver la lista de vaults con `--all`.
+El paso de profile es non-fatal: errores en profilers individuales se reportan como warnings.
+El paso de dedupe es siempre informacional (no borra archivos).
@@ -0,0 +1,172 @@
+#!/usr/bin/env bash
+# vault_audit — Full audit pipeline for one or all declared vaults.
+# Runs: layout-ensure → index → profile → dedupe → aggregate → doctor
+#
+# Usage:
+#   vault_audit.sh <vault_name>
+#   vault_audit.sh --all
+#   vault_audit.sh <vault_name> --skip-profilers
+#   vault_audit.sh <vault_name> --dry-run-layout
+#   vault_audit.sh --all --skip-profilers
+set -euo pipefail
+
+# --- locate FN_REGISTRY_ROOT ---
+# Walk from the current directory towards "/" and print the first directory
+# that contains registry.db. Returns 1 when none is found; "/" itself is never
+# inspected.
+_find_registry_root() {
+    local here
+    here="$(pwd)"
+    until [[ "$here" == "/" ]]; do
+        if [[ -f "$here/registry.db" ]]; then
+            echo "$here"
+            return 0
+        fi
+        here="$(dirname "$here")"
+    done
+    return 1
+}
+
+# An explicit FN_REGISTRY_ROOT wins when it really contains registry.db;
+# otherwise search upwards from the current directory.
+if [[ -n "${FN_REGISTRY_ROOT:-}" ]] && [[ -f "${FN_REGISTRY_ROOT}/registry.db" ]]; then
+    REGISTRY_ROOT="$FN_REGISTRY_ROOT"
+elif ! REGISTRY_ROOT="$(_find_registry_root 2>/dev/null)"; then
+    echo "ERROR: Cannot locate registry.db. Set FN_REGISTRY_ROOT or run from registry root." >&2
+    exit 1
+fi
+
+# The fn binary defaults to <registry root>/fn and must be executable.
+FN_BIN="${FN_BIN:-${REGISTRY_ROOT}/fn}"
+if ! [[ -x "$FN_BIN" ]]; then
+    echo "ERROR: fn binary not found at $FN_BIN. Build with: CGO_ENABLED=1 go build -tags fts5 -o fn ./cmd/fn/" >&2
+    exit 1
+fi
+
+# --- parse args ---
+AUDIT_ALL=0
+SKIP_PROFILERS=0
+DRY_RUN_LAYOUT=0
+VAULT_NAMES=()
+START_TS=$(date +%s)
+
+# Print the usage synopsis to stderr. Single source for every error path, so
+# the text cannot drift between them.
+_usage() {
+    echo "Usage: vault_audit.sh <vault_name> | --all [--skip-profilers] [--dry-run-layout]" >&2
+}
+
+while [[ $# -gt 0 ]]; do
+    case "$1" in
+        --all)            AUDIT_ALL=1 ;;
+        --skip-profilers) SKIP_PROFILERS=1 ;;
+        --dry-run-layout) DRY_RUN_LAYOUT=1 ;;
+        -*)
+            echo "ERROR: Unknown flag: $1" >&2
+            _usage
+            exit 1
+            ;;
+        *)
+            # Anything that is not a flag is taken as a vault name.
+            VAULT_NAMES+=("$1")
+            ;;
+    esac
+    shift
+done
+
+# Neither --all nor a vault name: nothing to audit.
+if [[ $AUDIT_ALL -eq 0 && ${#VAULT_NAMES[@]} -eq 0 ]]; then
+    _usage
+    exit 1
+fi
+
+# --all replaces the vault list with the registry contents, so explicit names
+# have no effect. Say so instead of dropping them silently.
+if [[ $AUDIT_ALL -eq 1 && ${#VAULT_NAMES[@]} -gt 0 ]]; then
+    echo "WARN: --all given; ignoring explicit vault name(s): ${VAULT_NAMES[*]}" >&2
+fi
+
+# --- resolve vault list ---
+if [[ $AUDIT_ALL -eq 1 ]]; then
+    # --all reads the vault names from registry.db, so sqlite3 is mandatory.
+    # Report a missing tool or a failed query as such rather than as an
+    # empty registry.
+    if ! command -v sqlite3 >/dev/null 2>&1; then
+        echo "ERROR: sqlite3 not found in PATH (required by --all)." >&2
+        exit 1
+    fi
+    if ! vault_rows="$(sqlite3 "${REGISTRY_ROOT}/registry.db" "SELECT name FROM vaults ORDER BY name;")"; then
+        echo "ERROR: could not query vaults in ${REGISTRY_ROOT}/registry.db." >&2
+        exit 1
+    fi
+    VAULT_NAMES=()
+    # Guard against empty output: a here-string of "" would yield one empty name.
+    if [[ -n "$vault_rows" ]]; then
+        mapfile -t VAULT_NAMES <<<"$vault_rows"
+    fi
+    if [[ ${#VAULT_NAMES[@]} -eq 0 ]]; then
+        echo "No vaults registered in registry.db. Run 'fn index' first." >&2
+        exit 1
+    fi
+    echo "Found ${#VAULT_NAMES[@]} vault(s): ${VAULT_NAMES[*]}"
+fi
+
+# --- build fn vault flags ---
+# Extra arguments for `fn vault layout-ensure`; empty unless --dry-run-layout.
+LAYOUT_FLAGS=()
+if [[ $DRY_RUN_LAYOUT -eq 1 ]]; then
+    LAYOUT_FLAGS+=(--dry-run)
+fi
+
+# --- per-vault audit ---
+# Counters and per-vault status map filled in by audit_one.
+PASS_COUNT=0
+FAIL_COUNT=0
+declare -A VAULT_STATUS
+
+audit_one() {
+    local name="$1"
+    local vault_ok=1
+    echo ""
+    echo "=== vault: $name ==="
+
+    # Step 1: layout-ensure
+    echo "  [1/5] layout-ensure"
+    if ! "$FN_BIN" vault layout-ensure "$name" "${LAYOUT_FLAGS[@]}" 2>&1 | sed 's/^/    /'; then
+        echo "    WARN: layout-ensure failed (non-fatal)" >&2
+        vault_ok=0
+    fi
+
+    # Step 2: index
+    echo "  [2/5] index"
+    if ! "$FN_BIN" vault index "$name" 2>&1 | sed 's/^/    /'; then
+        echo "    ERROR: index failed" >&2
+        vault_ok=0
+    fi
+
+    # Step 3: profile
+    if [[ $SKIP_PROFILERS -eq 0 ]]; then
+        echo "  [3/5] profile"
+        if ! "$FN_BIN" vault profile "$name" 2>&1 | sed 's/^/    /'; then
+            echo "    WARN: profile had errors (non-fatal)" >&2
+        fi
+    else
+        echo "  [3/5] profile (skipped)"
+    fi
+
+    # Step 4: dedupe (informational, non-fatal)
+    echo "  [4/5] dedupe"
+    "$FN_BIN" vault dedupe "$name" 2>&1 | sed 's/^/    /' || true
+
+    # Step 5 deferred — aggregate runs once at the end
+    echo "  [5/5] aggregate (deferred)"
+
+    if [[ $vault_ok -eq 1 ]]; then
+        VAULT_STATUS["$name"]="ok"
+        PASS_COUNT=$((PASS_COUNT + 1))
+    else
+        VAULT_STATUS["$name"]="warn"
+        FAIL_COUNT=$((FAIL_COUNT + 1))
+    fi
+}
+
+for vault_name in "${VAULT_NAMES[@]}"; do
+    audit_one "$vault_name"
+done
+
+# --- aggregate (once, after all vaults) ---
+# Under `set -e` + `pipefail` an unguarded failure here would end the script
+# before the summary is printed and with an undocumented exit code. Record the
+# failure instead and fold it into the final status.
+echo ""
+echo "=== aggregate ==="
+AGGREGATE_OK=1
+if ! "$FN_BIN" vault aggregate 2>&1 | sed 's/^/  /'; then
+    echo "  WARN: aggregate failed" >&2
+    AGGREGATE_OK=0
+fi
+
+# --- doctor (its exit status is ignored) ---
+echo ""
+echo "=== doctor ==="
+"$FN_BIN" vault doctor 2>&1 | sed 's/^/  /' || true
+
+# --- summary table ---
+END_TS=$(date +%s)
+ELAPSED=$(( END_TS - START_TS ))
+
+echo ""
+echo "=== summary ==="
+printf "%-30s  %s\n" "VAULT" "STATUS"
+printf "%-30s  %s\n" "-----" "------"
+for vault_name in "${VAULT_NAMES[@]}"; do
+    status="${VAULT_STATUS[$vault_name]:-unknown}"
+    printf "%-30s  %s\n" "$vault_name" "$status"
+done
+echo ""
+echo "Done: ${PASS_COUNT} ok, ${FAIL_COUNT} warn  (${ELAPSED}s)"
+
+# Exit 4 when any vault ended in "warn" or the aggregate step failed.
+if [[ $FAIL_COUNT -gt 0 || $AGGREGATE_OK -eq 0 ]]; then
+    exit 4
+fi
+exit 0
@@ -0,0 +1,318 @@
+package main
+
+import (
+	"encoding/json"
+	"fmt"
+	"os"
+	"os/exec"
+	"path/filepath"
+	"strings"
+	"testing"
+	"time"
+
+	"fn-registry/functions/infra"
+	"fn-registry/registry"
+)
+
+// Location of the fn binary built for this test run. Both values are assigned
+// by TestMain before any test executes.
+var (
+	fnBinDir  string // temporary directory that holds the compiled binary
+	fnBinPath string // path of the binary inside fnBinDir
+)
+
+// TestMain builds the fn binary once, runs the whole suite against it and
+// exits with the suite's status.
+//
+// os.Exit does not run deferred functions, so the work happens in
+// runVaultTestMain, where the deferred removal of the temporary directory
+// fires before the process terminates.
+func TestMain(m *testing.M) {
+	os.Exit(runVaultTestMain(m))
+}
+
+// runVaultTestMain creates the temporary build directory, compiles
+// cmd/fn into it (build tag fts5), runs the tests and returns the exit code.
+// It returns 1 when the setup itself fails.
+func runVaultTestMain(m *testing.M) int {
+	var err error
+	fnBinDir, err = os.MkdirTemp("", "fn-vault-test-*")
+	if err != nil {
+		fmt.Fprintf(os.Stderr, "create temp dir: %v\n", err)
+		return 1
+	}
+	defer os.RemoveAll(fnBinDir)
+
+	fnBinPath = filepath.Join(fnBinDir, "fn")
+	// The module root is the nearest ancestor directory holding go.mod.
+	regRoot, err := findRoot()
+	if err != nil {
+		fmt.Fprintf(os.Stderr, "find root: %v\n", err)
+		return 1
+	}
+	cmd := exec.Command("go", "build", "-tags", "fts5", "-o", fnBinPath, ".")
+	cmd.Dir = filepath.Join(regRoot, "cmd", "fn")
+	if out, errB := cmd.CombinedOutput(); errB != nil {
+		fmt.Fprintf(os.Stderr, "build fn: %v\n%s\n", errB, out)
+		return 1
+	}
+
+	return m.Run()
+}
+
+// findRoot returns the nearest directory, starting at the working directory
+// and moving upwards, that contains a go.mod file. It fails when the
+// filesystem root is reached without finding one.
+func findRoot() (string, error) {
+	start, err := os.Getwd()
+	if err != nil {
+		return "", err
+	}
+	for cur := start; ; {
+		_, statErr := os.Stat(filepath.Join(cur, "go.mod"))
+		if statErr == nil {
+			return cur, nil
+		}
+		// filepath.Dir of the root is the root itself: nothing left to climb.
+		up := filepath.Dir(cur)
+		if up == cur {
+			return "", fmt.Errorf("could not find go.mod from %s", cur)
+		}
+		cur = up
+	}
+}
+
+// ensureFnBin returns the package-level fnBinPath. It performs no check of
+// its own; t is only used to mark the function as a test helper.
+func ensureFnBin(t *testing.T) string {
+	t.Helper()
+	return fnBinPath
+}
+
+// setupTestRegistry creates a minimal registry root for one test:
+//   - a vault directory (in its own temp dir) with data/raw/report.csv and
+//     data/raw/notes.md
+//   - projects/test_proj/project.md
+//   - projects/test_proj/vaults/vault.yaml declaring "test_vault" with the
+//     vault directory as its path
+//   - registry.db, opened with registry.Open and populated with registry.Index
+//
+// Any setup failure aborts the test. The database handle is closed before
+// returning, on failure paths as well.
+//
+// Returns (repoRoot, vaultDir).
+func setupTestRegistry(t *testing.T) (string, string) {
+	t.Helper()
+	repoRoot := t.TempDir()
+
+	// Create vault directory with files.
+	vaultDir := filepath.Join(t.TempDir(), "test_vault")
+	rawDir := filepath.Join(vaultDir, "data", "raw")
+	if err := os.MkdirAll(rawDir, 0755); err != nil {
+		t.Fatal(err)
+	}
+	if err := os.WriteFile(filepath.Join(rawDir, "report.csv"),
+		[]byte("name,value\nfoo,1"), 0644); err != nil {
+		t.Fatal(err)
+	}
+	if err := os.WriteFile(filepath.Join(rawDir, "notes.md"),
+		[]byte("# Notes\nsome text"), 0644); err != nil {
+		t.Fatal(err)
+	}
+
+	// Create project directory structure.
+	projDir := filepath.Join(repoRoot, "projects", "test_proj")
+	vaultsDir := filepath.Join(projDir, "vaults")
+	if err := os.MkdirAll(vaultsDir, 0755); err != nil {
+		t.Fatal(err)
+	}
+
+	// Create vault.yaml.
+	vaultYAML := "vaults:\n  - name: test_vault\n    description: Test vault for unit tests\n    path: " + vaultDir + "\n    tags: [test]\n"
+	if err := os.WriteFile(filepath.Join(vaultsDir, "vault.yaml"), []byte(vaultYAML), 0644); err != nil {
+		t.Fatal(err)
+	}
+
+	// Create project.md.
+	projMD := "---\nname: test_proj\ndescription: Test project\ntags: [test]\n---\n"
+	if err := os.WriteFile(filepath.Join(projDir, "project.md"), []byte(projMD), 0644); err != nil {
+		t.Fatal(err)
+	}
+
+	// Open registry.db.
+	db, err := registry.Open(filepath.Join(repoRoot, "registry.db"))
+	if err != nil {
+		t.Fatalf("registry.Open: %v", err)
+	}
+	// Deferred so the handle is also released when Index fails below
+	// (t.Fatalf unwinds through deferred calls).
+	defer db.Close()
+
+	// Index so the vault is registered in registry.db.
+	if _, err := registry.Index(db, repoRoot); err != nil {
+		t.Fatalf("registry.Index: %v", err)
+	}
+
+	return repoRoot, vaultDir
+}
+
+// runFn executes the compiled fn binary with args, using repoRoot as the
+// working directory, and returns its stdout, stderr and exit code.
+//
+// A non-zero exit status is handed back to the caller for inspection. Any
+// other failure to run the command (binary missing, not executable, I/O
+// error) aborts the test. Such failures used to be logged and reported as
+// exit code 0, which let tests pass without the command ever running.
+func runFn(t *testing.T, repoRoot string, args ...string) (string, string, int) {
+	t.Helper()
+	cmd := exec.Command(ensureFnBin(t), args...)
+	cmd.Dir = repoRoot
+	var stdout, stderr strings.Builder
+	cmd.Stdout = &stdout
+	cmd.Stderr = &stderr
+
+	code := 0
+	if err := cmd.Run(); err != nil {
+		exitErr, ok := err.(*exec.ExitError)
+		if !ok {
+			t.Fatalf("run fn %s: %v", strings.Join(args, " "), err)
+		}
+		code = exitErr.ExitCode()
+	}
+	return stdout.String(), stderr.String(), code
+}
+
+// TestVaultList checks that `fn vault list` exits 0 and mentions the vault
+// registered by setupTestRegistry.
+func TestVaultList(t *testing.T) {
+	root, _ := setupTestRegistry(t)
+
+	stdout, errOut, exit := runFn(t, root, "vault", "list")
+	if exit != 0 {
+		t.Fatalf("fn vault list exit %d\nstderr: %s", exit, errOut)
+	}
+	if found := strings.Contains(stdout, "test_vault"); !found {
+		t.Errorf("expected 'test_vault' in output, got:\n%s", stdout)
+	}
+}
+
+// TestVaultIndex checks that `fn vault index test_vault` exits 0 and prints
+// the word "indexed".
+func TestVaultIndex(t *testing.T) {
+	root, _ := setupTestRegistry(t)
+
+	stdout, errOut, exit := runFn(t, root, "vault", "index", "test_vault")
+	if exit != 0 {
+		t.Fatalf("fn vault index exit %d\nstderr: %s\nstdout: %s", exit, errOut, stdout)
+	}
+	if found := strings.Contains(stdout, "indexed"); !found {
+		t.Errorf("expected 'indexed' in output, got:\n%s", stdout)
+	}
+}
+
+// TestVaultSearchJSON checks that `fn vault search --json` exits 0 and prints
+// output that decodes as a JSON array of objects. The number of hits is only
+// logged, not asserted.
+func TestVaultSearchJSON(t *testing.T) {
+	repoRoot, vaultDir := setupTestRegistry(t)
+
+	// First index the vault so there is something to search.
+	if _, _, code := runFn(t, repoRoot, "vault", "index", "test_vault"); code != 0 {
+		t.Fatal("fn vault index failed")
+	}
+
+	// Seed searchable text for report.csv directly into the vault index.
+	db, err := infra.VaultIndexOpen(vaultDir)
+	if err != nil {
+		t.Fatalf("VaultIndexOpen: %v", err)
+	}
+	// Replace the files_fts row; a failing statement aborts the test instead
+	// of being silently ignored.
+	seed := []string{
+		`DELETE FROM files_fts WHERE rel_path = 'data/raw/report.csv'`,
+		`INSERT INTO files_fts(rel_path, content_text) VALUES ('data/raw/report.csv', 'foo report data')`,
+	}
+	for _, stmt := range seed {
+		if _, err := db.Exec(stmt); err != nil {
+			db.Close()
+			t.Fatalf("seed files_fts: %v", err)
+		}
+	}
+	// Close before launching the fn process, as the original sequence did.
+	db.Close()
+
+	out, stderr, code := runFn(t, repoRoot, "vault", "search", "report", "--json", "--vault", "test_vault")
+	if code != 0 {
+		t.Fatalf("fn vault search exit %d\nstderr: %s", code, stderr)
+	}
+
+	var result []map[string]interface{}
+	if err := json.Unmarshal([]byte(out), &result); err != nil {
+		t.Fatalf("output is not valid JSON: %v\nraw: %s", err, out)
+	}
+	// An empty array is accepted; only decodability is required.
+	t.Logf("search returned %d hits", len(result))
+}
+
+// TestVaultInfo checks that, after indexing, `fn vault info test_vault` exits
+// 0 and its output contains both the vault name and a "Files:" label.
+func TestVaultInfo(t *testing.T) {
+	root, _ := setupTestRegistry(t)
+
+	// The vault has to be indexed before info is requested.
+	_, _, indexExit := runFn(t, root, "vault", "index", "test_vault")
+	if indexExit != 0 {
+		t.Fatal("fn vault index failed")
+	}
+
+	stdout, errOut, exit := runFn(t, root, "vault", "info", "test_vault")
+	if exit != 0 {
+		t.Fatalf("fn vault info exit %d\nstderr: %s", exit, errOut)
+	}
+	if found := strings.Contains(stdout, "test_vault"); !found {
+		t.Errorf("expected vault name in output, got:\n%s", stdout)
+	}
+	if found := strings.Contains(stdout, "Files:"); !found {
+		t.Errorf("expected 'Files:' in output, got:\n%s", stdout)
+	}
+}
+
+// TestFormatBytes verifies the formatBytes helper.
+func TestFormatBytes(t *testing.T) {
+	cases := []struct {
+		input    int64
+		expected string
+	}{
+		{500, "500 B"},
+		{1024, "1.0 KB"},
+		{1536, "1.5 KB"},
+		{1048576, "1.0 MB"},
+		{1073741824, "1.0 GB"},
+	}
+	for _, tc := range cases {
+		got := formatBytes(tc.input)
+		if got != tc.expected {
+			t.Errorf("formatBytes(%d) = %q, want %q", tc.input, got, tc.expected)
+		}
+	}
+}
+
+// TestVaultLayoutEnsure checks that `fn vault layout-ensure test_vault
+// --dry-run` exits 0 and mentions the vault name.
+func TestVaultLayoutEnsure(t *testing.T) {
+	root, _ := setupTestRegistry(t)
+
+	stdout, errOut, exit := runFn(t, root, "vault", "layout-ensure", "test_vault", "--dry-run")
+	if exit != 0 {
+		t.Fatalf("fn vault layout-ensure exit %d\nstderr: %s\nstdout: %s", exit, errOut, stdout)
+	}
+	if found := strings.Contains(stdout, "test_vault"); !found {
+		t.Errorf("expected vault name in output, got:\n%s", stdout)
+	}
+}
+
+// TestVaultAggregate checks that `fn vault aggregate` exits 0 once the test
+// vault has been indexed.
+func TestVaultAggregate(t *testing.T) {
+	root, _ := setupTestRegistry(t)
+
+	// Index first so aggregate has data to work on.
+	_, _, indexExit := runFn(t, root, "vault", "index", "test_vault")
+	if indexExit != 0 {
+		t.Fatal("fn vault index failed")
+	}
+
+	_, errOut, exit := runFn(t, root, "vault", "aggregate")
+	if exit != 0 {
+		t.Fatalf("fn vault aggregate exit %d\nstderr: %s", exit, errOut)
+	}
+}
+
+// TestVaultDoctor checks that `fn vault doctor` exits 0 and lists the test
+// vault in its output.
+func TestVaultDoctor(t *testing.T) {
+	root, _ := setupTestRegistry(t)
+
+	stdout, errOut, exit := runFn(t, root, "vault", "doctor")
+	if exit != 0 {
+		t.Fatalf("fn vault doctor exit %d\nstderr: %s", exit, errOut)
+	}
+	if found := strings.Contains(stdout, "test_vault"); !found {
+		t.Errorf("expected 'test_vault' in doctor output, got:\n%s", stdout)
+	}
+}
+
+// TestVaultDedupe checks that `fn vault dedupe test_vault` exits 0 after the
+// vault has been indexed. The report text itself is not inspected.
+func TestVaultDedupe(t *testing.T) {
+	root, _ := setupTestRegistry(t)
+
+	_, _, indexExit := runFn(t, root, "vault", "index", "test_vault")
+	if indexExit != 0 {
+		t.Fatal("fn vault index failed")
+	}
+
+	// stdout is discarded: only the exit status is checked.
+	_, errOut, exit := runFn(t, root, "vault", "dedupe", "test_vault")
+	if exit != 0 {
+		t.Fatalf("fn vault dedupe exit %d\nstderr: %s", exit, errOut)
+	}
+}
+
+// TestVaultAuditDryRun checks that `fn vault audit test_vault
+// --dry-run-layout --skip-profilers` finishes with exit code 0 or 4 and
+// prints a "summary" section.
+func TestVaultAuditDryRun(t *testing.T) {
+	root, _ := setupTestRegistry(t)
+
+	stdout, errOut, exit := runFn(t, root, "vault", "audit", "test_vault",
+		"--dry-run-layout", "--skip-profilers")
+	// 0 means fully ok, 4 means warnings; any other code is a failure.
+	switch exit {
+	case 0, 4:
+	default:
+		t.Fatalf("fn vault audit exit %d\nstderr: %s\nstdout: %s", exit, errOut, stdout)
+	}
+	if found := strings.Contains(stdout, "summary"); !found {
+		t.Errorf("expected 'summary' section in audit output, got:\n%s", stdout)
+	}
+}
+
+// Keeps the "time" import referenced so the file compiles: an imported
+// package that is never used is a compile error in Go.
+// TODO(review): drop this together with the import if "time" stays unused.
+var _ = time.Now
@@ -44,6 +44,10 @@ func cmdDoctor(args []string) {
 		doctorUnused(r, jsonOut)
 	case "cpp-apps":
 		doctorCppApps(r, jsonOut)
+	case "ml":
+		doctorML(r, jsonOut)
+	case "vaults":
+		doctorVaults(r, jsonOut)
 	default:
 		fmt.Fprintf(os.Stderr, "unknown doctor subcommand: %s\n", sub)
 		doctorUsage()
@@ -65,6 +69,8 @@ Subcommands:
  uses-functions  Audit imports reales vs uses_functions del app.md
  unused          Funciones del registry sin consumidores
  cpp-apps        Conformidad de apps C++ con cpp/PATTERNS.md (cfg.about, dockspace, menubar)
+  ml              Entorno ML: GPUs NVIDIA, CUDA toolkit, venv Python, paquetes torch/diffusers, CLIs y vault
+  vaults          Salud de vaults: directorio, layout, índice, staleness, drift

 Flags:
  --json          Salida JSON (para scripting/agentes)`)
@@ -103,6 +109,16 @@ func doctorAll(root string, jsonOut bool) {
 		} else {
 			all["cpp_apps_error"] = err.Error()
 		}
+		if v, err := infra.AuditMlEnv(root); err == nil {
+			all["ml"] = v
+		} else {
+			all["ml_error"] = err.Error()
+		}
+		if v, err := infra.VaultDoctor(root); err == nil {
+			all["vaults"] = v
+		} else {
+			all["vaults_error"] = err.Error()
+		}
 		emit(all)
 		return
 	}
@@ -119,6 +135,10 @@ func doctorAll(root string, jsonOut bool) {
 	doctorUnused(root, false)
 	fmt.Println("\n=== C++ apps standard conformance ===")
 	doctorCppApps(root, false)
+	fmt.Println("\n=== ML environment ===")
+	doctorML(root, false)
+	fmt.Println("\n=== Vaults ===")
+	doctorVaults(root, false)
 }

 func doctorCppApps(root string, jsonOut bool) {
@@ -280,6 +300,81 @@ func doctorUnused(root string, jsonOut bool) {
 	fmt.Printf("\n%d unused functions (candidates to remove).\n", len(unused))
 }

+// doctorVaults prints the result of infra.VaultDoctor for root: JSON when
+// jsonOut is set, otherwise a table with one row per vault followed by a
+// healthy/total count. It exits the process with status 1 when VaultDoctor
+// returns an error.
+func doctorVaults(root string, jsonOut bool) {
+	entries, err := infra.VaultDoctor(root)
+	if err != nil {
+		fmt.Fprintf(os.Stderr, "error: %v\n", err)
+		os.Exit(1)
+	}
+
+	switch {
+	case jsonOut:
+		emit(entries)
+		return
+	case len(entries) == 0:
+		fmt.Println("No vaults declared (no projects/*/vaults/vault.yaml found).")
+		return
+	}
+
+	tw := tabwriter.NewWriter(os.Stdout, 0, 0, 2, ' ', 0)
+	fmt.Fprintln(tw, "NAME\tSTATUS\tFILES\tINDEXED\tISSUES")
+	healthy := 0
+	for _, entry := range entries {
+		// A vault without issues shows "-" in the last column.
+		issueText := strings.Join(entry.Issues, "; ")
+		if len(entry.Issues) == 0 {
+			issueText = "-"
+		}
+		fmt.Fprintf(tw, "%s\t%s\t%d\t%d\t%s\n",
+			entry.VaultName, entry.Status, entry.DiskFiles, entry.IndexedFiles, issueText)
+		if entry.Status == "ok" {
+			healthy++
+		}
+	}
+	tw.Flush()
+	fmt.Printf("\n%d/%d vaults healthy.\n", healthy, len(entries))
+}
+
+func doctorML(root string, jsonOut bool) {
+	report, err := infra.AuditMlEnv(root)
+	if err != nil {
+		fmt.Fprintf(os.Stderr, "error: %v\n", err)
+		os.Exit(1)
+	}
+	if jsonOut {
+		emit(report)
+		return
+	}
+
+	fmt.Printf("GPUs detected: %d\n", len(report.Gpus))
+	for _, g := range report.Gpus {
+		fmt.Printf("  [%d] %s  VRAM: %d/%d MiB  Driver: %s  CUDA: %s\n",
+			g.Index, g.Name, g.VramFreeMb, g.VramTotalMb, g.DriverVersion, g.CudaVersion)
+	}
+	fmt.Println()
+
+	w := tabwriter.NewWriter(os.Stdout, 0, 0, 2, ' ', 0)
+	fmt.Fprintln(w, "CHECK\tSTATUS\tVERSION\tDETAIL")
+	for _, c := range report.Checks {
+		version := c.Version
+		if version == "" {
+			version = "-"
+		}
+		detail := c.Detail
+		if len(detail) > 60 {
+			detail = detail[:60] + "..."
+		}
+		if detail == "" {
+			detail = "-"
+		}
+		fmt.Fprintf(w, "%s\t%s\t%s\t%s\n", c.Name, c.Status, version, detail)
+	}
+	w.Flush()
+
+	overall := "OK"
+	if !report.OverallOK {
+		overall = "INCOMPLETE"
+	}
+	fmt.Printf("\nOverall ML environment: %s\n", overall)
+}
+
 func emit(v any) {
 	b, err := json.MarshalIndent(v, "", "  ")
 	if err != nil {
@@ -45,6 +45,8 @@ func main() {
 		cmdAnalysis(os.Args[2:])
 	case "sync":
 		cmdSync(os.Args[2:])
+	case "vault":
+		cmdVault(os.Args[2:])
 	case "doctor":
 		cmdDoctor(os.Args[2:])
 	case "help", "-h", "--help":
@@ -73,6 +75,7 @@ Usage:
  fn app     <list|clone|pull>             Gestiona apps externas (Gitea)
  fn analysis <list|clone|pull>            Gestiona analyses externas (Gitea)
  fn sync    [status|locations]            Sincroniza con servidor central
+  fn vault   <list|search|index|info>      Gestiona y busca en data vaults
  fn doctor  [artefacts|services|sync|uses-functions|unused] [--json]
                                           Diagnostico read-only del registry`)
 }
@@ -3,8 +3,10 @@ add_imgui_app(tables_playground
    main.cpp
    data_table.cpp
    data_table_logic.cpp
+    llm_anthropic.cpp
    lua_engine.cpp
    tql.cpp
+    tql_to_sql.cpp
    viz.cpp
 )
 target_link_libraries(tables_playground PRIVATE lua54 implot)
@@ -13,10 +15,13 @@ target_link_libraries(tables_playground PRIVATE lua54 implot)
 add_executable(tables_playground_self_test
    self_test.cpp
    data_table_logic.cpp
+    llm_anthropic.cpp
    lua_engine.cpp
    tql.cpp
+    tql_to_sql.cpp
 )
 target_include_directories(tables_playground_self_test PRIVATE
    ${CMAKE_CURRENT_SOURCE_DIR}
+    ${CMAKE_SOURCE_DIR}/functions
 )
 target_link_libraries(tables_playground_self_test PRIVATE lua54)
@@ -1,20 +1,33 @@
 #include "data_table.h"
 #include "app_base.h"
 #include "imgui.h"
+#include "llm_anthropic.h"
 #include "lua_engine.h"
 #include "tql.h"
+#include "tql_to_sql.h"
 #include "viz.h"

 #include <algorithm>
 #include <cfloat>
 #include <cstdio>
 #include <cstring>
+#include <ctime>
 #include <fstream>
 #include <string>
 #include <unordered_map>

 namespace data_table {

+// Today's date in UTC, formatted as ISO "YYYY-MM-DD". Passed to
+// build_preset_filters as the reference date for the filter presets.
+static std::string today_iso() {
+    const std::time_t now = std::time(nullptr);
+    const std::tm* utc = std::gmtime(&now);
+    char iso[16];
+    // tm_year counts from 1900 and tm_mon is zero-based.
+    std::snprintf(iso, sizeof(iso), "%04d-%02d-%02d",
+                  utc->tm_year + 1900, utc->tm_mon + 1, utc->tm_mday);
+    return std::string(iso);
+}
+
 namespace {

 // ---------------------------------------------------------------------------
@@ -122,10 +135,106 @@ struct UiState {

    // Toggle Table <-> View: remember last non-table display.
    ViewMode last_non_table_main = ViewMode::Bar;
+
+    // Drill history (fase 10). Stacks per-app; no persistido en TQL.
+    std::vector<DrillStep> drill_back;
+    std::vector<DrillStep> drill_forward;
+
+    // Row inspector (fase 10). -1 cerrado, sino row idx en el output del stage activo.
+    int  inspect_row     = -1;
+    bool inspect_open    = false;
+
+    // Ask AI modal (fase 11 — issue 0080).
+    bool         ask_open       = false;
+    bool         ask_busy       = false;
+    int          ask_mode       = 0;     // 0 = TQL, 1 = SQL
+    char         ask_question[2048] = {0};
+    std::string  ask_current_tql;        // emit del state actual al abrir modal
+    std::string  ask_response_raw;       // texto del modelo
+    std::string  ask_response_code;      // bloque extraido (Lua o SQL)
+    std::string  ask_error;
+    std::string  ask_status;             // "Sent. Waiting..." / "OK" / error
+    char         ask_edit_buf[8192] = {0}; // buffer editable de propuesta
 };

 UiState& ui() { static UiState s; return s; }

+// Row inspector modal (phase 10). Shows every column and value of row
+// `inspect_row` from the output of the active stage. The view is read-only and
+// offers two actions: "Copy TSV" (row to clipboard) and "Filter prev stage by
+// this row" (adds one drill filter per non-empty cell to the previous stage).
+//
+//   st                  pipeline state; only the filter action mutates it.
+//   active              index of the active stage.
+//   cells, rows, cols   cell grid of the active stage output, indexed
+//                       row-major as cells[row * cols + col].
+//   headers, types      per-column names and types of that output; either may
+//                       be shorter than `cols`.
+//   prev_input_headers  column names against which a cell's column is resolved
+//                       (by the name parse_breakout_granularity extracts).
+static void draw_row_inspector_modal(State& st, int active,
+                                      const char* const* cells, int rows, int cols,
+                                      const std::vector<std::string>& headers,
+                                      const std::vector<ColumnType>& types,
+                                      const std::vector<std::string>& prev_input_headers) {
+    auto& U = ui();
+    if (!U.inspect_open) return;
+    // A row index outside the current output cannot be shown; close instead.
+    if (U.inspect_row < 0 || U.inspect_row >= rows) {
+        U.inspect_open = false;
+        return;
+    }
+    ImGui::OpenPopup("##row_inspector");
+    ImGui::SetNextWindowSize(ImVec2(560, 400), ImGuiCond_Appearing);
+    if (ImGui::BeginPopupModal("##row_inspector", &U.inspect_open,
+                                ImGuiWindowFlags_NoSavedSettings)) {
+        ImGui::Text("Row %d", U.inspect_row);
+        ImGui::SameLine(0, 20);
+        if (ImGui::SmallButton("Copy TSV")) {
+            std::string tsv = row_to_tsv(cells, rows, cols, U.inspect_row, headers);
+            ImGui::SetClipboardText(tsv.c_str());
+        }
+        ImGui::SameLine();
+        // The action needs a previous stage that really exists in st.stages,
+        // because st.stages[target] is indexed below.
+        const int target = active - 1;
+        bool can_filter = (target >= 0 && target < (int)st.stages.size()
+                           && !prev_input_headers.empty());
+        ImGui::BeginDisabled(!can_filter);
+        if (ImGui::SmallButton("Filter prev stage by this row")) {
+            for (int c = 0; c < cols; ++c) {
+                // `headers` may be shorter than `cols` (the table below guards
+                // the same access); columns without a name cannot be resolved.
+                if (c >= (int)headers.size()) break;
+                const char* v = cells[U.inspect_row * cols + c];
+                if (!v || !*v) continue;
+                const std::string& h = headers[c];
+                std::string h_clean;
+                parse_breakout_granularity(h, h_clean);
+                int ci = -1;
+                for (size_t i = 0; i < prev_input_headers.size(); ++i) {
+                    if (prev_input_headers[i] == h_clean) { ci = (int)i; break; }
+                }
+                if (ci < 0) continue;
+                // Each filter is appended at the current end of the list and
+                // recorded as its own step, so it can be undone individually.
+                DrillStep step;
+                step.target_stage      = target;
+                step.filter_pos        = (int)st.stages[target].filters.size();
+                step.prev_active_stage = st.active_stage;
+                step.added             = make_drill_filter(ci, v);
+                if (apply_drill_step(st, step)) {
+                    U.drill_back.push_back(step);
+                }
+            }
+            U.drill_forward.clear();
+            U.inspect_open = false;
+            // Close the popup explicitly as well: from the next frame on this
+            // function returns early and never submits the modal again.
+            ImGui::CloseCurrentPopup();
+        }
+        ImGui::EndDisabled();
+        ImGui::Separator();
+        ImGuiTableFlags flags = ImGuiTableFlags_Borders | ImGuiTableFlags_RowBg
+                              | ImGuiTableFlags_ScrollY | ImGuiTableFlags_Resizable;
+        if (ImGui::BeginTable("##inspector_tbl", 2, flags, ImVec2(-1, -1))) {
+            ImGui::TableSetupColumn("col");
+            ImGui::TableSetupColumn("value");
+            ImGui::TableHeadersRow();
+            for (int c = 0; c < cols; ++c) {
+                ImGui::TableNextRow();
+                ImGui::TableSetColumnIndex(0);
+                // Missing type falls back to String, missing header to "?".
+                ColumnType t = (c < (int)types.size()) ? types[c] : ColumnType::String;
+                ImGui::Text("%s %s", column_type_icon(t),
+                            (c < (int)headers.size()) ? headers[c].c_str() : "?");
+                ImGui::TableSetColumnIndex(1);
+                const char* v = cells[U.inspect_row * cols + c];
+                ImGui::TextWrapped("%s", v ? v : "");
+            }
+            ImGui::EndTable();
+        }
+        ImGui::EndPopup();
+    }
+}
+
 int autocomplete_cb(ImGuiInputTextCallbackData* data) {
    UiState* U = (UiState*)data->UserData;
    if (data->EventFlag == ImGuiInputTextFlags_CallbackAlways) {
@@ -180,6 +289,47 @@ void ensure_init(State& st, int eff_cols) {
 // ---------------------------------------------------------------------------
 void draw_stage_breadcrumb(State& st) {
    st.ensure_stage0();
+
+    // Drill history back/forward (fase 10). Botones al inicio.
+    auto& U = ui();
+    {
+        bool can_back = !U.drill_back.empty();
+        ImGui::BeginDisabled(!can_back);
+        if (ImGui::SmallButton("<##drill_back")) {
+            DrillStep s = U.drill_back.back();
+            U.drill_back.pop_back();
+            if (undo_drill_step(st, s)) {
+                U.drill_forward.push_back(s);
+            }
+        }
+        ImGui::EndDisabled();
+        if (can_back && ImGui::IsItemHovered())
+            ImGui::SetTooltip("Drill back (%zu)", U.drill_back.size());
+        ImGui::SameLine();
+        bool can_fwd = !U.drill_forward.empty();
+        ImGui::BeginDisabled(!can_fwd);
+        if (ImGui::SmallButton(">##drill_fwd")) {
+            DrillStep s = U.drill_forward.back();
+            U.drill_forward.pop_back();
+            if (apply_drill_step(st, s)) {
+                U.drill_back.push_back(s);
+            }
+        }
+        ImGui::EndDisabled();
+        if (can_fwd && ImGui::IsItemHovered())
+            ImGui::SetTooltip("Drill forward (%zu)", U.drill_forward.size());
+        ImGui::SameLine();
+        bool can_up = (st.active_stage > 0);
+        ImGui::BeginDisabled(!can_up);
+        if (ImGui::SmallButton("^##drill_up")) drill_up(st);
+        ImGui::EndDisabled();
+        if (can_up && ImGui::IsItemHovered())
+            ImGui::SetTooltip("Drill up (stage previo, sin perder filters)");
+        ImGui::SameLine();
+        ImGui::TextDisabled("|");
+        ImGui::SameLine();
+    }
+
    for (int si = 0; si < (int)st.stages.size(); ++si) {
        if (si > 0) { ImGui::SameLine(); ImGui::TextDisabled(">"); ImGui::SameLine(); }

@@ -610,6 +760,19 @@ void draw_viz_selector(State& st) {
        ImGui::OpenPopup("##viz_cfg_popup");
    }
    ImGui::SameLine();
+    if (ImGui::SmallButton("Ask AI##ask_open")) {
+        auto& U2 = ui();
+        U2.ask_open = true;
+        U2.ask_busy = false;
+        U2.ask_error.clear();
+        U2.ask_status.clear();
+        U2.ask_response_code.clear();
+        U2.ask_response_raw.clear();
+        U2.ask_current_tql = tql::emit(st,
+            std::vector<std::string>(),  // emit headers stage 0 (caller fill si necesario)
+            std::vector<ColumnType>());
+    }
+    ImGui::SameLine();
    if (ImGui::SmallButton("+ Viz##viz_add")) {
        VizPanel p;
        p.display = ViewMode::Bar;
@@ -737,7 +900,8 @@ void draw_joins_chips(State& st, const std::vector<TableInput>& joinables,
 // Filter chips para el stage activo. eff_headers/eff_cols son del INPUT del
 // stage activo (= orig+derived para stage 0; output del stage previo para 1+).
 // ---------------------------------------------------------------------------
-void draw_filter_chips(Stage& stg, const char* const* eff_headers, int eff_cols) {
+void draw_filter_chips(Stage& stg, const char* const* eff_headers, int eff_cols,
+                        const std::vector<ColumnType>& eff_types) {
    auto& U = ui();
    ImGui::PushStyleColor(ImGuiCol_Button,        IM_COL32(120,  60, 170, 220));
    ImGui::PushStyleColor(ImGuiCol_ButtonHovered, IM_COL32(150,  85, 200, 240));
@@ -746,6 +910,50 @@ void draw_filter_chips(Stage& stg, const char* const* eff_headers, int eff_cols)
    ImGui::PopStyleColor(3);
    ImGui::SameLine();

+    // Presets (fase 10): menu con Last7/30/90d (cols Date), ExcludeNulls (any),
+    // NonZero (cols numericas). Apply append a stg.filters via build_preset_filters.
+    if (ImGui::SmallButton("Presets##fpresets")) ImGui::OpenPopup("##presets_menu");
+    if (ImGui::BeginPopup("##presets_menu")) {
+        int first_date = -1, first_num = -1;
+        for (int c = 0; c < eff_cols && c < (int)eff_types.size(); ++c) {
+            if (first_date < 0 && eff_types[c] == ColumnType::Date) first_date = c;
+            if (first_num  < 0 && (eff_types[c] == ColumnType::Int ||
+                                    eff_types[c] == ColumnType::Float)) first_num = c;
+        }
+        auto apply_preset = [&](FilterPreset p, int col) {
+            auto fs = build_preset_filters(p, col, today_iso());
+            for (auto& f : fs) stg.filters.push_back(f);
+        };
+        if (first_date >= 0) {
+            char l1[96], l2[96], l3[96];
+            std::snprintf(l1, sizeof(l1), "Last 7 days on \"%s\"",  eff_headers[first_date]);
+            std::snprintf(l2, sizeof(l2), "Last 30 days on \"%s\"", eff_headers[first_date]);
+            std::snprintf(l3, sizeof(l3), "Last 90 days on \"%s\"", eff_headers[first_date]);
+            if (ImGui::MenuItem(l1)) apply_preset(FilterPreset::Last7d,  first_date);
+            if (ImGui::MenuItem(l2)) apply_preset(FilterPreset::Last30d, first_date);
+            if (ImGui::MenuItem(l3)) apply_preset(FilterPreset::Last90d, first_date);
+            ImGui::Separator();
+        }
+        if (ImGui::BeginMenu("Exclude nulls in...")) {
+            for (int c = 0; c < eff_cols; ++c) {
+                if (ImGui::MenuItem(eff_headers[c])) apply_preset(FilterPreset::ExcludeNulls, c);
+            }
+            ImGui::EndMenu();
+        }
+        if (first_num >= 0) {
+            if (ImGui::BeginMenu("Non-zero in...")) {
+                for (int c = 0; c < eff_cols && c < (int)eff_types.size(); ++c) {
+                    if (eff_types[c] == ColumnType::Int || eff_types[c] == ColumnType::Float) {
+                        if (ImGui::MenuItem(eff_headers[c])) apply_preset(FilterPreset::NonZero, c);
+                    }
+                }
+                ImGui::EndMenu();
+            }
+        }
+        ImGui::EndPopup();
+    }
+    ImGui::SameLine();
+
    if (stg.filters.empty()) {
        ImGui::TextDisabled("Sin filtros.");
        return;
@@ -778,7 +986,8 @@ void draw_filter_chips(Stage& stg, const char* const* eff_headers, int eff_cols)
 }

 // Chips de breakout (stage > 0).
-void draw_breakout_chips(Stage& stg, const char* const* in_headers, int in_cols) {
+void draw_breakout_chips(Stage& stg, const char* const* in_headers, int in_cols,
+                          const std::vector<ColumnType>& in_types) {
    auto& U = ui();
    ImGui::PushStyleColor(ImGuiCol_Button,        IM_COL32( 60, 160, 170, 220));
    ImGui::PushStyleColor(ImGuiCol_ButtonHovered, IM_COL32( 80, 190, 200, 240));
@@ -792,6 +1001,17 @@ void draw_breakout_chips(Stage& stg, const char* const* in_headers, int in_cols)
        return;
    }
    for (size_t i = 0; i < stg.breakouts.size(); ) {
+        std::string col_name;
+        DateGranularity g = parse_breakout_granularity(stg.breakouts[i], col_name);
+
+        // Resolve col index para lookup de tipo.
+        int col_idx = -1;
+        for (int c = 0; c < in_cols; ++c) {
+            if (std::strcmp(in_headers[c], col_name.c_str()) == 0) { col_idx = c; break; }
+        }
+        bool is_date_col = (col_idx >= 0 && col_idx < (int)in_types.size()
+                             && in_types[col_idx] == ColumnType::Date);
+
        char buf[256];
        std::snprintf(buf, sizeof(buf), "%s  x##bk%zu", stg.breakouts[i].c_str(), i);
        ImGui::PushStyleColor(ImGuiCol_Button,        IM_COL32( 60, 160, 170, 220));
@@ -802,20 +1022,42 @@ void draw_breakout_chips(Stage& stg, const char* const* in_headers, int in_cols)
        if (ImGui::IsItemClicked(ImGuiMouseButton_Right)) {
            U.edit_chip_kind = 2;
            U.edit_chip_idx  = (int)i;
-            // resolve current col name to index in in_headers
-            U.edit_col_idx = 0;
-            for (int c = 0; c < in_cols; ++c) {
-                if (std::strcmp(in_headers[c], stg.breakouts[i].c_str()) == 0) {
-                    U.edit_col_idx = c; break;
-                }
-            }
+            U.edit_col_idx = (col_idx >= 0) ? col_idx : 0;
            ImGui::OpenPopup("##edit_breakout");
        }
        if (clicked) { stg.breakouts.erase(stg.breakouts.begin() + i); continue; }
+
+        // Granularity combo inline cuando col Date (fase 10).
+        if (is_date_col) {
+            ImGui::SameLine();
+            const char* preview = (g == DateGranularity::None)
+                                  ? "(raw)" : date_granularity_token(g);
+            char combo_id[32];
+            std::snprintf(combo_id, sizeof(combo_id), "##gran%zu", i);
+            ImGui::SetNextItemWidth(72);
+            if (ImGui::BeginCombo(combo_id, preview)) {
+                DateGranularity opts[] = {
+                    DateGranularity::None,
+                    DateGranularity::Year,
+                    DateGranularity::Month,
+                    DateGranularity::Week,
+                    DateGranularity::Day,
+                    DateGranularity::Hour,
+                };
+                for (auto o : opts) {
+                    const char* lbl = (o == DateGranularity::None)
+                                      ? "(raw)" : date_granularity_token(o);
+                    if (ImGui::Selectable(lbl, o == g)) {
+                        stg.breakouts[i] = compose_breakout(col_name, o);
+                    }
+                }
+                ImGui::EndCombo();
+            }
+        }
+
        ImGui::SameLine();
        ++i;
    }
-    (void)in_headers; (void)in_cols;
    ImGui::NewLine();
 }

@@ -1220,7 +1462,8 @@ void draw_add_filter_popup(Stage& stg, const char* const* eff_headers_arr, int e
 }

 void draw_add_breakout_popup(Stage& stg, const char* const* in_headers, int in_cols,
-                              const std::vector<ColumnType>& in_types) {
+                              const std::vector<ColumnType>& in_types,
+                              const char* const* in_cells, int in_rows) {
    auto& U = ui();
    if (!ImGui::BeginPopup("##addbreakout")) return;
    if (U.brk_picker_col < 0 || U.brk_picker_col >= in_cols) U.brk_picker_col = 0;
@@ -1236,7 +1479,18 @@ void draw_add_breakout_popup(Stage& stg, const char* const* in_headers, int in_c
        ImGui::EndCombo();
    }
    if (ImGui::Button("Add##bk")) {
-        stg.breakouts.emplace_back(in_headers[U.brk_picker_col]);
+        int c = U.brk_picker_col;
+        std::string col = in_headers[c];
+        // Fase 10: si col es Date, auto-detect granularidad via rango lexical
+        // (ISO YYYY-MM-DD ordena bien). Default Day si rango invalido.
+        if (c >= 0 && c < (int)in_types.size() && in_types[c] == ColumnType::Date) {
+            std::string lo, hi;
+            column_min_max(in_cells, in_rows, in_cols, c, lo, hi);
+            DateGranularity g = auto_date_granularity(lo, hi);
+            stg.breakouts.emplace_back(compose_breakout(col, g));
+        } else {
+            stg.breakouts.emplace_back(col);
+        }
        ImGui::CloseCurrentPopup();
    }
    ImGui::EndPopup();
@@ -1441,8 +1695,17 @@ void drill_into(State& st, int from_stage,
        if (prev_input_headers[i] == col_name) { ci = (int)i; break; }
    }
    if (ci < 0) return;
-    st.stages[target].filters.push_back(make_drill_filter(ci, value));
-    st.active_stage = target;
+
+    // Fase 10: graba step en drill_back, limpia forward (rama nueva).
+    DrillStep step;
+    step.target_stage      = target;
+    step.filter_pos        = (int)st.stages[target].filters.size();
+    step.prev_active_stage = st.active_stage;
+    step.added             = make_drill_filter(ci, value);
+    apply_drill_step(st, step);
+    auto& U = ui();
+    U.drill_back.push_back(step);
+    U.drill_forward.clear();
 }

 } // anon namespace
@@ -1659,7 +1922,7 @@ void render(const char* id,
            draw_joins_chips(st, *joinables, mh);
        }

-        draw_filter_chips(act, eff_headers.data(), eff_cols);
+        draw_filter_chips(act, eff_headers.data(), eff_cols, eff_types);
        draw_add_filter_popup(act, eff_headers.data(), eff_cols, eff_types);
        draw_edit_filter_popup(act, eff_headers.data(), eff_cols, eff_types);

@@ -2290,12 +2553,13 @@ void render(const char* id,

        if (chrome_visible) {
        ImGui::PushStyleVar(ImGuiStyleVar_ItemSpacing, ImVec2(8, 2));
-        draw_filter_chips(act, ih_ptrs.data(), in_cols_n);
+        draw_filter_chips(act, ih_ptrs.data(), in_cols_n, input_types_active);
        draw_add_filter_popup(act, ih_ptrs.data(), in_cols_n, input_types_active);
        draw_edit_filter_popup(act, ih_ptrs.data(), in_cols_n, input_types_active);

-        draw_breakout_chips(act, ih_ptrs.data(), in_cols_n);
-        draw_add_breakout_popup(act, ih_ptrs.data(), in_cols_n, input_types_active);
+        draw_breakout_chips(act, ih_ptrs.data(), in_cols_n, input_types_active);
+        draw_add_breakout_popup(act, ih_ptrs.data(), in_cols_n, input_types_active,
+                                 cur_cells, cur_rows);
        draw_edit_breakout_popup(act, ih_ptrs.data(), in_cols_n);

        draw_aggregation_chips(act, ih_ptrs.data(), in_cols_n);
@@ -2524,7 +2788,22 @@ void render(const char* id,
                    so_local.cells.push_back(cur_cells[i]);
                so_ptr = &so_local;
            }
-            viz::render(*so_ptr, st.display, st.viz_config, ImVec2(-1, -1));
+            int clicked_row = -1;
+            viz::render(*so_ptr, st.display, st.viz_config, ImVec2(-1, -1), &clicked_row);
+            // Fase 10: click sobre chart -> drill al stage previo usando
+            // breakout col[0] como filtro Op::Eq sobre cells[clicked_row].
+            if (clicked_row >= 0 && active > 0 &&
+                so_ptr->cols > 0 && clicked_row < so_ptr->rows) {
+                int n_brk = (int)st.stages[active].breakouts.size();
+                if (n_brk > 0) {
+                    const char* v = so_ptr->cells[clicked_row * so_ptr->cols + 0];
+                    std::string col_clean;
+                    parse_breakout_granularity(so_ptr->headers[0], col_clean);
+                    drill_into(st, active, col_clean,
+                                v ? std::string(v) : "",
+                                input_headers_active);
+                }
+            }
            goto stage_n_table_end;
        }

@@ -2613,13 +2892,11 @@ void render(const char* id,
                    ImGui::PushID(r * cur_cols_n + c);
                    ImGui::Selectable(cell ? cell : "");
                    if (ImGui::IsItemHovered() && ImGui::IsMouseClicked(ImGuiMouseButton_Right)) {
-                        // Drill-down solo si c es col de breakout (c < n_brk).
-                        if (c < n_brk) {
                        U.pending_col   = c;
                        U.pending_value = cell ? cell : "";
+                        U.inspect_row   = r;
                        ImGui::OpenPopup("##drill_popup");
                    }
-                    }
                    if (ImGui::BeginPopup("##drill_popup")) {
                        if (c < n_brk) {
                            char lbl[256];
@@ -2631,6 +2908,12 @@ void render(const char* id,
                                           input_headers_active);
                                ImGui::CloseCurrentPopup();
                            }
+                            ImGui::Separator();
+                        }
+                        if (ImGui::MenuItem("Inspect row...")) {
+                            U.inspect_row  = r;
+                            U.inspect_open = true;
+                            ImGui::CloseCurrentPopup();
                        }
                        ImGui::EndPopup();
                    }
@@ -2642,6 +2925,11 @@ void render(const char* id,
        }
        stage_n_table_end:;

+        // Row inspector modal (fase 10). Activado via right-click "Inspect row..."
+        // sobre celdas del table del stage activo. `cur_cells` ya es row-major.
+        draw_row_inspector_modal(st, active, cur_cells, cur_rows, cur_cols_n,
+                                  cur_headers, cur_types, input_headers_active);
+
        // Render extras (stage>0 path)
        if (!st.extra_panels.empty() && cur_cols_n > 0) {
            StageOutput so_local;
@@ -2958,6 +3246,118 @@ void render(const char* id,
        ImGui::EndPopup();
    }

+    // Ask AI modal (fase 11 — issue 0080).
+    if (U.ask_open) ImGui::OpenPopup("Ask AI");
+    ImGui::SetNextWindowSize(ImVec2(820, 560), ImGuiCond_Appearing);
+    if (ImGui::BeginPopupModal("Ask AI", &U.ask_open,
+                                ImGuiWindowFlags_NoSavedSettings)) {
+        ImGui::TextDisabled("Ask en lenguaje natural. Default TQL. SQL solo si DuckDB linkado.");
+        const char* modes[] = {"TQL", "SQL (DuckDB)"};
+#ifndef FN_TQL_DUCKDB
+        // SQL mode disabled visually pero el toggle existe (informativo)
+        if (U.ask_mode == 1) U.ask_mode = 0;
+#endif
+        ImGui::Combo("Output##askmode", &U.ask_mode, modes, IM_ARRAYSIZE(modes));
+#ifndef FN_TQL_DUCKDB
+        if (U.ask_mode == 1) {
+            ImGui::TextColored(ImVec4(1, 0.5f, 0.3f, 1),
+                "SQL mode requires FN_TQL_DUCKDB=1 build flag.");
+        }
+#endif
+        ImGui::InputTextMultiline("##ask_q", U.ask_question, sizeof(U.ask_question),
+                                   ImVec2(-1, 80));
+        ImGui::BeginDisabled(U.ask_busy);
+        if (ImGui::Button("Send")) {
+            U.ask_busy = true;
+            U.ask_status = "Sending...";
+            U.ask_error.clear();
+            U.ask_response_code.clear();
+            U.ask_response_raw.clear();
+
+            // Build AskInput desde el state actual.
+            llm_anthropic::AskInput in;
+            in.question = U.ask_question;
+            in.tql_current = U.ask_current_tql;
+            in.col_names = U.active_headers;
+            in.col_types = U.active_types;
+            in.mode = (U.ask_mode == 1)
+                ? llm_anthropic::OutputMode::SQL
+                : llm_anthropic::OutputMode::TQL;
+
+            // Llamada blocking (UI freeze breve durante red).
+            auto r = llm_anthropic::ask(in);
+            U.ask_busy = false;
+            if (!r.error.empty()) {
+                U.ask_error = r.error;
+                U.ask_status = "Error";
+            } else {
+                U.ask_response_raw = r.raw;
+                U.ask_response_code = r.code;
+                U.ask_status = "Got response.";
+                // Llenar edit buffer
+                std::snprintf(U.ask_edit_buf, sizeof(U.ask_edit_buf),
+                              "%s", r.code.c_str());
+            }
+        }
+        ImGui::EndDisabled();
+        ImGui::SameLine();
+        if (!U.ask_status.empty()) {
+            ImGui::TextDisabled("%s", U.ask_status.c_str());
+        }
+        if (!U.ask_error.empty()) {
+            ImGui::TextColored(ImVec4(1, 0.4f, 0.4f, 1), "%s", U.ask_error.c_str());
+        }
+        ImGui::Separator();
+        ImGui::Columns(2, "ask_cols", true);
+        ImGui::TextUnformatted("Current");
+        ImGui::InputTextMultiline("##ask_cur",
+            const_cast<char*>(U.ask_current_tql.c_str()),
+            U.ask_current_tql.size() + 1,
+            ImVec2(-1, 240),
+            ImGuiInputTextFlags_ReadOnly);
+        ImGui::NextColumn();
+        ImGui::TextUnformatted("Proposed (editable before apply)");
+        ImGui::InputTextMultiline("##ask_new", U.ask_edit_buf, sizeof(U.ask_edit_buf),
+                                   ImVec2(-1, 240));
+        ImGui::Columns(1);
+
+        bool can_apply = !U.ask_busy && U.ask_edit_buf[0] != '\0';
+        ImGui::BeginDisabled(!can_apply);
+        if (ImGui::Button("Apply")) {
+            std::string err;
+            if (U.ask_mode == 0) {
+                // TQL apply
+                bool ok = tql::apply(U.ask_edit_buf, st,
+                                      U.active_headers,
+                                      U.active_types,
+                                      nullptr, 0,
+                                      (int)U.active_headers.size(),
+                                      &err);
+                if (ok) {
+                    U.ask_status = "Applied OK.";
+                    U.ask_open = false;
+                } else {
+                    U.ask_error = "tql::apply error: " + err;
+                    U.ask_status = "Apply failed.";
+                }
+            } else {
+                // SQL apply: requires DuckDB adapter (no v1).
+                U.ask_status = "SQL execute requires FN_TQL_DUCKDB build flag.";
+            }
+        }
+        ImGui::EndDisabled();
+        ImGui::SameLine();
+        if (ImGui::Button("Reject")) {
+            U.ask_response_code.clear();
+            U.ask_edit_buf[0] = '\0';
+        }
+        ImGui::SameLine();
+        if (ImGui::Button("Close")) {
+            U.ask_open = false;
+        }
+        ImGui::EndPopup();
+    }
+
    if (U.open_cell_popup) { ImGui::OpenPopup("##cell_op"); U.open_cell_popup = false; }
    if (ImGui::BeginPopup("##cell_op")) {
        ColumnType t = (U.pending_col >= 0 && U.pending_col < eff_cols)
@@ -567,6 +567,69 @@ Filter make_drill_filter(int col_idx, const std::string& value) {
    return f;
 }

+// Inserts step.added into the filter list of stage step.target_stage at index
+// step.filter_pos and makes that stage the active one. Returns false, leaving
+// `st` untouched, when the stage index is out of range or the position lies
+// outside [0, filters.size()].
+bool apply_drill_step(State& st, const DrillStep& step) {
+    const int n_stages = (int)st.stages.size();
+    const bool stage_ok = step.target_stage >= 0 && step.target_stage < n_stages;
+    if (!stage_ok) return false;
+
+    auto& filters = st.stages[step.target_stage].filters;
+    const int at = step.filter_pos;
+    // at == filters.size() is valid: the filter is appended at the end.
+    const bool pos_ok = at >= 0 && at <= (int)filters.size();
+    if (!pos_ok) return false;
+
+    filters.insert(filters.begin() + at, step.added);
+    st.active_stage = step.target_stage;
+    return true;
+}
+
+// Moves the active stage one position towards stage 0. Returns false and
+// changes nothing when there are no stages or the active stage is already <= 0.
+bool drill_up(State& st) {
+    const bool can_go_up = !st.stages.empty() && st.active_stage > 0;
+    if (can_go_up) --st.active_stage;
+    return can_go_up;
+}
+
+// Serializes one row as two tab-separated lines, each terminated by "\r\n":
+// first the headers, then the values of row `row_idx` taken from the row-major
+// grid `cells` (cells[row * cols + col]). Columns without a header and null
+// cells produce an empty field. Returns "" when `row_idx` is outside
+// [0, rows) or `cols` is not positive.
+std::string row_to_tsv(const char* const* cells, int rows, int cols,
+                        int row_idx, const std::vector<std::string>& headers) {
+    const bool row_ok = row_idx >= 0 && row_idx < rows;
+    if (!row_ok || cols <= 0) return "";
+
+    const int n_headers = (int)headers.size();
+    const int base = row_idx * cols;
+    std::string header_line;
+    std::string value_line;
+    // Both lines share the same column walk, so build them side by side.
+    for (int col = 0; col < cols; ++col) {
+        if (col != 0) {
+            header_line.push_back('\t');
+            value_line.push_back('\t');
+        }
+        if (col < n_headers) header_line.append(headers[col]);
+        if (const char* cell = cells[base + col]) value_line.append(cell);
+    }
+    return header_line + "\r\n" + value_line + "\r\n";
+}
+
+// Builds one Op::Eq filter per non-empty cell of row `row_idx` in the
+// row-major grid `cells` (cells[row * cols + col]); the filter's `col` is the
+// cell's column index and its `value` the cell text. Null and empty cells are
+// skipped. Returns an empty vector when `row_idx` is outside [0, rows) or
+// `cols` is not positive.
+std::vector<Filter> build_filters_from_row(const char* const* cells, int rows,
+                                            int cols, int row_idx) {
+    std::vector<Filter> filters;
+    const bool row_ok = row_idx >= 0 && row_idx < rows;
+    if (!row_ok || cols <= 0) return filters;
+
+    const int base = row_idx * cols;
+    for (int col = 0; col < cols; ++col) {
+        const char* cell = cells[base + col];
+        const bool has_text = cell != nullptr && cell[0] != '\0';
+        if (has_text) {
+            Filter eq;
+            eq.col   = col;
+            eq.op    = Op::Eq;
+            eq.value = cell;
+            filters.push_back(eq);
+        }
+    }
+    return filters;
+}
+
+// Reverts a drill step: erases the filter at index step.filter_pos of stage
+// step.target_stage and, if step.prev_active_stage is a valid stage index,
+// makes it the active stage again. Returns false, leaving `st` untouched, when
+// the stage index or the filter position is out of range.
+bool undo_drill_step(State& st, const DrillStep& step) {
+    const int n_stages = (int)st.stages.size();
+    if (step.target_stage < 0 || step.target_stage >= n_stages) return false;
+
+    auto& filters = st.stages[step.target_stage].filters;
+    const int at = step.filter_pos;
+    if (at < 0 || at >= (int)filters.size()) return false;
+    filters.erase(filters.begin() + at);
+
+    // An out-of-range previous stage is ignored; the filter is removed anyway.
+    const int prev = step.prev_active_stage;
+    const bool prev_ok = prev >= 0 && prev < n_stages;
+    if (prev_ok) st.active_stage = prev;
+    return true;
+}
+
 std::vector<int> apply_filters(const char* const* cells, int rows, int cols,
                               const std::vector<Filter>& filters)
 {
@@ -696,19 +759,57 @@ StageOutput compute_stage(const char* const* in_cells, int in_rows, int in_cols,
    }

    // Grouped: agrupa visible por valores de breakout, calcula aggregations.
-    std::vector<int> break_cols(stage.breakouts.size());
-    for (size_t i = 0; i < stage.breakouts.size(); ++i) {
-        break_cols[i] = find_col(in_headers, stage.breakouts[i]);
+    // Breakouts pueden llevar sufijo `:granularity` para cols Date (fase 10).
+    int nbreaks = (int)stage.breakouts.size();
+    std::vector<int> break_cols(nbreaks);
+    std::vector<DateGranularity> break_grans(nbreaks);
+    bool any_trunc = false;
+    for (int i = 0; i < nbreaks; ++i) {
+        std::string col_name;
+        break_grans[i] = parse_breakout_granularity(stage.breakouts[i], col_name);
+        if (break_grans[i] != DateGranularity::None) any_trunc = true;
+        break_cols[i] = find_col(in_headers, col_name);
    }

-    auto make_key = [&](int r) -> std::string {
-        std::string k;
-        for (size_t i = 0; i < break_cols.size(); ++i) {
-            if (i > 0) k += '\x1f'; // separador unit-separator (no aparece en datos)
+    // Pre-truncate solo cuando hay granularity activa. Strings persistidos en
+    // out.cell_backing para que los punteros sobrevivan al return de la funcion.
+    // Reservamos upfront para que push_back no invalide punteros anteriores.
+    // Tamaño = trunc cells + aggregation cells (peor caso n_groups <= in_rows).
+    out.cell_backing.reserve(
+        (size_t)in_rows * (size_t)nbreaks +
+        (size_t)in_rows * stage.aggregations.size() + 16);
+
+    std::vector<const char*> trunc_ptrs;
+    if (any_trunc) {
+        trunc_ptrs.assign((size_t)in_rows * (size_t)nbreaks, nullptr);
+        for (int r = 0; r < in_rows; ++r) {
+            for (int i = 0; i < nbreaks; ++i) {
+                if (break_grans[i] == DateGranularity::None) continue;
                int bc = break_cols[i];
                if (bc < 0) continue;
                const char* v = in_cells[r * in_cols + bc];
-            k += (v ? v : "");
+                out.cell_backing.emplace_back(
+                    truncate_date(v ? v : "", break_grans[i]));
+                trunc_ptrs[(size_t)r * nbreaks + i] = out.cell_backing.back().c_str();
+            }
+        }
+    }
+
+    auto cell_for = [&](int r, int i) -> const char* {
+        int bc = break_cols[i];
+        if (bc < 0) return "";
+        if (break_grans[i] != DateGranularity::None) {
+            return trunc_ptrs[(size_t)r * nbreaks + i];
+        }
+        const char* v = in_cells[r * in_cols + bc];
+        return v ? v : "";
+    };
+
+    auto make_key = [&](int r) -> std::string {
+        std::string k;
+        for (int i = 0; i < nbreaks; ++i) {
+            if (i > 0) k += '\x1f'; // separador unit-separator (no aparece en datos)
+            k += cell_for(r, i);
        }
        return k;
    };
@@ -727,10 +828,9 @@ StageOutput compute_stage(const char* const* in_cells, int in_rows, int in_cols,
            key_to_group.emplace(k, gi);
            group_keys.push_back(k);
            group_rows.emplace_back();
-            std::vector<const char*> bv(break_cols.size(), "");
-            for (size_t i = 0; i < break_cols.size(); ++i) {
-                int bc = break_cols[i];
-                bv[i] = (bc >= 0) ? in_cells[r * in_cols + bc] : "";
+            std::vector<const char*> bv((size_t)nbreaks, "");
+            for (int i = 0; i < nbreaks; ++i) {
+                bv[i] = cell_for(r, i);
            }
            group_breakvals.push_back(std::move(bv));
        } else gi = it->second;
@@ -742,11 +842,17 @@ StageOutput compute_stage(const char* const* in_cells, int in_rows, int in_cols,
    out.cols = out_cols;
    out.headers.reserve(out_cols);
    out.types.reserve(out_cols);
-    for (size_t i = 0; i < stage.breakouts.size(); ++i) {
+    for (int i = 0; i < nbreaks; ++i) {
        out.headers.push_back(stage.breakouts[i]);
        int bc = break_cols[i];
-        out.types.push_back((bc >= 0 && bc < (int)in_types.size())
-                            ? in_types[bc] : ColumnType::String);
+        // Si hay granularity activa, el output es String (formato ymd o similar),
+        // no la fecha original.
+        ColumnType ot = ColumnType::String;
+        if (break_grans[i] == DateGranularity::None
+            && bc >= 0 && bc < (int)in_types.size()) {
+            ot = in_types[bc];
+        }
+        out.types.push_back(ot);
    }
    for (const auto& a : stage.aggregations) {
        out.headers.push_back(aggregation_alias(a));
@@ -1102,4 +1208,288 @@ StageOutput join_tables(const char* const* left_cells, int left_rows, int left_c
    return out;
 }

+// ----------------------------------------------------------------------------
+// Fase 10: drill extendido — granularity + presets.
+// ----------------------------------------------------------------------------
+
+// Lower-case token for a granularity ("year", "month", "week", "day",
+// "hour"). Any other value maps to the empty string.
+const char* date_granularity_token(DateGranularity g) {
+    const char* token = "";
+    switch (g) {
+        case DateGranularity::Year:  token = "year";  break;
+        case DateGranularity::Month: token = "month"; break;
+        case DateGranularity::Week:  token = "week";  break;
+        case DateGranularity::Day:   token = "day";   break;
+        case DateGranularity::Hour:  token = "hour";  break;
+        default: break;
+    }
+    return token;
+}
+
+// Parses a granularity token. The match is exact and case-sensitive; a null
+// pointer or an unknown token yields DateGranularity::None.
+DateGranularity date_granularity_from_token(const char* s) {
+    if (s == nullptr) return DateGranularity::None;
+
+    struct Entry {
+        const char*     token;
+        DateGranularity value;
+    };
+    static const Entry kTokens[] = {
+        {"year",  DateGranularity::Year},
+        {"month", DateGranularity::Month},
+        {"week",  DateGranularity::Week},
+        {"day",   DateGranularity::Day},
+        {"hour",  DateGranularity::Hour},
+    };
+
+    const std::string text(s);
+    for (const Entry& entry : kTokens) {
+        if (text == entry.token) return entry.value;
+    }
+    return DateGranularity::None;
+}
+
+// Splits a breakout of the form "<column>:<granularity>".
+// When the text after the LAST ':' is a known granularity token, `col_out`
+// receives everything before that ':' and the granularity is returned.
+// Otherwise (no ':' at all, or an unrecognised suffix) `col_out` receives the
+// whole breakout and DateGranularity::None is returned.
+DateGranularity parse_breakout_granularity(const std::string& breakout,
+                                           std::string& col_out) {
+    DateGranularity gran = DateGranularity::None;
+    const std::string::size_type colon = breakout.rfind(':');
+    if (colon != std::string::npos) {
+        const std::string suffix = breakout.substr(colon + 1);
+        gran = date_granularity_from_token(suffix.c_str());
+    }
+    if (gran == DateGranularity::None) {
+        col_out = breakout;
+    } else {
+        col_out = breakout.substr(0, colon);
+    }
+    return gran;
+}
+
+// Builds the breakout string for a column: the bare column name for
+// DateGranularity::None, otherwise "<col>:<token>".
+std::string compose_breakout(const std::string& col, DateGranularity g) {
+    std::string composed = col;
+    if (g != DateGranularity::None) {
+        composed += ":";
+        composed += date_granularity_token(g);
+    }
+    return composed;
+}
+
+// Hit-test helper: index of the sample in xs[0..n) closest to `target`.
+// NaN samples are ignored and ties go to the lowest index. Returns -1 when
+// `xs` is null, `n <= 0`, or no sample has a comparable distance (all samples
+// NaN, or a NaN target).
+int nearest_index_1d(double target, const double* xs, int n) {
+    if (n <= 0 || !xs) return -1;
+    int best = -1;
+    double best_d = 0.0;
+    for (int i = 0; i < n; ++i) {
+        double v = xs[i];
+        // Exact equality first so an infinite sample matches an equally
+        // infinite target (inf - inf would otherwise be NaN).
+        double d = (v == target) ? 0.0 : std::fabs(v - target);
+        // A NaN distance must never become the running best: no later
+        // candidate could compare smaller than it.
+        if (std::isnan(d)) continue;
+        if (best < 0 || d < best_d) { best = i; best_d = d; }
+    }
+    return best;
+}
+
+// Hit-test helper: index of the point (xs[i], ys[i]) closest to (tx, ty),
+// using squared Euclidean distance. Points with a NaN coordinate are ignored
+// and ties go to the lowest index. Returns -1 when `xs` or `ys` is null,
+// `n <= 0`, or no point has a comparable distance (e.g. a NaN target).
+int nearest_index_2d(double tx, double ty,
+                      const double* xs, const double* ys, int n) {
+    if (n <= 0 || !xs || !ys) return -1;
+    int best = -1;
+    double best_d = 0.0;
+    for (int i = 0; i < n; ++i) {
+        double x = xs[i], y = ys[i];
+        // Exact equality first so inf - inf does not produce NaN.
+        double dx = (x == tx) ? 0.0 : x - tx;
+        double dy = (y == ty) ? 0.0 : y - ty;
+        double d = dx*dx + dy*dy;
+        // A NaN distance must never become the running best: no later
+        // candidate could compare smaller than it.
+        if (std::isnan(d)) continue;
+        if (best < 0 || d < best_d) { best = i; best_d = d; }
+    }
+    return best;
+}
+
+// Angle of the point (mx, my) around the centre (cx, cy), measured from
+// "top" (dx = 0, dy < 0) and normalised to the half-open range [0, 2*PI).
+double pie_angle(double cx, double cy, double mx, double my) {
+    // ImPlot pie: 0 = top, clockwise. Standard atan2: 0 = +X (right), CCW.
+    // Conversion: ImPlot angle = atan2(dx, -dy), normalised to [0, 2*PI).
+    double dx = mx - cx;
+    double dy = my - cy;
+    double a = std::atan2(dx, -dy); // 0 when (dx=0, dy<0) = top
+    const double two_pi = 6.283185307179586;
+    if (a < 0) a += two_pi;
+    // A tiny negative angle plus two_pi rounds to exactly two_pi; wrap it to
+    // 0 so the result stays inside the half-open range.
+    if (a >= two_pi) a = 0.0;
+    return a;
+}
+
+// Maps an angle in [0, 2*PI) to the index of the pie slice covering it.
+// Slice i spans an arc proportional to sums[i] / total, laid out in index
+// order starting at angle 0.
+//
+// Returns -1 when `sums` is null, `n <= 0`, any weight is negative or NaN,
+// the total is not a positive finite number, or `angle` is NaN or outside
+// [0, 2*PI).
+int pie_slice_at_angle(double angle, const double* sums, int n) {
+    if (n <= 0 || !sums) return -1;
+    double total = 0.0;
+    int last_nonzero = -1;
+    for (int i = 0; i < n; ++i) {
+        // Negated comparison so a NaN weight is rejected as well.
+        if (!(sums[i] >= 0.0)) return -1;
+        if (sums[i] > 0.0) last_nonzero = i;
+        total += sums[i];
+    }
+    if (!(total > 0.0) || !std::isfinite(total)) return -1;
+    const double two_pi = 6.283185307179586;
+    // Negated comparison so a NaN angle is rejected as well.
+    if (!(angle >= 0.0 && angle < two_pi)) return -1;
+    double cum = 0.0;
+    for (int i = 0; i < n; ++i) {
+        cum += (sums[i] / total) * two_pi;
+        if (angle < cum) return i;
+    }
+    // Rounding left the cumulative arc just short of the angle: attribute it
+    // to the last slice that actually has a non-zero arc.
+    return last_nonzero;
+}
+
+// Maps a point (px, py) to the heatmap cell containing it, for a grid of
+// `rows` x `cols` unit cells starting at the origin. On a hit, `col_out` and
+// `row_out` receive the truncated coordinates; otherwise (empty grid, point
+// outside [0, cols) x [0, rows), or NaN coordinate) both are set to -1.
+void heatmap_cell_at(double px, double py, int rows, int cols,
+                      int& row_out, int& col_out) {
+    row_out = -1;
+    col_out = -1;
+    if (rows <= 0 || cols <= 0) return;
+    // Negated range checks so NaN coordinates are rejected before the
+    // double -> int casts below (casting NaN to int is undefined behaviour).
+    if (!(px >= 0.0 && px < (double)cols)) return;
+    if (!(py >= 0.0 && py < (double)rows)) return;
+    col_out = (int)px;
+    // ImPlot heatmap paints row 0 at the top; the plot Y axis is usually
+    // inverted. The caller normalises if needed. Here we return
+    // row = floor(py) in plot coordinates.
+    row_out = (int)py;
+}
+
+// Finds the lexicographically smallest and largest non-empty cell of column
+// `col_idx` in a row-major rows x cols table. Null and empty cells are
+// skipped. Comparison is byte-wise string order, not numeric.
+//
+// `min_out` / `max_out` are always cleared first and stay empty when `cells`
+// is null, `col_idx` is outside [0, cols) or the column has no non-empty
+// cell.
+void column_min_max(const char* const* cells, int rows, int cols, int col_idx,
+                    std::string& min_out, std::string& max_out) {
+    min_out.clear();
+    max_out.clear();
+    if (!cells || col_idx < 0 || col_idx >= cols) return;
+    bool first = true;
+    for (int r = 0; r < rows; ++r) {
+        // Index computed in size_t so r * cols cannot overflow int.
+        const char* v = cells[(size_t)r * (size_t)cols + (size_t)col_idx];
+        if (!v || !*v) continue;
+        if (first) {
+            min_out = v;
+            max_out = v;
+            first = false;
+            continue;
+        }
+        // compare(const char*) orders exactly like std::string operator< but
+        // avoids building a temporary string for every row.
+        if (min_out.compare(v) > 0) min_out = v;
+        if (max_out.compare(v) < 0) max_out = v;
+    }
+}
+
+namespace {
+
+// Parses the leading "YYYY-MM-DD" of an ISO-style string into (y, m, d).
+// Requires at least 10 characters, digits in the year/month/day positions and
+// '-' at offsets 4 and 7; anything after the 10th character is ignored.
+// Returns true when month is in 1..12 and day is in 1..31 (the day is not
+// checked against the length of the month). Note that y, m and d have
+// already been written when that final range check fails.
+bool parse_ymd(const std::string& s, int& y, int& m, int& d) {
+    if (s.size() < 10) return false;
+    for (int i : {0,1,2,3,5,6,8,9}) {
+        if (s[(size_t)i] < '0' || s[(size_t)i] > '9') return false;
+    }
+    if (s[4] != '-' || s[7] != '-') return false;
+    y = (s[0]-'0')*1000 + (s[1]-'0')*100 + (s[2]-'0')*10 + (s[3]-'0');
+    m = (s[5]-'0')*10 + (s[6]-'0');
+    d = (s[8]-'0')*10 + (s[9]-'0');
+    if (m < 1 || m > 12 || d < 1 || d > 31) return false;
+    return true;
+}
+
+// Day number of a proleptic Gregorian date, counted from 0000-03-01 (which is
+// day 0). Years are treated as starting in March: January and February are
+// shifted to months 13 and 14 of the previous year before the computation.
+long ymd_to_days(int y, int m, int d) {
+    if (m <= 2) { y -= 1; m += 12; }
+    long era = (y >= 0 ? y : y - 399) / 400;
+    unsigned yoe = (unsigned)(y - era * 400);
+    unsigned doy = (unsigned)((153 * (m - 3) + 2) / 5 + d - 1);
+    unsigned doe = yoe * 365 + yoe/4 - yoe/100 + doy;
+    return era * 146097 + (long)doe;
+}
+
+// Inverse of ymd_to_days: converts a day number (day 0 = 0000-03-01) back to
+// a calendar date (y, m, d).
+void days_to_ymd(long days, int& y, int& m, int& d) {
+    long era = (days >= 0 ? days : days - 146096) / 146097;
+    unsigned doe = (unsigned)(days - era * 146097);
+    unsigned yoe = (doe - doe/1460 + doe/36524 - doe/146096) / 365;
+    int yr = (int)yoe + (int)era * 400;
+    unsigned doy = doe - (365*yoe + yoe/4 - yoe/100);
+    unsigned mp  = (5*doy + 2)/153;
+    unsigned day = doy - (153*mp + 2)/5 + 1;
+    unsigned mon = mp < 10 ? mp + 3 : mp - 9;
+    if (mon <= 2) yr += 1; // Jan/Feb belong to the following calendar year
+    y = yr; m = (int)mon; d = (int)day;
+}
+
+} // anon
+
+// Truncates an ISO-style date/timestamp string to granularity `g`:
+//
+//   Year  -> "YYYY"
+//   Month -> "YYYY-MM"
+//   Day   -> "YYYY-MM-DD"
+//   Hour  -> "YYYY-MM-DDTHH" (hour 00 when the input carries no valid hour)
+//   Week  -> "YYYY-MM-DD" of the Monday on or before the date
+//
+// `date` is returned unchanged when `g` is None (or an unhandled value) or
+// when its first 10 characters do not parse as a date.
+std::string truncate_date(const std::string& date, DateGranularity g) {
+    if (g == DateGranularity::None) return date;
+    int y, m, d;
+    if (!parse_ymd(date, y, m, d)) return date;
+    char buf[32];
+    switch (g) {
+        case DateGranularity::Year:
+            std::snprintf(buf, sizeof(buf), "%04d", y);
+            return buf;
+        case DateGranularity::Month:
+            std::snprintf(buf, sizeof(buf), "%04d-%02d", y, m);
+            return buf;
+        case DateGranularity::Day:
+            std::snprintf(buf, sizeof(buf), "%04d-%02d-%02d", y, m, d);
+            return buf;
+        case DateGranularity::Hour: {
+            // Accept 'T'/'t' (ISO 8601 / RFC 3339) and ' ' (SQL-style
+            // "YYYY-MM-DD HH:MM:SS") as the date/time separator, so that
+            // space-separated timestamps are not all bucketed into hour 00.
+            int hh = 0;
+            if (date.size() >= 13
+                && (date[10] == 'T' || date[10] == 't' || date[10] == ' ')
+                && date[11] >= '0' && date[11] <= '9'
+                && date[12] >= '0' && date[12] <= '9') {
+                hh = (date[11]-'0')*10 + (date[12]-'0');
+                // Two digits can only be out of range on the high side.
+                if (hh > 23) hh = 0;
+            }
+            std::snprintf(buf, sizeof(buf), "%04d-%02d-%02dT%02d", y, m, d, hh);
+            return buf;
+        }
+        case DateGranularity::Week: {
+            // ymd_to_days counts from 0000-03-01, a Wednesday, so
+            //   days % 7: 0=Wed, 1=Thu, 2=Fri, 3=Sat, 4=Sun, 5=Mon, 6=Tue.
+            // Days elapsed since the latest Monday: (mod - 5 + 7) % 7.
+            long days = ymd_to_days(y, m, d);
+            int mod = (int)(((days % 7) + 7) % 7);
+            int rem = (mod - 5 + 7) % 7;
+            long monday = days - rem;
+            int yy, mm, dd;
+            days_to_ymd(monday, yy, mm, dd);
+            std::snprintf(buf, sizeof(buf), "%04d-%02d-%02d", yy, mm, dd);
+            return buf;
+        }
+        default: return date;
+    }
+}
+
+// Picks a default granularity from the distance in days between two
+// "YYYY-MM-DD" dates (argument order does not matter):
+//   more than 730 days -> Year, more than 60 -> Month, more than 14 -> Week,
+//   otherwise Day. Falls back to Day when either date fails to parse.
+DateGranularity auto_date_granularity(const std::string& min_ymd,
+                                      const std::string& max_ymd) {
+    int lo_y, lo_m, lo_d;
+    int hi_y, hi_m, hi_d;
+    const bool parsed = parse_ymd(min_ymd, lo_y, lo_m, lo_d)
+                     && parse_ymd(max_ymd, hi_y, hi_m, hi_d);
+    if (!parsed) return DateGranularity::Day;
+
+    const long lo = ymd_to_days(lo_y, lo_m, lo_d);
+    const long hi = ymd_to_days(hi_y, hi_m, hi_d);
+    const long span = (hi >= lo) ? (hi - lo) : (lo - hi);
+
+    DateGranularity pick = DateGranularity::Day;
+    if (span > 730) {
+        pick = DateGranularity::Year;   // more than 2 years
+    } else if (span > 60) {
+        pick = DateGranularity::Month;
+    } else if (span > 14) {
+        pick = DateGranularity::Week;
+    }
+    return pick;
+}
+
+// Display label for a filter preset. A value not covered by the switch
+// yields "?".
+const char* filter_preset_label(FilterPreset p) {
+    const char* label = "?";
+    switch (p) {
+        case FilterPreset::Last7d:       label = "Last 7 days";   break;
+        case FilterPreset::Last30d:      label = "Last 30 days";  break;
+        case FilterPreset::Last90d:      label = "Last 90 days";  break;
+        case FilterPreset::ExcludeNulls: label = "Exclude nulls"; break;
+        case FilterPreset::NonZero:      label = "Non-zero only"; break;
+    }
+    return label;
+}
+
+// Expands a preset into concrete filters on column `col`:
+//   Last7d / Last30d / Last90d -> one `>=` filter whose value is the date
+//       7 / 30 / 90 days before `today_ymd`, formatted "YYYY-MM-DD"; no
+//       filter at all when `today_ymd` does not parse.
+//   ExcludeNulls -> `!= ""`.
+//   NonZero      -> `!= ""` followed by `!= "0"`.
+std::vector<Filter> build_preset_filters(FilterPreset preset, int col,
+                                         const std::string& today_ymd) {
+    std::vector<Filter> filters;
+
+    // Appends the filter `col <op> value`.
+    auto add = [&filters, col](Op op, const char* value) {
+        Filter f;
+        f.col = col;
+        f.op = op;
+        f.value = value;
+        filters.push_back(f);
+    };
+
+    int window_days = 0; // > 0 only for the "last N days" presets
+    switch (preset) {
+        case FilterPreset::Last7d:  window_days = 7;  break;
+        case FilterPreset::Last30d: window_days = 30; break;
+        case FilterPreset::Last90d: window_days = 90; break;
+        case FilterPreset::ExcludeNulls:
+            add(Op::Neq, "");
+            break;
+        case FilterPreset::NonZero:
+            add(Op::Neq, "");
+            add(Op::Neq, "0");
+            break;
+    }
+
+    if (window_days > 0) {
+        int y, m, d;
+        if (parse_ymd(today_ymd, y, m, d)) {
+            const long cutoff = ymd_to_days(y, m, d) - window_days;
+            int cy, cm, cd;
+            days_to_ymd(cutoff, cy, cm, cd);
+            char buf[16];
+            std::snprintf(buf, sizeof(buf), "%04d-%02d-%02d", cy, cm, cd);
+            add(Op::Gte, buf);
+        }
+    }
+    return filters;
+}
+
 } // namespace data_table
@@ -1,27 +1,21 @@
 // Logica pura del playground data_table. Sin ImGui — testable headless.
-// Cuando se promueva al registry, esto sera la base de data_table_cpp_viz.
+// TIPOS promovidos al registry (issue 0081). Este header solo declara
+// funciones; los types vienen de cpp/functions/core/data_table_types.h.
 #pragma once

+#include "core/data_table_types.h"
 #include <string>
 #include <utility>
 #include <vector>

 namespace data_table {

-enum class Op {
-    Eq, Neq, Gt, Gte, Lt, Lte,
-    Contains, NotContains, StartsWith, EndsWith
-};
+// ----------------------------------------------------------------------------
+// Helpers para Op y ColumnType.
+// ----------------------------------------------------------------------------
 const char* op_label(Op o);
 bool        op_is_string_only(Op o);

-// ----------------------------------------------------------------------------
-// Column types - declarado por caller con fallback a auto-detect.
-// ----------------------------------------------------------------------------
-enum class ColumnType {
-    Auto, String, Int, Float, Bool, Date, Json
-};
-
 const char* column_type_name(ColumnType t);
 const char* column_type_icon(ColumnType t); // UTF-8 Tabler icon

@@ -36,63 +30,11 @@ ColumnType auto_detect_type(const char* const* cells, int rows, int cols,
 ColumnType effective_type(ColumnType declared,
                          const char* const* cells, int rows, int cols, int col);

-// Derived column: inmutable. Dos modos:
-//   1) Retipo puro: source_col >= 0, formula == "". Cells del origen.
-//   2) Formula:     source_col == -1, formula no vacia. Eval por Lua.
-struct DerivedColumn {
-    int         source_col = -1;
-    ColumnType  type       = ColumnType::String;
-    std::string name;
-    std::string formula;        // "" = retipado puro; resto = body Lua
-    int         lua_id  = -1;   // referencia en lua_engine; -1 si no compilado
-    std::string compile_error;
-};
-
-// Filter movido aqui (antes era despues de State) porque TQL Stage lo necesita.
-struct Filter {
-    int         col;
-    Op          op;
-    std::string value;
-};
-
-struct ColorRule {
-    int          col;
-    std::string  equals;
-    unsigned int color;
-};
-
 // ----------------------------------------------------------------------------
-// TQL (Table Query Language) — stage model. Ver docs/TQL.md.
+// Aggregation helpers.
 // ----------------------------------------------------------------------------
-enum class AggFn {
-    Count, Sum, Avg, Min, Max, Distinct, Stddev,
-    Median, P25, P75, P90, P99, Percentile
-};
-
 const char* agg_fn_name(AggFn f);

-struct Aggregation {
-    AggFn       fn = AggFn::Count;
-    std::string col;         // ignorado para Count
-    double      arg = 0.0;   // para Percentile (0..1)
-    std::string alias;       // vacio -> auto-generado via aggregation_alias()
-};
-
-struct SortClause {
-    std::string col;
-    bool        desc = false;
-};
-
-// Stage: layer de TQL. Stage 0 = Raw (sin breakouts/aggregations).
-// Stage 1+ pueden agrupar. Cada stage consume output del anterior.
-struct Stage {
-    std::vector<Filter>          filters;
-    std::vector<DerivedColumn>   derived;       // expressions de este stage
-    std::vector<std::string>     breakouts;     // col names del INPUT de este stage
-    std::vector<Aggregation>     aggregations;
-    std::vector<SortClause>      sorts;
-};
-
 // Pure: alias por defecto cuando agg.alias esta vacio.
 //   count          -> "count"
 //   distinct col   -> "distinct_<col>"
@@ -101,224 +43,125 @@ struct Stage {
 std::string aggregation_alias(const Aggregation& a);

 // Pure: tipo del output de la aggregation.
-//   count, distinct   -> Int
-//   sum, avg, stddev,
-//   median, p*, percentile -> Float
-//   min, max          -> mismo tipo que la col origen
 ColumnType  aggregation_type(const Aggregation& a,
                              const std::vector<std::string>& in_headers,
                              const std::vector<ColumnType>&  in_types);

-// Output de compute_stage. Posee `cell_backing` (strings nuevos para
-// resultados agregados) y `cells` (punteros row-major a backing o a
-// `in_cells` original para passthrough).
-struct StageOutput {
-    std::vector<std::string>  cell_backing;
-    std::vector<const char*>  cells;
-    int                       rows = 0;
-    int                       cols = 0;
-    std::vector<std::string>  headers;
-    std::vector<ColumnType>   types;
-};
-
+// ----------------------------------------------------------------------------
+// Compute pipeline.
+// ----------------------------------------------------------------------------
 // Pure: ejecuta un Stage sobre los cells de entrada. Aplica filter -> (group+agg|passthrough) -> sort.
 StageOutput compute_stage(const char* const* in_cells, int in_rows, int in_cols,
                          const std::vector<std::string>& in_headers,
                          const std::vector<ColumnType>&  in_types,
                          const Stage& stage);

-// Pure: aplica filtros usando headers para resolver f.col (que ahora es
-// indice en el array de in_headers, no del dataset original). Devuelve
-// indices de filas que pasan.
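+// Pure: applies the filters to the rows of `cells`; f.col is a column index into the input (same order as its headers). Returns the indices of the rows that pass.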
 std::vector<int> apply_filters(const char* const* cells, int rows, int cols,
                               const std::vector<Filter>& filters);

 // Pure: helper para drill-down. Devuelve un Filter Op::Eq sobre col_idx con
-// el value indicado. col_idx es indice en los headers del INPUT del stage
-// previo (donde se va a aplicar el filtro).
+// el value indicado.
 Filter make_drill_filter(int col_idx, const std::string& value);

 // ----------------------------------------------------------------------------
-// ViewMode: tipo de visualizacion a renderizar sobre el output del stage activo.
-// "Table" siempre disponible. Resto requiere ciertos tipos de columnas.
+// ViewMode helpers.
 // ----------------------------------------------------------------------------
-enum class ViewMode {
-    Table,
-    // Bars
-    Bar,           // horizontal bars: 1 cat + 1 num
-    Column,        // vertical bars: 1 cat + 1 num
-    GroupedBar,    // 1 cat + N num (side-by-side)
-    StackedBar,    // 1 cat + N num (stacked)
-    // Lines / area
-    Line,          // X + 1..N Y series
-    Area,          // shaded to y=0
-    Stairs,        // step plot
-    // Points
-    Scatter,       // X + Y
-    Bubble,        // X + Y + size
-    // Distribution
-    Histogram,     // 1 num
-    Histogram2D,   // 2 num
-    Heatmap,       // matrix from breakouts
-    BoxPlot,       // 1 cat + 1 num (min/p25/p50/p75/max per group)
-    // Stems / signals
-    Stem,
-    ErrorBars,
-    // Composition
-    Pie,
-    Donut,
-    Funnel,        // ordered descending bars
-    Waterfall,     // running sum
-    // Single values
-    KPI,           // big text + label
-    KPIGrid,       // all aggregations as cards
-    // Specialized
-    Candlestick,   // OHLC: time + open + high + low + close
-    Radar,         // multi-axis (1 cat + N num)
-};
-
-const char* view_mode_token(ViewMode m);          // "table", "bar", ...
-const char* view_mode_label(ViewMode m);          // "Table", "Bar (horizontal)", ...
+const char* view_mode_token(ViewMode m);
+const char* view_mode_label(ViewMode m);
 ViewMode    view_mode_from_token(const char* s);
 int         view_mode_min_cols(ViewMode m);
 bool        view_mode_needs_numeric(ViewMode m);
 bool        view_mode_needs_category(ViewMode m);
-// Requiere stage agrupado (breakout+aggregation). Si user esta en stage 0 con
-// uno de estos, conviene auto-promote a stage 1.
 bool        view_mode_needs_aggregation(ViewMode m);

-// Lista completa de modos para el selector UI (orden de display).
+// Lista completa de modos para el selector UI.
 const ViewMode* all_view_modes(int* n_out);

 // ----------------------------------------------------------------------------
 // Joins (MBQL-style). Ver issue 0078.
 // ----------------------------------------------------------------------------
-enum class JoinStrategy { Left, Inner, Right, Full };
 const char*  join_strategy_token(JoinStrategy s);
 JoinStrategy join_strategy_from_token(const char* s);
 const char*  join_strategy_label(JoinStrategy s);

-// Tabla extra pasada al render() para joins. Owner externo (caller).
-struct TableInput {
-    std::string                 name;       // identificador estable (matchea Join.source)
-    std::vector<std::string>    headers;
-    std::vector<ColumnType>     types;
-    const char* const*          cells = nullptr;  // row-major, headers.size() cols x rows filas
-    int                         rows  = 0;
-    int                         cols  = 0;
-};
-
-// Join clause: une la tabla actual con `source` por las parejas `on`,
-// prefijando las cols del derecho con `alias.`.
-struct Join {
-    std::string                                          alias;
-    std::string                                          source;
-    std::vector<std::pair<std::string, std::string>>     on;        // {left_col, right_col}
-    JoinStrategy                                         strategy = JoinStrategy::Left;
-    std::vector<std::string>                             fields;    // vacio = all del derecho
-};
-
 // Pure: resuelve indice del main entre `tables` segun `main_source`.
-// Vacio -> 0. Nombre desconocido -> 0. tables vacio -> -1.
 int resolve_main_idx(const std::vector<TableInput>& tables, const std::string& main_source);

-// Pure: aplica un join sobre dos tablas. Resultado: StageOutput con
-// `headers` = left + `<alias>.<right_col>` (filtrado por fields si no vacio).
+// Pure: aplica un join sobre dos tablas.
 StageOutput join_tables(const char* const* left_cells, int left_rows, int left_cols,
                        const std::vector<std::string>& left_headers,
                        const std::vector<ColumnType>&  left_types,
                        const TableInput& right,
                        const Join& jn);

-// ViewConfig: overrides manuales de auto-detect para la vista activa.
-// Campos vacios -> auto. Si col name no existe en output, viz cae a auto.
-struct ViewConfig {
-    std::string                 x_col;        // single: scatter, line, hist2d
-    std::vector<std::string>    y_cols;       // 1..N: line/area/bar/etc
-    std::string                 size_col;     // bubble
-    std::string                 cat_col;      // bar/pie/funnel/box override
-    unsigned int                primary_color = 0;     // 0 = ImPlot auto
-    int                         hist_bins     = 0;     // 0 = Sturges
-    float                       pie_radius    = 0.0f;  // 0 = default
-    bool                        show_legend   = true;
-    bool                        show_markers  = false; // line/area markers
-    bool                        locked        = false; // disable pan/zoom
-    mutable bool                fit_request   = false; // consumed by viz::render
-};
+// ----------------------------------------------------------------------------
+// Drill apply/undo (fase 10).
+// ----------------------------------------------------------------------------
+bool apply_drill_step(State& st, const DrillStep& step);
+bool undo_drill_step(State& st, const DrillStep& step);

-// VizPanel: viz adicional sobre el mismo StageOutput. State.display + viz_config
-// es el panel 0 (siempre visible); extra_panels son los aniadidos por el user.
-struct VizPanel {
-    ViewMode    display = ViewMode::Bar;
-    ViewConfig  config;
-    // Memoria del ultimo non-Table display para toggle Table<->View.
-    mutable ViewMode last_non_table = ViewMode::Bar;
-};
+// Pure (fase 10): drill-up. Decrementa active_stage si > 0.
+bool drill_up(State& st);

-// State: stage pipeline + viz globales.
-//
-// `stages` siempre tiene tamaño >= 1 (auto-init en compute_visible_rows / render
-// si esta vacio: se crea stages[0] vacio). Stage 0 es Raw (filters + derived +
-// sorts; SIN breakouts/aggregations). Stages 1+ pueden agrupar.
-//
-// `active_stage` = indice del stage cuyo output se renderiza.
-// `col_visible/col_order/color_rules` aplican al output del stage activo.
-struct State {
-    std::vector<Stage>          stages;
-    int                         active_stage = 0;
-    ViewMode                    display = ViewMode::Table;
-    ViewConfig                  viz_config;
-    std::vector<VizPanel>       extra_panels;
-    std::vector<Join>           joins;     // aplicado antes de stages[0]
-    std::string                 main_source;  // name de TableInput a usar como main; vacio -> tables[0]
+// Pure (fase 10): serializa una fila a TSV.
+std::string row_to_tsv(const char* const* cells, int rows, int cols,
+                        int row_idx, const std::vector<std::string>& headers);

-    std::vector<ColorRule>      color_rules;
-    std::vector<bool>           col_visible;       // size = effective_cols del stage activo
-    std::vector<int>            col_order;         // permutacion [0..effective_cols)
+// Pure (fase 10): construye filters Op::Eq desde una fila.
+std::vector<Filter> build_filters_from_row(const char* const* cells, int rows,
+                                            int cols, int row_idx);

-    // --- Compat helpers: shortcuts a stages[0] (Raw) ---
-    // Util tras refactor para tests / accesos puntuales. Garantizan stages[0]
-    // existe (lo crean vacio si no).
-    Stage&       raw();
-    const Stage& raw() const;
-    Stage&       active();
-    const Stage& active_const() const;
-    void         ensure_stage0();
-};
+// ----------------------------------------------------------------------------
+// Date granularity helpers (fase 10).
+// ----------------------------------------------------------------------------
+const char*      date_granularity_token(DateGranularity g);
+DateGranularity  date_granularity_from_token(const char* s);

-// Parse "1.23" -> 1.23, true. False si la celda no es numero completo.
+DateGranularity parse_breakout_granularity(const std::string& breakout,
+                                           std::string& col_out);
+
+std::string compose_breakout(const std::string& col, DateGranularity g);
+
+void column_min_max(const char* const* cells, int rows, int cols, int col_idx,
+                    std::string& min_out, std::string& max_out);
+
+// Hit-tests para click-to-drill sobre charts (fase 10).
+int nearest_index_1d(double target, const double* xs, int n);
+int nearest_index_2d(double tx, double ty,
+                      const double* xs, const double* ys, int n);
+double pie_angle(double cx, double cy, double mx, double my);
+int pie_slice_at_angle(double angle, const double* sums, int n);
+void heatmap_cell_at(double px, double py, int rows, int cols,
+                      int& row_out, int& col_out);
+
+// Date trunc + auto + presets.
+std::string truncate_date(const std::string& date, DateGranularity g);
+DateGranularity auto_date_granularity(const std::string& min_ymd,
+                                      const std::string& max_ymd);
+const char* filter_preset_label(FilterPreset p);
+std::vector<Filter> build_preset_filters(FilterPreset preset, int col,
+                                         const std::string& today_ymd);
+
+// ----------------------------------------------------------------------------
+// Misc helpers.
+// ----------------------------------------------------------------------------
 bool parse_number(const char* s, double& out);
-
-// Compara dos celdas con operador. Numerico si ambas parseables; lexical si no.
 bool compare(const char* a, const char* b, Op op);

-// Aplica filtros y ordena. Devuelve indices de filas visibles.
 std::vector<int> compute_visible_rows(const char* const* cells,
                                      int rows, int cols,
                                      const State& st);

-// Pure: muta col_order de st para colocar `src` en la posicion (en orden visual)
-// donde estaba `dst`. No-op si src == dst o cualquiera fuera del array.
 void reorder_column(State& st, int src, int dst);

-// Pure: dado un buffer y posicion de cursor, busca el `[` abierto sin cerrar
-// mas reciente. Devuelve su indice (o -1 si ninguno). Rellena `filter_text`
-// con los caracteres entre `[` y cursor.
-// Para autocomplete de formulas: cuando el usuario teclea `[` el ImGui callback
-// detecta esto y muestra un popup con cols disponibles.
 int find_open_bracket(const char* buf, int len, int cursor, std::string& filter_text);

-// Pure: reemplaza src[start..cursor) por "[name]". Devuelve nuevo string y
-// actualiza `new_cursor` a la posicion despues del `]`.
 std::string insert_column_ref(const std::string& src, int start, int cursor,
                              const std::string& name, int& new_cursor);

-// CSV: escapa una celda segun RFC 4180 (wrap en " si contiene , " o newline).
 std::string csv_escape(const char* s);

-// Construye TSV de un rect de seleccion. Headers SIEMPRE incluidos.
-// view_row_lo/hi: indices en visible_rows.
-// view_col_lo/hi: indices en col_order. Cols ocultas se omiten.
 std::string build_tsv(const char* const* cells, int rows, int cols,
                      const char* const* headers,
                      const std::vector<int>&  col_order,
@@ -327,19 +170,21 @@ std::string build_tsv(const char* const* cells, int rows, int cols,
                      int view_row_lo, int view_row_hi,
                      int view_col_lo, int view_col_hi);

-// Construye CSV (full visible view). Headers incluidos, cells escapados.
 std::string build_csv(const char* const* cells, int rows, int cols,
                      const char* const* headers,
                      const std::vector<int>&  col_order,
                      const std::vector<bool>& col_visible,
                      const std::vector<int>&  visible_rows);

+// ----------------------------------------------------------------------------
+// Column statistics (no movido todavia al registry).
+// ----------------------------------------------------------------------------
 struct ColStats {
-    int    total          = 0;     // filas escaneadas
-    int    empty_count    = 0;     // cells == "" o null
-    int    unique_count   = 0;     // distintas (cap configurable)
-    bool   unique_capped  = false; // true si se alcanzo el cap
-    bool   numeric        = false; // true si todas las cells no-vacias parsean como numero
+    int    total          = 0;
+    int    empty_count    = 0;
+    int    unique_count   = 0;
+    bool   unique_capped  = false;
+    bool   numeric        = false;
    int    numeric_count  = 0;
    double min            = 0;
    double max            = 0;
@@ -348,16 +193,12 @@ struct ColStats {
    double p25            = 0;
    double p50            = 0;
    double p75            = 0;
-    std::vector<float> hist;                                   // bins (HIST_BINS) si numeric
-    std::vector<std::pair<std::string,int>> top_categories;    // top 8 por count desc
+    std::vector<float> hist;
+    std::vector<std::pair<std::string,int>> top_categories;
 };

 constexpr int HIST_BINS = 24;

-// Pure: escanea una columna y devuelve estadisticas. `unique_cap` corta el
-// conteo de unicos si excede (para datasets de millones). 0 = sin cap.
-// Si `indices != nullptr` y `n_indices > 0`, recorre solo las filas indicadas
-// (uso tipico: stats sobre filas visibles post-filtro).
 ColStats compute_column_stats(const char* const* cells, int rows, int cols,
                              int col, int unique_cap = 100000,
                              const int* indices = nullptr, int n_indices = 0);
@@ -0,0 +1,295 @@
+// llm_anthropic.cpp — minimal Anthropic client that shells out to the curl CLI (std::system + temp files).
+// See issue 0080.
+#include "llm_anthropic.h"
+
+#include <cctype>
+#include <cstdio>
+#include <cstdlib>
+#include <cstring>
+#include <sstream>
+#include <string>
+
+namespace llm_anthropic {
+
+using namespace data_table;
+
+namespace {
+
+// Escapes `s` so it can be embedded between the quotes of a JSON string.
+// Quote, backslash and the control characters that have a short form
+// (\n \r \t \b \f) use their two-character escapes; every other byte below
+// 0x20 becomes \u00XX. All remaining bytes are copied through unchanged.
+std::string json_escape(const std::string& s) {
+    std::string escaped;
+    escaped.reserve(s.size() + 8);
+    for (size_t idx = 0; idx < s.size(); ++idx) {
+        const char ch = s[idx];
+        if (ch == '"') {
+            escaped += "\\\"";
+        } else if (ch == '\\') {
+            escaped += "\\\\";
+        } else if (ch == '\n') {
+            escaped += "\\n";
+        } else if (ch == '\r') {
+            escaped += "\\r";
+        } else if (ch == '\t') {
+            escaped += "\\t";
+        } else if (ch == '\b') {
+            escaped += "\\b";
+        } else if (ch == '\f') {
+            escaped += "\\f";
+        } else if (static_cast<unsigned char>(ch) < 0x20) {
+            // Remaining control bytes have no short escape: emit \u00XX.
+            char hex[8];
+            std::snprintf(hex, sizeof(hex), "\\u%04x",
+                          static_cast<int>(static_cast<unsigned char>(ch)));
+            escaped += hex;
+        } else {
+            escaped += ch;
+        }
+    }
+    return escaped;
+}
+
+// Returns the lowercase type label written next to each column in the schema
+// block sent to the model. A value with no entry in the table yields "?".
+const char* col_type_doc(ColumnType t) {
+    struct Label { ColumnType type; const char* text; };
+    static const Label kLabels[] = {
+        {ColumnType::String, "string"},
+        {ColumnType::Int,    "int"},
+        {ColumnType::Float,  "float"},
+        {ColumnType::Bool,   "bool"},
+        {ColumnType::Date,   "date"},
+        {ColumnType::Json,   "json"},
+        {ColumnType::Auto,   "auto"},
+    };
+    for (const Label& entry : kLabels) {
+        if (entry.type == t) return entry.text;
+    }
+    return "?";
+}
+
+// Renders the schema section of the user message: one "  - name: type" line
+// per input column, followed (only when there are any) by the list of tables
+// that may be joined. A column without a matching entry in `col_types` is
+// reported as a string column.
+std::string build_schema_block(const AskInput& in) {
+    std::string block = "Available columns (stage 0 input):\n";
+    const size_t typed_count = in.col_types.size();
+    size_t idx = 0;
+    for (const auto& name : in.col_names) {
+        const ColumnType type = idx < typed_count ? in.col_types[idx] : ColumnType::String;
+        block += "  - ";
+        block += name;
+        block += ": ";
+        block += col_type_doc(type);
+        block += "\n";
+        ++idx;
+    }
+    if (in.joinable_names.empty()) return block;
+
+    block += "Joinable tables (for join clause):\n";
+    for (const auto& table : in.joinable_names) {
+        block += "  - ";
+        block += table;
+        block += "\n";
+    }
+    return block;
+}
+
+// Returns the system prompt for the requested output language. OutputMode::TQL
+// selects the TQL (Lua table) instructions; any other mode selects the DuckDB
+// SQL instructions.
+std::string build_system_prompt(OutputMode mode) {
+    static const char* const kSqlPrompt =
+        "You are a DuckDB SQL expert. Output ONLY a SQL code block compatible with DuckDB.\n"
+        "Use CTEs to chain stages. Use date_trunc('month', col) for granularity.\n"
+        "Use quantile_cont(col, p) for percentiles. Use ? for bound params.\n"
+        "Joins: LEFT/INNER/RIGHT/FULL OUTER JOIN. String concat: ||. Aggregations: standard SQL.\n"
+        "Output format: ```sql\\n...\\n```";
+
+    static const char* const kTqlPrompt =
+        "You are a TQL (Table Query Language) expert. Output ONLY a Lua code block. "
+        "TQL is a Lua table with shape:\n"
+        "  return { version=1, display=\"table\"|\"bar\"|\"line\"|...,\n"
+        "    main_source=\"name\", joins={ {alias,source,on,strategy,fields},... },\n"
+        "    stages={ {filter={{op,col,value},...}, breakout={...}, aggregation={...}, sort={...} },... },\n"
+        "    columns={ name = {type=\"int|float|...\", formula=\"[col]+1\"},... }\n"
+        "  }\n"
+        "Stage 0 = Raw (filters + derived + sort, NO breakouts/aggs).\n"
+        "Stage 1+ groups (breakouts + aggregations).\n"
+        "Breakout granularity: append :year|:month|:week|:day|:hour to col name.\n"
+        "Aggregation functions: count|sum|avg|min|max|distinct|stddev|median|p25|p75|p90|p99|percentile.\n"
+        "Filter ops: '='|'!='|'<'|'<='|'>'|'>='|'contains'|'!contains'|'starts'|'ends'.\n"
+        "Sort: {{dir, col}, ...} where dir = 'asc'|'desc'.\n"
+        "Join strategies: 'left'|'inner'|'right'|'full'.\n"
+        "Formulas use Lua expression syntax with [col] for column refs.\n"
+        "Output format: ```lua\\n...\\n```";
+
+    return mode == OutputMode::TQL ? kTqlPrompt : kSqlPrompt;
+}
+
+} // anon
+
+// Assembles the JSON body for POST /v1/messages: model, max_tokens, the
+// mode-specific system prompt and a single user message made of the question,
+// the schema block and (when present) the current TQL in a lua fence.
+// An empty `in.model` selects the built-in default model id.
+std::string build_request_body(const AskInput& in) {
+    // User turn text, built before escaping.
+    std::string user_text = "Question: " + in.question + "\n\n";
+    user_text += build_schema_block(in);
+    user_text += "\n";
+    if (!in.tql_current.empty()) {
+        user_text += "Current TQL:\n```lua\n";
+        user_text += in.tql_current;
+        user_text += "\n```\n";
+    }
+
+    const std::string model_id =
+        in.model.empty() ? std::string("claude-sonnet-4-6") : in.model;
+    const std::string system_text = build_system_prompt(in.mode);
+
+    // The stream is kept for the integer field so number formatting is unchanged.
+    std::ostringstream json;
+    json << "{\"model\":\"" << json_escape(model_id) << "\",";
+    json << "\"max_tokens\":" << in.max_tokens << ",";
+    json << "\"system\":\"" << json_escape(system_text) << "\",";
+    json << "\"messages\":[{\"role\":\"user\",\"content\":\""
+         << json_escape(user_text) << "\"}]}";
+    return json.str();
+}
+
+// Returns the contents of the first fenced code block in `raw`, without the
+// fences.
+//   - Looks for "```<lang>" first; if absent, falls back to the first "```"
+//     and skips an alphanumeric language tag that directly follows it.
+//   - If no fence exists at all, returns `raw` with surrounding whitespace
+//     trimmed.
+//   - If the closing fence is missing, everything up to the end of `raw` is
+//     taken as the code.
+// The line break right after the opening fence ("\n" or "\r\n") and trailing
+// CR/LF characters are stripped from the result.
+std::string extract_code_block(const std::string& raw, const std::string& lang) {
+    const std::string fence_lang = "```" + lang;
+    size_t code_start = std::string::npos;
+
+    size_t pos = raw.find(fence_lang);
+    if (pos != std::string::npos) {
+        code_start = pos + fence_lang.size();
+    } else {
+        pos = raw.find("```");
+        if (pos != std::string::npos) {
+            code_start = pos + 3;
+            // Skip an optional language tag (stops at the line break).
+            while (code_start < raw.size() &&
+                   std::isalnum((unsigned char)raw[code_start])) {
+                ++code_start;
+            }
+        }
+    }
+
+    if (code_start == std::string::npos) {
+        // No fence: return the whole text, trimmed.
+        size_t i = 0;
+        while (i < raw.size() && std::isspace((unsigned char)raw[i])) ++i;
+        size_t j = raw.size();
+        while (j > i && std::isspace((unsigned char)raw[j - 1])) --j;
+        return raw.substr(i, j - i);
+    }
+
+    // Skip the line break after the fence. Handling '\r' as well keeps CRLF
+    // responses from leaving a stray "\r\n" at the start of the code.
+    if (code_start < raw.size() && raw[code_start] == '\r') ++code_start;
+    if (code_start < raw.size() && raw[code_start] == '\n') ++code_start;
+
+    size_t end = raw.find("```", code_start);
+    if (end == std::string::npos) end = raw.size();
+    std::string code = raw.substr(code_start, end - code_start);
+
+    // Drop trailing line breaks before the closing fence.
+    while (!code.empty() && (code.back() == '\n' || code.back() == '\r')) code.pop_back();
+    return code;
+}
+
+// Extracts the assistant text from a Messages API response without a JSON
+// parser: returns the decoded value of the first "text" key whose value is a
+// string. Occurrences of "text" that are not followed by a string value (for
+// example the value in "type":"text") are skipped. Returns "" if no such key
+// exists.
+//
+// Escapes decoded: \n \t \r \b \f \" \\ \/ and \uXXXX. Code points below 128
+// are emitted as-is; anything else from a \u escape becomes '?'. An unknown
+// escape yields the escaped character itself.
+std::string parse_response_text(const std::string& json) {
+    static const char kKey[] = "\"text\"";
+    const size_t kKeyLen = sizeof(kKey) - 1;
+
+    for (size_t t = json.find(kKey); t != std::string::npos; t = json.find(kKey, t + 1)) {
+        size_t i = t + kKeyLen;
+        // Skip whitespace and the ':' separator.
+        while (i < json.size() && (json[i] == ' ' || json[i] == ':' || json[i] == '\t')) ++i;
+        if (i >= json.size() || json[i] != '"') continue;  // not a key with a string value
+        ++i;
+
+        std::string out;
+        while (i < json.size() && json[i] != '"') {
+            if (json[i] != '\\' || i + 1 >= json.size()) {
+                out += json[i++];
+                continue;
+            }
+            const char esc = json[i + 1];
+            size_t consumed = 2;  // backslash + escape letter
+            switch (esc) {
+                case 'n':  out += '\n'; break;
+                case 't':  out += '\t'; break;
+                case 'r':  out += '\r'; break;
+                case 'b':  out += '\b'; break;
+                case 'f':  out += '\f'; break;
+                case '"':  out += '"';  break;
+                case '\\': out += '\\'; break;
+                case '/':  out += '/';  break;
+                case 'u':
+                    if (i + 5 < json.size()) {
+                        int code = 0;
+                        for (int k = 0; k < 4; ++k) {
+                            const char c = json[i + 2 + k];
+                            const int v = (c >= '0' && c <= '9') ? c - '0'
+                                        : (c >= 'a' && c <= 'f') ? c - 'a' + 10
+                                        : (c >= 'A' && c <= 'F') ? c - 'A' + 10 : 0;
+                            code = code * 16 + v;
+                        }
+                        out += (code < 128) ? (char)code : '?';
+                        // \uXXXX is exactly six characters; consuming more
+                        // would swallow the character that follows it.
+                        consumed = 6;
+                    } else {
+                        out += esc;  // truncated escape: keep the letter
+                    }
+                    break;
+                default:
+                    out += esc;
+                    break;
+            }
+            i += consumed;
+        }
+        return out;
+    }
+    return "";
+}
+
+namespace {
+
+// Resolves the API key, in priority order:
+//   1. `provided`, when non-empty;
+//   2. the FN_LLM_API_KEY environment variable, when set and non-empty;
+//   3. the output of `pass anthropic/api-key | head -n1`, with trailing
+//      CR/LF removed.
+// Returns "" when the pipe cannot be opened; the result may also be empty if
+// the command prints nothing.
+std::string resolve_api_key(const std::string& provided) {
+    if (!provided.empty()) return provided;
+
+    if (const char* from_env = std::getenv("FN_LLM_API_KEY")) {
+        if (from_env[0] != '\0') return from_env;
+    }
+
+    FILE* pipe = popen("pass anthropic/api-key 2>/dev/null | head -n1", "r");
+    if (pipe == nullptr) return "";
+
+    std::string key;
+    char chunk[256];
+    while (fgets(chunk, sizeof(chunk), pipe) != nullptr) key.append(chunk);
+    pclose(pipe);
+
+    // Strip the line terminator(s) left by the command.
+    size_t keep = key.size();
+    while (keep > 0 && (key[keep - 1] == '\n' || key[keep - 1] == '\r')) --keep;
+    key.resize(keep);
+    return key;
+}
+
+} // anon
+
+namespace {
+
+// True when `s` is non-empty and consists only of ASCII letters, digits and
+// the characters listed in `extra`. Used to refuse values that could break out
+// of the double-quoted arguments of the curl command line.
+bool chars_are_shell_safe(const std::string& s, const char* extra) {
+    if (s.empty()) return false;
+    for (char c : s) {
+        const bool alnum = (c >= '0' && c <= '9') || (c >= 'a' && c <= 'z') ||
+                           (c >= 'A' && c <= 'Z');
+        if (alnum) continue;
+        if (c != '\0' && std::strchr(extra, c) != nullptr) continue;
+        return false;
+    }
+    return true;
+}
+
+} // anon
+
+// POSTs `body` to the messages endpoint by running the curl executable through
+// std::system and returns the raw response body.
+//
+//   - If FN_LLM_MOCK_RESPONSE is set and non-empty, its value is returned and
+//     nothing is executed (test injection).
+//   - The key comes from resolve_api_key(api_key); the endpoint from
+//     FN_LLM_ENDPOINT, defaulting to https://api.anthropic.com/v1/messages.
+//   - The request body and the response travel through two temporary files,
+//     which are removed before returning.
+//
+// On failure returns "" and sets `error_out`; on success `error_out` is empty.
+// The key and endpoint are placed on a shell command line, so values holding
+// characters outside a conservative whitelist are rejected rather than
+// executed.
+//
+// NOTE(review): the key is still visible in the process argument list while
+// curl runs, and std::tmpnam names are open to races; moving to libcurl or to
+// mkstemp plus a curl config file would close both.
+std::string call_api(const std::string& body, const std::string& api_key,
+                      std::string& error_out) {
+    error_out.clear();
+    // Test injection
+    const char* mock = std::getenv("FN_LLM_MOCK_RESPONSE");
+    if (mock && *mock) return mock;
+
+    const std::string key = resolve_api_key(api_key);
+    if (key.empty()) {
+        error_out = "no API key (set FN_LLM_API_KEY env, pass param, or `pass anthropic/api-key`)";
+        return "";
+    }
+    if (!chars_are_shell_safe(key, "-_.=+/:~")) {
+        error_out = "API key contains characters unsafe for the curl command line";
+        return "";
+    }
+
+    const char* endpoint_env = std::getenv("FN_LLM_ENDPOINT");
+    const std::string endpoint = endpoint_env && *endpoint_env
+        ? endpoint_env
+        : "https://api.anthropic.com/v1/messages";
+    if (!chars_are_shell_safe(endpoint, "-_.~:/?#[]@&=+,;%")) {
+        error_out = "FN_LLM_ENDPOINT contains characters unsafe for the curl command line";
+        return "";
+    }
+
+    // std::tmpnam may return nullptr, and with a null argument it reuses one
+    // static buffer, so each name is copied before the next call.
+    const char* name = std::tmpnam(nullptr);
+    if (!name) { error_out = "tmp file name fail"; return ""; }
+    const std::string tmp_in = name;
+    name = std::tmpnam(nullptr);
+    if (!name) { error_out = "tmp file name fail"; return ""; }
+    const std::string tmp_out = name;
+
+    {
+        FILE* f = std::fopen(tmp_in.c_str(), "w");
+        if (!f) { error_out = "tmp file write fail"; return ""; }
+        const size_t written = std::fwrite(body.data(), 1, body.size(), f);
+        const bool closed_ok = std::fclose(f) == 0;
+        if (written != body.size() || !closed_ok) {
+            // A short write would send a truncated request; bail out instead.
+            std::remove(tmp_in.c_str());
+            error_out = "tmp file write fail";
+            return "";
+        }
+    }
+
+    // Every interpolated value sits inside double quotes; the whitelist above
+    // guarantees none of them can terminate the quoting.
+    const std::string cmd = std::string("curl -sS -X POST ")
+        + "-H \"content-type: application/json\" "
+        + "-H \"anthropic-version: 2023-06-01\" "
+        + "-H \"x-api-key: " + key + "\" "
+        + "--data-binary \"@" + tmp_in + "\" "
+        + "\"" + endpoint + "\""
+        + " > \"" + tmp_out + "\" 2>&1";
+    const int rc = std::system(cmd.c_str());
+
+    std::string resp;
+    {
+        FILE* f = std::fopen(tmp_out.c_str(), "r");
+        if (f) {
+            char buf[4096];
+            size_t n;
+            while ((n = std::fread(buf, 1, sizeof(buf), f)) > 0) resp.append(buf, n);
+            std::fclose(f);
+        }
+    }
+    std::remove(tmp_in.c_str());
+    std::remove(tmp_out.c_str());
+
+    if (rc != 0) {
+        // `rc` is whatever std::system reported; stderr was merged into the
+        // output file, so `resp` carries curl's own message.
+        error_out = "curl exit " + std::to_string(rc) + ": " + resp;
+        return "";
+    }
+    return resp;
+}
+
+// Convenience wrapper: builds the request body, calls the API, decodes the
+// response text and extracts the code block for the requested mode ("lua" for
+// OutputMode::TQL, "sql" otherwise).
+//
+// On failure `error` is non-empty and `code` is left empty. A response with no
+// text content (for example an API error object returned with curl exit 0) is
+// reported as an error, so the caller never gets an empty result with an empty
+// error.
+AskResult ask(const AskInput& in, const std::string& api_key) {
+    AskResult r;
+    const std::string body = build_request_body(in);
+    const std::string raw_json = call_api(body, api_key, r.error);
+    if (!r.error.empty()) return r;
+
+    r.raw = parse_response_text(raw_json);
+    if (r.raw.empty()) {
+        // Include the start of the payload so the API's own message is visible.
+        r.error = "no text content in API response: " + raw_json.substr(0, 512);
+        return r;
+    }
+
+    const std::string lang = (in.mode == OutputMode::TQL) ? "lua" : "sql";
+    r.code = extract_code_block(r.raw, lang);
+    return r;
+}
+
+} // namespace llm_anthropic
@@ -0,0 +1,58 @@
+// llm_anthropic: minimal HTTP client for the Anthropic Claude API.
+// No library dependencies: requests shell out to the curl executable (popen is only used to read the key from `pass`).
+// See issue 0080.
+#pragma once
+
+#include "data_table_logic.h"
+#include "tql_to_sql.h"
+#include <string>
+#include <vector>
+
+namespace llm_anthropic {
+
+// Output language requested from the model: TQL selects the TQL system prompt
+// and a ```lua fence; SQL selects the DuckDB prompt and a ```sql fence.
+enum class OutputMode { TQL, SQL };
+
+// Everything needed to build one request.
+struct AskInput {
+    std::string                                question;       // natural-language question, sent after "Question: "
+    std::string                                tql_current;    // current TQL; appended in a lua fence when non-empty
+    std::vector<std::string>                   col_names;      // input schema: column names
+    std::vector<data_table::ColumnType>        col_types;      // parallel to col_names; a missing entry is reported as string
+    std::vector<std::string>                   joinable_names; // tables available for join; listed only when non-empty
+    OutputMode                                 mode = OutputMode::TQL;
+    std::string                                model;          // empty -> built-in default model id
+    int                                        max_tokens = 8192;
+};
+
+// Outcome of ask().
+struct AskResult {
+    std::string code;     // extracted ```lua or ```sql block (without fences)
+    std::string raw;      // full text of the response
+    std::string error;    // non-empty on failure
+    // NOTE(review): ask() in llm_anthropic.cpp does not assign the two
+    // counters below; they keep their default of 0.
+    int         tokens_in  = 0;
+    int         tokens_out = 0;
+};
+
+// Pure: builds the system prompt and the JSON-escaped user message.
+// Returns the complete JSON body to POST to the /v1/messages endpoint.
+std::string build_request_body(const AskInput& in);
+
+// Pure: extracts the first ```<lang> ... ``` block from `raw`; when no fence
+// with that tag exists, the first ``` fence of any kind is used instead.
+// lang = "lua"|"sql". If there is no fence at all, returns `raw` trimmed.
+std::string extract_code_block(const std::string& raw, const std::string& lang);
+
+// Pure: extracts the text from an Anthropic response JSON.
+// Scans for the first "text" key whose value is a string (as in
+// `"content":[{"type":"text","text":"..."}]`) and returns it with JSON escapes
+// decoded; returns "" when there is none.
+std::string parse_response_text(const std::string& json_body);
+
+// Impure: runs the curl executable (std::system, with temporary files for the
+// request body and the response), POSTs `body` to the Anthropic /v1/messages
+// endpoint (overridable with the FN_LLM_ENDPOINT env var) and returns the
+// response body (raw JSON). On failure returns "" and fills `error_out`.
+// The API key is read from:
+//   1. the `api_key` parameter, if non-empty
+//   2. env FN_LLM_API_KEY
+//   3. `pass anthropic/api-key | head -n1`
+// If the FN_LLM_MOCK_RESPONSE env var is set, its value is returned instead
+// (test injection).
+std::string call_api(const std::string& body, const std::string& api_key,
+                      std::string& error_out);
+
+// Orchestrator: build prompt + POST + parse. Convenience wrapper.
+AskResult ask(const AskInput& in, const std::string& api_key = "");
+
+} // namespace llm_anthropic
@@ -7,9 +7,12 @@
 // Exit 0 = todos los checks pasan, 1 = falla.

 #include "data_table_logic.h"
+#include "llm_anthropic.h"
 #include "lua_engine.h"
 #include "tql.h"
+#include "tql_to_sql.h"

+#include <cmath>
 #include <cstdio>
 #include <cstdlib>
 #include <cstring>
@@ -2051,6 +2054,782 @@ return {
        check(join_strategy_from_token("nope")  == JoinStrategy::Left,  "phase9: parse fallback left");
    }

+    // === phase10: drill extendido ===
+    {
+        // truncate_date — granularities sobre 2026-05-12 (martes).
+        std::string d = "2026-05-12";
+        check(truncate_date(d, DateGranularity::Year)  == "2026",       "phase10: trunc year");
+        check(truncate_date(d, DateGranularity::Month) == "2026-05",    "phase10: trunc month");
+        check(truncate_date(d, DateGranularity::Day)   == "2026-05-12", "phase10: trunc day");
+        check(truncate_date(d, DateGranularity::Week)  == "2026-05-11", "phase10: trunc week (Mon)");
+        check(truncate_date("2026-05-12T14:33:01", DateGranularity::Hour) == "2026-05-12T14",
+              "phase10: trunc hour");
+        check(truncate_date("not-a-date", DateGranularity::Month) == "not-a-date",
+              "phase10: trunc passthrough invalido");
+        check(truncate_date(d, DateGranularity::None) == d, "phase10: trunc None == identidad");
+    }
+
+    {
+        // auto_date_granularity
+        check(auto_date_granularity("2024-01-01", "2026-05-12") == DateGranularity::Year,
+              "phase10: auto year >2y");
+        check(auto_date_granularity("2026-01-01", "2026-05-12") == DateGranularity::Month,
+              "phase10: auto month >60d");
+        check(auto_date_granularity("2026-04-15", "2026-05-12") == DateGranularity::Week,
+              "phase10: auto week >14d");
+        check(auto_date_granularity("2026-05-05", "2026-05-12") == DateGranularity::Day,
+              "phase10: auto day <=14d");
+        check(auto_date_granularity("bad", "2026-05-12") == DateGranularity::Day,
+              "phase10: auto fallback day");
+    }
+
+    {
+        // parse_breakout_granularity
+        std::string col;
+        check(parse_breakout_granularity("ts:month", col) == DateGranularity::Month,
+              "phase10: parse breakout month");
+        check(col == "ts", "phase10: parse breakout col stripped");
+        check(parse_breakout_granularity("ts", col) == DateGranularity::None,
+              "phase10: parse breakout sin sufijo None");
+        check(col == "ts", "phase10: col sin sufijo intacto");
+        check(parse_breakout_granularity("ts:wat", col) == DateGranularity::None,
+              "phase10: sufijo desconocido None");
+        check(col == "ts:wat", "phase10: col preserva sufijo desconocido");
+    }
+
+    {
+        // compose_breakout
+        check(compose_breakout("ts", DateGranularity::None)  == "ts",        "phase10: compose None");
+        check(compose_breakout("ts", DateGranularity::Month) == "ts:month",  "phase10: compose month");
+        check(compose_breakout("ts", DateGranularity::Year)  == "ts:year",   "phase10: compose year");
+        // round-trip parse(compose)
+        std::string col;
+        auto g = parse_breakout_granularity(compose_breakout("foo", DateGranularity::Week), col);
+        check(g == DateGranularity::Week && col == "foo", "phase10: compose+parse round-trip");
+    }
+
+    {
+        // column_min_max
+        const char* cells[] = {
+            "2026-03-01",
+            "2026-01-15",
+            "",
+            "2026-05-12",
+            "2026-02-22",
+        };
+        std::string lo, hi;
+        column_min_max(cells, 5, 1, 0, lo, hi);
+        check(lo == "2026-01-15" && hi == "2026-05-12", "phase10: column_min_max ISO ordena lexical");
+
+        const char* empty_cells[] = {"", "", ""};
+        column_min_max(empty_cells, 3, 1, 0, lo, hi);
+        check(lo.empty() && hi.empty(), "phase10: column_min_max sin datos -> vacio");
+
+        column_min_max(cells, 5, 1, 5, lo, hi);  // col fuera de rango
+        check(lo.empty() && hi.empty(), "phase10: column_min_max col fuera de rango -> vacio");
+    }
+
+    {
+        // tokens round-trip granularity
+        check(date_granularity_from_token("year")  == DateGranularity::Year,  "phase10: token year");
+        check(date_granularity_from_token("month") == DateGranularity::Month, "phase10: token month");
+        check(date_granularity_from_token("week")  == DateGranularity::Week,  "phase10: token week");
+        check(date_granularity_from_token("day")   == DateGranularity::Day,   "phase10: token day");
+        check(date_granularity_from_token("hour")  == DateGranularity::Hour,  "phase10: token hour");
+        check(date_granularity_from_token("nope")  == DateGranularity::None,  "phase10: token fallback None");
+        check(std::string(date_granularity_token(DateGranularity::Month)) == "month",
+              "phase10: emit month");
+        check(std::string(date_granularity_token(DateGranularity::None)) == "",
+              "phase10: emit None empty");
+    }
+
+    {
+        // build_preset_filters
+        auto f7 = build_preset_filters(FilterPreset::Last7d, 2, "2026-05-12");
+        check(f7.size() == 1, "phase10: Last7d -> 1 filter");
+        check(f7[0].col == 2 && f7[0].op == Op::Gte && f7[0].value == "2026-05-05",
+              "phase10: Last7d -> Gte 2026-05-05");
+
+        auto f30 = build_preset_filters(FilterPreset::Last30d, 2, "2026-05-12");
+        check(f30[0].value == "2026-04-12", "phase10: Last30d -> 2026-04-12");
+
+        auto f90 = build_preset_filters(FilterPreset::Last90d, 2, "2026-05-12");
+        check(f90[0].value == "2026-02-11", "phase10: Last90d -> 2026-02-11");
+
+        auto fn0 = build_preset_filters(FilterPreset::ExcludeNulls, 3, "");
+        check(fn0.size() == 1 && fn0[0].op == Op::Neq && fn0[0].value == "",
+              "phase10: ExcludeNulls -> Neq ''");
+
+        auto fnz = build_preset_filters(FilterPreset::NonZero, 4, "");
+        check(fnz.size() == 2, "phase10: NonZero -> 2 filters");
+        check(fnz[0].op == Op::Neq && fnz[0].value == "" &&
+              fnz[1].op == Op::Neq && fnz[1].value == "0",
+              "phase10: NonZero -> Neq '' AND Neq '0'");
+
+        auto fbad = build_preset_filters(FilterPreset::Last7d, 2, "bad-date");
+        check(fbad.empty(), "phase10: Last7d con today invalido -> empty");
+    }
+
+    {
+        // TQL round-trip: breakout con sufijo :granularity.
+        State st0;
+        st0.stages.resize(2);
+        st0.stages[1].breakouts = {"ts:month"};
+        Aggregation a; a.fn = AggFn::Count; a.alias = "n";
+        st0.stages[1].aggregations.push_back(a);
+
+        std::vector<std::string> hdrs = {"ts", "amount"};
+        std::vector<ColumnType>  tys  = {ColumnType::Date, ColumnType::Float};
+        int eff = 2;
+        std::string text = tql::emit(st0, hdrs, tys);
+        check(text.find("\"ts:month\"") != std::string::npos,
+              "phase10 TQL: emit breakout granularity sufijo");
+
+        std::string err;
+        State st1;
+        bool ok = tql::apply(text, st1, hdrs, tys, nullptr, 2, eff, &err);
+        check(ok, "phase10 TQL: apply round-trip ok");
+        check(st1.stages.size() >= 2 && st1.stages[1].breakouts.size() == 1 &&
+              st1.stages[1].breakouts[0] == "ts:month",
+              "phase10 TQL: breakout granularity preservada");
+    }
+
+    {
+        // compute_stage aplica truncado de fecha cuando hay :granularity.
+        const char* cells[] = {
+            "2026-01-15", "10",
+            "2026-01-22", "20",
+            "2026-02-03", "30",
+            "2026-03-11", "40",
+        };
+        std::vector<std::string> hdrs = {"ts", "amount"};
+        std::vector<ColumnType>  tys  = {ColumnType::Date, ColumnType::Float};
+        Stage s1;
+        s1.breakouts = {"ts:month"};
+        Aggregation ag; ag.fn = AggFn::Count; ag.alias = "n";
+        s1.aggregations.push_back(ag);
+        auto out = compute_stage(cells, 4, 2, hdrs, tys, s1);
+        check(out.rows == 3, "phase10: trunc month -> 3 grupos (Jan/Feb/Mar)");
+        check(out.headers[0] == "ts:month", "phase10: header preserva sufijo");
+        // Verifica que algun valor de breakout es "2026-01"
+        bool found_jan = false;
+        for (int r = 0; r < out.rows; ++r) {
+            if (std::string(out.cells[r * out.cols + 0]) == "2026-01") found_jan = true;
+        }
+        check(found_jan, "phase10: trunc value '2026-01' presente");
+    }
+
+    // === phase10 hit-tests para click-to-drill ===
+    {
+        // nearest_index_1d
+        double xs[] = {0, 1, 2, 3, 4};
+        check(nearest_index_1d(0.0, xs, 5) == 0,    "phase10 hit: nearest_1d exact 0");
+        check(nearest_index_1d(2.4, xs, 5) == 2,    "phase10 hit: nearest_1d 2.4 -> 2");
+        check(nearest_index_1d(2.6, xs, 5) == 3,    "phase10 hit: nearest_1d 2.6 -> 3");
+        check(nearest_index_1d(-1.0, xs, 5) == 0,   "phase10 hit: nearest_1d clamp left");
+        check(nearest_index_1d(99.0, xs, 5) == 4,   "phase10 hit: nearest_1d clamp right");
+        check(nearest_index_1d(0.0, nullptr, 0) == -1, "phase10 hit: nearest_1d empty -> -1");
+    }
+
+    {
+        // nearest_index_2d
+        double xs[] = {0, 10, 5, 5};
+        double ys[] = {0, 0, 10, 5};
+        check(nearest_index_2d(0.1, 0.1, xs, ys, 4) == 0, "phase10 hit: nearest_2d cerca de (0,0)");
+        check(nearest_index_2d(9.9, 0.0, xs, ys, 4) == 1, "phase10 hit: nearest_2d cerca de (10,0)");
+        check(nearest_index_2d(5.0, 4.9, xs, ys, 4) == 3, "phase10 hit: nearest_2d cerca de (5,5)");
+        check(nearest_index_2d(0, 0, nullptr, nullptr, 0) == -1, "phase10 hit: nearest_2d empty -> -1");
+    }
+
+    {
+        // pie_angle (convencion ImPlot: 0 = top, sentido horario)
+        const double PI = 3.14159265358979323846;
+        double a;
+        a = pie_angle(0.5, 0.5, 0.5, 0.0); // top
+        check(std::fabs(a - 0.0) < 1e-9, "phase10 hit: pie_angle top = 0");
+        a = pie_angle(0.5, 0.5, 1.0, 0.5); // right -> PI/2
+        check(std::fabs(a - PI/2) < 1e-9, "phase10 hit: pie_angle right = PI/2");
+        a = pie_angle(0.5, 0.5, 0.5, 1.0); // bottom -> PI
+        check(std::fabs(a - PI) < 1e-9, "phase10 hit: pie_angle bottom = PI");
+        a = pie_angle(0.5, 0.5, 0.0, 0.5); // left -> 3*PI/2
+        check(std::fabs(a - 3*PI/2) < 1e-9, "phase10 hit: pie_angle left = 3PI/2");
+    }
+
+    {
+        // pie_slice_at_angle: 4 slices iguales -> cada uno cubre PI/2.
+        double sums[] = {1.0, 1.0, 1.0, 1.0};
+        const double PI = 3.14159265358979323846;
+        check(pie_slice_at_angle(0.0,        sums, 4) == 0, "phase10 hit: slice 0 (top)");
+        check(pie_slice_at_angle(PI/4,       sums, 4) == 0, "phase10 hit: slice 0 (mid)");
+        check(pie_slice_at_angle(PI/2 + 0.1, sums, 4) == 1, "phase10 hit: slice 1");
+        check(pie_slice_at_angle(PI + 0.1,   sums, 4) == 2, "phase10 hit: slice 2");
+        check(pie_slice_at_angle(3*PI/2 + 0.1, sums, 4) == 3, "phase10 hit: slice 3");
+
+        double zeros[] = {0.0, 0.0};
+        check(pie_slice_at_angle(0.5, zeros, 2) == -1, "phase10 hit: total 0 -> -1");
+        check(pie_slice_at_angle(0.0, nullptr, 0) == -1, "phase10 hit: empty -> -1");
+
+        double neg[] = {1.0, -1.0};
+        check(pie_slice_at_angle(0.5, neg, 2) == -1, "phase10 hit: neg sum -> -1");
+    }
+
+    {
+        // heatmap_cell_at
+        int rr, cc;
+        heatmap_cell_at(1.5, 2.5, 4, 3, rr, cc);
+        check(rr == 2 && cc == 1, "phase10 hit: heatmap (1.5,2.5) en 4x3 -> r2 c1");
+        heatmap_cell_at(-1, 0, 4, 3, rr, cc);
+        check(rr == -1 && cc == -1, "phase10 hit: heatmap fuera de rango");
+        heatmap_cell_at(0, 0, 0, 0, rr, cc);
+        check(rr == -1 && cc == -1, "phase10 hit: heatmap empty");
+    }
+
+    {
+        // E2E click-to-drill: simular pipeline stage1 agrupado, click en row idx 2.
+        State st;
+        st.stages.resize(2);
+        std::vector<std::string> hdrs = {"lang", "n"};
+        std::vector<ColumnType>  tys  = {ColumnType::String, ColumnType::Int};
+        st.stages[1].breakouts.push_back("lang");
+        st.stages[1].aggregations.push_back({AggFn::Count});
+        st.active_stage = 1;
+
+        // Stage 1 output simulado (3 grupos).
+        const char* g_cells[] = {
+            "go",  "3",
+            "py",  "2",
+            "cpp", "1",
+        };
+        StageOutput so;
+        so.cells.insert(so.cells.end(), g_cells, g_cells + 6);
+        so.rows = 3;
+        so.cols = 2;
+        so.headers = {"lang", "count"};
+
+        // Simular click en row idx 2 (cpp).
+        int clicked_row = 2;
+        int n_brk = (int)st.stages[1].breakouts.size();
+        check(n_brk == 1, "phase10 e2e: 1 breakout");
+        const char* v = so.cells[clicked_row * so.cols + 0];
+        std::string col_clean;
+        parse_breakout_granularity(so.headers[0], col_clean);
+        check(col_clean == "lang", "phase10 e2e: col_clean stripped OK");
+        st.stages[0].filters.push_back(make_drill_filter(0, v));
+        st.active_stage = 0;
+
+        check(st.active_stage == 0, "phase10 e2e: active retrocede a 0");
+        check(st.stages[0].filters.size() == 1, "phase10 e2e: 1 filter anadido");
+        check(st.stages[0].filters[0].col == 0 &&
+              st.stages[0].filters[0].op == Op::Eq &&
+              st.stages[0].filters[0].value == "cpp",
+              "phase10 e2e: filter Op::Eq col=0 value=cpp");
+    }
+
+    // === phase10 drill history (apply/undo step) ===
+    {
+        State st;
+        st.stages.resize(2);
+        st.active_stage = 1;
+
+        DrillStep step;
+        step.target_stage      = 0;
+        step.filter_pos        = 0;
+        step.prev_active_stage = 1;
+        step.added             = make_drill_filter(0, "go");
+
+        check(apply_drill_step(st, step), "phase10 hist: apply ok");
+        check(st.stages[0].filters.size() == 1, "phase10 hist: filter anadido");
+        check(st.stages[0].filters[0].value == "go", "phase10 hist: value preservado");
+        check(st.active_stage == 0, "phase10 hist: active = target");
+
+        check(undo_drill_step(st, step), "phase10 hist: undo ok");
+        check(st.stages[0].filters.empty(), "phase10 hist: filter eliminado");
+        check(st.active_stage == 1, "phase10 hist: active restaurado");
+
+        // Redo
+        check(apply_drill_step(st, step), "phase10 hist: redo ok");
+        check(st.stages[0].filters.size() == 1, "phase10 hist: redo filter de vuelta");
+        check(st.active_stage == 0, "phase10 hist: redo active retorna");
+
+        // Edge: target fuera de rango
+        DrillStep bad;
+        bad.target_stage = 99;
+        check(!apply_drill_step(st, bad), "phase10 hist: apply fuera de rango -> false");
+        check(!undo_drill_step(st, bad), "phase10 hist: undo fuera de rango -> false");
+
+        // Edge: pos invalida
+        DrillStep bad_pos = step;
+        bad_pos.filter_pos = 99;
+        check(!undo_drill_step(st, bad_pos), "phase10 hist: undo pos invalida -> false");
+    }
+
+    // === phase10 drill history: back/forward stack semantics simulado ===
+    {
+        State st;
+        st.stages.resize(3);
+        st.active_stage = 2;
+
+        std::vector<DrillStep> back_stack;
+        std::vector<DrillStep> fwd_stack;
+
+        auto drill = [&](int from, int target, int pos, int col, const std::string& v) {
+            DrillStep s;
+            s.target_stage      = target;
+            s.filter_pos        = pos;
+            s.prev_active_stage = from;
+            s.added             = make_drill_filter(col, v);
+            apply_drill_step(st, s);
+            back_stack.push_back(s);
+            fwd_stack.clear();
+        };
+
+        drill(2, 1, 0, 0, "go");
+        check(st.stages[1].filters.size() == 1, "phase10 hist seq: drill1 aplicado");
+        drill(1, 0, 0, 1, "10");
+        check(st.stages[0].filters.size() == 1, "phase10 hist seq: drill2 aplicado");
+        check(back_stack.size() == 2, "phase10 hist seq: back stack 2");
+        check(fwd_stack.empty(),       "phase10 hist seq: forward limpio");
+
+        // Back x1
+        DrillStep s = back_stack.back(); back_stack.pop_back();
+        undo_drill_step(st, s);
+        fwd_stack.push_back(s);
+        check(st.stages[0].filters.empty(), "phase10 hist seq: back deshace drill2");
+        check(st.active_stage == 1,         "phase10 hist seq: back restaura active=1");
+        check(fwd_stack.size() == 1,        "phase10 hist seq: fwd stack 1");
+
+        // Forward x1
+        s = fwd_stack.back(); fwd_stack.pop_back();
+        apply_drill_step(st, s);
+        back_stack.push_back(s);
+        check(st.stages[0].filters.size() == 1, "phase10 hist seq: forward reaplica");
+        check(st.active_stage == 0,             "phase10 hist seq: forward active=0");
+    }
+
+    // === phase10 row inspector (row_to_tsv + build_filters_from_row) ===
+    {
+        const char* cells[] = {
+            "go",  "10", "filter",
+            "py",  "20", "sma",
+            "go",  "30", "map",
+        };
+        std::vector<std::string> hdrs = {"lang", "n", "fn"};
+
+        std::string tsv = row_to_tsv(cells, 3, 3, 1, hdrs);
+        check(tsv == "lang\tn\tfn\r\npy\t20\tsma\r\n",
+              "phase10 inspect: row_to_tsv layout");
+
+        check(row_to_tsv(cells, 3, 3, -1, hdrs).empty(), "phase10 inspect: tsv neg row -> empty");
+        check(row_to_tsv(cells, 3, 3, 5, hdrs).empty(),  "phase10 inspect: tsv row oob -> empty");
+        check(row_to_tsv(cells, 3, 0, 0, hdrs).empty(),  "phase10 inspect: tsv cols=0 -> empty");
+
+        auto fs = build_filters_from_row(cells, 3, 3, 0);
+        check(fs.size() == 3, "phase10 inspect: 3 filters de row 0");
+        check(fs[0].col == 0 && fs[0].op == Op::Eq && fs[0].value == "go",
+              "phase10 inspect: filter[0] col=0 op=Eq value=go");
+        check(fs[2].value == "filter", "phase10 inspect: filter[2] value=filter");
+
+        // Row con celda vacia -> filter saltado
+        const char* sparse[] = {"a", "", "c"};
+        auto fs2 = build_filters_from_row(sparse, 1, 3, 0);
+        check(fs2.size() == 2 && fs2[0].col == 0 && fs2[1].col == 2,
+              "phase10 inspect: cells vacios salteados");
+
+        check(build_filters_from_row(cells, 3, 3, -1).empty(),
+              "phase10 inspect: build_filters row invalido -> empty");
+    }
+
+    // === phase10 drill-up ===
+    {
+        State st;
+        st.stages.resize(3);
+        st.active_stage = 2;
+        check(drill_up(st), "phase10 up: 2->1 ok");
+        check(st.active_stage == 1, "phase10 up: active=1");
+        check(drill_up(st), "phase10 up: 1->0 ok");
+        check(st.active_stage == 0, "phase10 up: active=0");
+        check(!drill_up(st), "phase10 up: 0 -> false");
+        check(st.active_stage == 0, "phase10 up: queda en 0");
+
+        // Filters no se mueven
+        State st2;
+        st2.stages.resize(2);
+        st2.active_stage = 1;
+        st2.stages[1].filters.push_back({0, Op::Eq, "x"});
+        drill_up(st2);
+        check(st2.stages[0].filters.empty() && st2.stages[1].filters.size() == 1,
+              "phase10 up: filters quedan en su stage");
+
+        State empty_st;
+        check(!drill_up(empty_st), "phase10 up: stages vacio -> false");
+    }
+
+    // === phase11: Lua subset validator + transpiler ===
+    {
+        std::string err;
+        // Subset OK: literales + ops
+        std::string e1 = tql_to_sql::transpile_expr("1 + 2", {}, err);
+        check(err.empty() && e1.find("1 + 2") != std::string::npos,
+              "phase11 lua: literal arith");
+
+        std::string e2 = tql_to_sql::transpile_expr("[a] + [b] * 2", {}, err);
+        check(err.empty() && e2.find("\"a\"") != std::string::npos &&
+              e2.find("\"b\"") != std::string::npos,
+              "phase11 lua: col refs + arith");
+
+        std::string e3 = tql_to_sql::transpile_expr("[a] .. \"_\" .. [b]", {}, err);
+        check(err.empty() && e3.find(" || ") != std::string::npos,
+              "phase11 lua: concat -> ||");
+
+        std::string e4 = tql_to_sql::transpile_expr(
+            "if [n] > 10 then \"big\" else \"small\" end", {}, err);
+        check(err.empty() && e4.find("CASE WHEN") != std::string::npos &&
+              e4.find("THEN") != std::string::npos && e4.find("ELSE") != std::string::npos,
+              "phase11 lua: if/then/else -> CASE");
+
+        std::string e5 = tql_to_sql::transpile_expr("math.floor([x] / 100)", {}, err);
+        check(err.empty() && e5.find("floor(") != std::string::npos,
+              "phase11 lua: math.floor");
+
+        std::string e6 = tql_to_sql::transpile_expr("string.upper([name])", {}, err);
+        check(err.empty() && e6.find("upper(") != std::string::npos,
+              "phase11 lua: string.upper");
+
+        std::string e7 = tql_to_sql::transpile_expr("string.sub([s], 1, 3)", {}, err);
+        check(err.empty() && e7.find("substring(") != std::string::npos,
+              "phase11 lua: string.sub 3-arg");
+
+        std::string e8 = tql_to_sql::transpile_expr("not ([x] == nil)", {}, err);
+        check(err.empty() && e8.find("NOT") != std::string::npos && e8.find("NULL") != std::string::npos,
+              "phase11 lua: not + nil");
+
+        std::string e9 = tql_to_sql::transpile_expr("tonumber([n])", {}, err);
+        check(err.empty() && e9.find("CAST(") != std::string::npos,
+              "phase11 lua: tonumber -> CAST DOUBLE");
+
+        // Fuera subset: 9 categorias rechazadas
+        err.clear();
+        check(tql_to_sql::transpile_expr("function() return 1 end", {}, err).empty()
+              && err.find("closures") != std::string::npos,
+              "phase11 lua: function closure rechazado");
+
+        err.clear();
+        check(tql_to_sql::transpile_expr("local x = 1", {}, err).empty()
+              && err.find("local") != std::string::npos,
+              "phase11 lua: local rechazado");
+
+        err.clear();
+        check(tql_to_sql::transpile_expr("for i=1,10 do end", {}, err).empty()
+              && err.find("loops") != std::string::npos,
+              "phase11 lua: for loop rechazado");
+
+        err.clear();
+        check(tql_to_sql::transpile_expr("while true do end", {}, err).empty()
+              && err.find("loops") != std::string::npos,
+              "phase11 lua: while loop rechazado");
+
+        err.clear();
+        check(tql_to_sql::transpile_expr("{1,2,3}", {}, err).empty()
+              && err.find("table") != std::string::npos,
+              "phase11 lua: table literal rechazado");
+
+        err.clear();
+        check(tql_to_sql::transpile_expr("io.read()", {}, err).empty()
+              && err.find("io") != std::string::npos,
+              "phase11 lua: io.* rechazado");
+
+        err.clear();
+        check(tql_to_sql::transpile_expr("string.gsub([s], \"a\", \"b\")", {}, err).empty()
+              && err.find("whitelist") != std::string::npos,
+              "phase11 lua: string.gsub no whitelisted");
+
+        err.clear();
+        check(tql_to_sql::transpile_expr("print([x])", {}, err).empty()
+              && err.find("print") != std::string::npos,
+              "phase11 lua: print rechazado");
+
+        err.clear();
+        check(tql_to_sql::transpile_expr("[a]; [b]", {}, err).empty()
+              && err.find("multi-statement") != std::string::npos,
+              "phase11 lua: ';' multi-stmt rechazado");
+
+        // is_transpilable wrapper
+        std::string werr;
+        check(tql_to_sql::is_transpilable("[a] + 1", werr), "phase11 lua: is_transpilable OK");
+        check(!tql_to_sql::is_transpilable("function() end", werr),
+              "phase11 lua: is_transpilable false para closure");
+    }
+
+    // === phase11: TQL State -> SQL DuckDB emit ===
+    {
+        // Setup: 1 tabla "users" con cols lang,n.
+        TableInput t;
+        t.name = "users";
+        t.headers = {"lang", "n"};
+        t.types = {ColumnType::String, ColumnType::Int};
+        // Cells no usado por emit (solo schema).
+        std::vector<TableInput> tables = {t};
+
+        // Caso 1: stage 0 simple (sin filters ni sort)
+        {
+            State st;
+            st.stages.resize(1);
+            auto e = tql_to_sql::emit_sql(st, tables);
+            check(e.error.empty(), "phase11 sql: empty pipeline -> no error");
+            check(e.sql.find("WITH t0") != std::string::npos &&
+                  e.sql.find("FROM \"users\"") != std::string::npos &&
+                  e.sql.find("SELECT * FROM t0") != std::string::npos,
+                  "phase11 sql: stage0 SELECT * FROM users");
+        }
+
+        // Caso 2: stage 0 filter + sort
+        {
+            State st;
+            st.stages.resize(1);
+            st.stages[0].filters.push_back({0, Op::Eq, "go"});
+            st.stages[0].filters.push_back({1, Op::Gt, "10"});
+            st.stages[0].sorts.push_back({"n", true});
+            auto e = tql_to_sql::emit_sql(st, tables);
+            check(e.error.empty(), "phase11 sql: filter+sort OK");
+            check(e.sql.find("WHERE") != std::string::npos &&
+                  e.sql.find("\"lang\" = ?") != std::string::npos &&
+                  e.sql.find("\"n\" > ?") != std::string::npos,
+                  "phase11 sql: filter clauses");
+            check(e.params.size() == 2 && e.params[0] == "go" && e.params[1] == "10",
+                  "phase11 sql: params bound");
+            check(e.sql.find("ORDER BY \"n\" DESC") != std::string::npos,
+                  "phase11 sql: ORDER BY desc");
+        }
+
+        // Caso 3: stage 1 group + count
+        {
+            State st;
+            st.stages.resize(2);
+            st.stages[1].breakouts.push_back("lang");
+            st.stages[1].aggregations.push_back({AggFn::Count});
+            st.active_stage = 1;
+            auto e = tql_to_sql::emit_sql(st, tables);
+            check(e.error.empty(), "phase11 sql: group ok");
+            check(e.sql.find("t1 AS") != std::string::npos &&
+                  e.sql.find("COUNT(*)") != std::string::npos &&
+                  e.sql.find("GROUP BY") != std::string::npos &&
+                  e.sql.find("SELECT * FROM t1") != std::string::npos,
+                  "phase11 sql: stage1 CTE + COUNT + GROUP BY");
+        }
+
+        // Caso 4: granularity :month -> date_trunc
+        {
+            State st;
+            st.stages.resize(2);
+            st.stages[1].breakouts.push_back("ts:month");
+            st.stages[1].aggregations.push_back({AggFn::Sum, "n"});
+            st.active_stage = 1;
+            TableInput ts_t;
+            ts_t.name = "events";
+            ts_t.headers = {"ts", "n"};
+            ts_t.types = {ColumnType::Date, ColumnType::Int};
+            std::vector<TableInput> tt = {ts_t};
+            auto e = tql_to_sql::emit_sql(st, tt);
+            check(e.error.empty(), "phase11 sql: granularity ok");
+            check(e.sql.find("date_trunc('month'") != std::string::npos &&
+                  e.sql.find("SUM(\"n\")") != std::string::npos,
+                  "phase11 sql: date_trunc + SUM");
+        }
+
+        // Caso 5: aggregations p25/median/p99
+        {
+            State st;
+            st.stages.resize(2);
+            st.stages[1].breakouts.push_back("lang");
+            st.stages[1].aggregations.push_back({AggFn::Median, "n"});
+            st.stages[1].aggregations.push_back({AggFn::P25, "n"});
+            st.stages[1].aggregations.push_back({AggFn::P99, "n"});
+            st.active_stage = 1;
+            auto e = tql_to_sql::emit_sql(st, tables);
+            check(e.error.empty(), "phase11 sql: percentiles ok");
+            check(e.sql.find("quantile_cont(\"n\", 0.5)") != std::string::npos &&
+                  e.sql.find("quantile_cont(\"n\", 0.25)") != std::string::npos &&
+                  e.sql.find("quantile_cont(\"n\", 0.99)") != std::string::npos,
+                  "phase11 sql: quantile_cont calls");
+        }
+
+        // Caso 6: joins 4 strategies
+        {
+            State st;
+            st.stages.resize(1);
+            Join jn;
+            jn.alias = "o";
+            jn.source = "orders";
+            jn.on.push_back({"user_id", "user_id"});
+            jn.strategy = JoinStrategy::Left;
+            st.joins.push_back(jn);
+            TableInput u, o;
+            u.name = "users";
+            u.headers = {"user_id", "name"};
+            u.types = {ColumnType::String, ColumnType::String};
+            o.name = "orders";
+            o.headers = {"user_id", "amount"};
+            o.types = {ColumnType::String, ColumnType::Int};
+            std::vector<TableInput> tt = {u, o};
+            auto e = tql_to_sql::emit_sql(st, tt);
+            check(e.error.empty(), "phase11 sql: join ok");
+            check(e.sql.find("LEFT JOIN \"orders\" AS \"o\"") != std::string::npos &&
+                  e.sql.find("ON \"users\".\"user_id\" = \"o\".\"user_id\"") != std::string::npos,
+                  "phase11 sql: LEFT JOIN ON syntax");
+
+            // Inner
+            st.joins[0].strategy = JoinStrategy::Inner;
+            auto e2 = tql_to_sql::emit_sql(st, tt);
+            check(e2.sql.find("INNER JOIN") != std::string::npos, "phase11 sql: INNER JOIN");
+
+            // Right
+            st.joins[0].strategy = JoinStrategy::Right;
+            auto e3 = tql_to_sql::emit_sql(st, tt);
+            check(e3.sql.find("RIGHT JOIN") != std::string::npos, "phase11 sql: RIGHT JOIN");
+
+            // Full
+            st.joins[0].strategy = JoinStrategy::Full;
+            auto e4 = tql_to_sql::emit_sql(st, tt);
+            check(e4.sql.find("FULL OUTER JOIN") != std::string::npos, "phase11 sql: FULL OUTER JOIN");
+        }
+
+        // Caso 7: derived col subset -> SQL expression
+        {
+            State st;
+            st.stages.resize(1);
+            DerivedColumn d;
+            d.name = "size_kb";
+            d.source_col = -1;
+            d.formula = "[n] / 1024.0";
+            d.type = ColumnType::Float;
+            st.stages[0].derived.push_back(d);
+            auto e = tql_to_sql::emit_sql(st, tables);
+            check(e.error.empty(), "phase11 sql: derived subset ok");
+            check(e.sql.find("\"n\" / 1024") != std::string::npos &&
+                  e.sql.find("AS \"size_kb\"") != std::string::npos,
+                  "phase11 sql: derived expression + alias");
+        }
+
+        // Caso 8: derived col FUERA subset -> warning + skip
+        {
+            State st;
+            st.stages.resize(1);
+            DerivedColumn d;
+            d.name = "bad";
+            d.source_col = -1;
+            d.formula = "string.gsub([n], \"a\", \"b\")";
+            d.type = ColumnType::String;
+            st.stages[0].derived.push_back(d);
+            auto e = tql_to_sql::emit_sql(st, tables);
+            check(e.error.empty(), "phase11 sql: derived fuera subset NO bloquea emit");
+            check(!e.warnings.empty() &&
+                  e.warnings[0].find("out of SQL subset") != std::string::npos,
+                  "phase11 sql: warning derived fuera subset");
+            check(e.sql.find("\"bad\"") == std::string::npos,
+                  "phase11 sql: derived skip cuando fuera subset");
+        }
+
+        // Caso 9: empty tables -> error
+        {
+            State st;
+            st.stages.resize(1);
+            std::vector<TableInput> empty;
+            auto e = tql_to_sql::emit_sql(st, empty);
+            check(!e.error.empty() && e.error.find("no input tables") != std::string::npos,
+                  "phase11 sql: empty tables -> error");
+        }
+
+        // Caso 10: stage 0 con LIKE (Contains)
+        {
+            State st;
+            st.stages.resize(1);
+            st.stages[0].filters.push_back({0, Op::Contains, "go"});
+            auto e = tql_to_sql::emit_sql(st, tables);
+            check(e.error.empty(), "phase11 sql: LIKE Contains ok");
+            check(e.sql.find("LIKE ?") != std::string::npos &&
+                  e.params.size() == 1 && e.params[0] == "%go%",
+                  "phase11 sql: Contains -> LIKE %go%");
+        }
+    }
+
+    // === phase11: LLM client (mock, no red) ===
+    {
+        llm_anthropic::AskInput in;
+        in.question = "show top 10 langs";
+        in.tql_current = "return { stages = {} }";
+        in.col_names = {"lang", "n"};
+        in.col_types = {ColumnType::String, ColumnType::Int};
+        in.mode = llm_anthropic::OutputMode::TQL;
+        std::string body = llm_anthropic::build_request_body(in);
+        check(body.find("\"model\":\"claude-sonnet-4-6\"") != std::string::npos,
+              "phase11 llm: default model");
+        check(body.find("\"max_tokens\":8192") != std::string::npos,
+              "phase11 llm: max_tokens");
+        check(body.find("\\\"system\\\"") == std::string::npos /* not double-escaped */,
+              "phase11 llm: system not double-escaped");
+        check(body.find("Available columns") != std::string::npos,
+              "phase11 llm: schema block present");
+        check(body.find("show top 10 langs") != std::string::npos,
+              "phase11 llm: question present");
+        check(body.find("TQL") != std::string::npos,
+              "phase11 llm: system mentions TQL");
+
+        in.mode = llm_anthropic::OutputMode::SQL;
+        std::string body_sql = llm_anthropic::build_request_body(in);
+        check(body_sql.find("DuckDB") != std::string::npos,
+              "phase11 llm: SQL mode mentions DuckDB");
+    }
+
+    {
+        // extract_code_block
+        std::string raw1 = "Here you go:\n```lua\nreturn { x = 1 }\n```\nDone!";
+        std::string code = llm_anthropic::extract_code_block(raw1, "lua");
+        check(code == "return { x = 1 }", "phase11 llm: extract ```lua block");
+
+        std::string raw2 = "Sure:\n```\nplain code\n```";
+        std::string code2 = llm_anthropic::extract_code_block(raw2, "lua");
+        check(code2 == "plain code", "phase11 llm: extract bare ```");
+
+        std::string raw3 = "no fences here";
+        std::string code3 = llm_anthropic::extract_code_block(raw3, "lua");
+        check(code3 == "no fences here", "phase11 llm: no fence -> stripped");
+
+        std::string raw4 = "```sql\nSELECT 1;\n```";
+        std::string code4 = llm_anthropic::extract_code_block(raw4, "sql");
+        check(code4 == "SELECT 1;", "phase11 llm: extract ```sql");
+    }
+
+    {
+        // parse_response_text from JSON
+        std::string j = "{\"id\":\"x\",\"content\":[{\"type\":\"text\",\"text\":\"hello\\nworld\"}],\"role\":\"assistant\"}";
+        std::string t = llm_anthropic::parse_response_text(j);
+        check(t == "hello\nworld", "phase11 llm: parse text content");
+
+        std::string j2 = "{\"content\":[{\"type\":\"text\",\"text\":\"\\\"quoted\\\"\"}]}";
+        std::string t2 = llm_anthropic::parse_response_text(j2);
+        check(t2 == "\"quoted\"", "phase11 llm: parse quoted escape");
+
+        std::string j3 = "{\"error\":\"foo\"}";
+        std::string t3 = llm_anthropic::parse_response_text(j3);
+        check(t3.empty(), "phase11 llm: no text -> empty");
+    }
+
+    {
+        // Mock end-to-end via FN_LLM_MOCK_RESPONSE (portable Linux/Mingw via putenv).
+        const char* mock_kv =
+            "FN_LLM_MOCK_RESPONSE={\"content\":[{\"type\":\"text\",\"text\":\"```lua\\nreturn { mock = true }\\n```\"}]}";
+        putenv((char*)mock_kv);
+        llm_anthropic::AskInput in;
+        in.question = "q";
+        in.col_names = {"a"};
+        in.col_types = {ColumnType::String};
+        auto r = llm_anthropic::ask(in);
+        check(r.error.empty(), "phase11 llm mock: no error");
+        check(r.code == "return { mock = true }", "phase11 llm mock: code extracted");
+        // Unset: putenv con "VAR=" deja vacio (suficiente para nuestro check `*mock`).
+        putenv((char*)"FN_LLM_MOCK_RESPONSE=");
+    }
+
    std::printf("\n=== %d passed, %d failed ===\n", passed, failed);
    return failed == 0 ? 0 : 1;
 }
@@ -652,7 +652,8 @@ bool apply(const std::string& lua_text, State& state,
            }
            lua_pop(L, 1);

-            // breakout (solo aplica stages >= 1, no-op silencioso si stage 0)
+            // breakout (solo aplica stages >= 1, no-op silencioso si stage 0).
+            // Acepta sufijo ":granularity" para cols Date (fase 10).
            lua_getfield(L, -1, "breakout");
            if (lua_istable(L, -1)) {
                int n = (int)lua_rawlen(L, -1);
@@ -660,8 +661,10 @@ bool apply(const std::string& lua_text, State& state,
                    lua_rawgeti(L, -1, i);
                    if (lua_isstring(L, -1)) {
                        std::string bn = lua_tostring(L, -1);
-                        if (find_orig_col(cur_headers, bn) < 0) {
-                            warn("stage " + std::to_string(si - 1) + ": breakout col \"" + bn + "\" not in input headers");
+                        std::string clean;
+                        parse_breakout_granularity(bn, clean);
+                        if (find_orig_col(cur_headers, clean) < 0) {
+                            warn("stage " + std::to_string(si - 1) + ": breakout col \"" + clean + "\" not in input headers");
                        }
                        stg.breakouts.emplace_back(bn);
                    }
@@ -0,0 +1,862 @@
+// tql_to_sql.cpp — pure TQL -> DuckDB SQL walker plus Lua-subset transpiler.
+// See issue 0080. DuckDB itself is not linked in.
+#include "tql_to_sql.h"
+
+#include <cctype>
+#include <cstdio>
+#include <cstdlib>
+#include <cstring>
+#include <set>
+#include <sstream>
+#include <unordered_map>
+
+namespace tql_to_sql {
+
+using namespace data_table;
+
+// ============================================================================
+// Lua subset tokenizer + recursive-descent expression parser -> SQL string.
+// ============================================================================
+
+namespace {
+
+struct Tok {  // One lexical token of the Lua expression subset.
+    enum Kind {
+        EndT, NumT, StrT, IdentT, ColT,  // end of input, number, string, bare identifier, [column] reference
+        // operators / keywords
+        Plus, Minus, Star, Slash, Percent, ConcatT,  // + - * / % and the two-dot concat operator
+        Eq, Neq, Lt, Lte, Gt, Gte,  // comparison operators
+        AndT, OrT, NotT,  // logical keywords
+        IfT, ThenT, ElseT, EndKW,  // if / then / else / end keywords
+        LParen, RParen, Comma, Dot,  // punctuation
+        TrueT, FalseT, NilT,  // literal keywords
+    } kind = EndT;
+    std::string text;  // payload: column name, decoded string, number text or identifier; left empty for operators and keywords
+};
+
+// Forbidden words: maps each rejected Lua identifier to the explanatory error message reported for it.
+const std::unordered_map<std::string, const char*>& forbidden_keywords() {
+    static const std::unordered_map<std::string, const char*> M = {  // function-local static: built once, returned by reference
+        {"function", "closures not allowed in SQL transpile subset"},
+        {"local",    "local declarations not allowed"},
+        {"for",      "loops not allowed"},
+        {"while",    "loops not allowed"},
+        {"repeat",   "loops not allowed"},
+        {"do",       "block statements not allowed"},
+        {"return",   "explicit return not allowed (formula is implicit expression)"},
+        {"goto",     "goto not allowed"},
+        {"break",    "break not allowed (no loops)"},
+        // io/os/debug/coroutines and other runtime or introspection facilities
+        {"io",       "io.* access not allowed"},
+        {"os",       "os.* access not allowed"},
+        {"debug",    "debug.* access not allowed"},
+        {"package",  "package access not allowed"},
+        {"require",  "require not allowed"},
+        {"coroutine","coroutines not allowed"},
+        {"setmetatable","metatables not allowed"},
+        {"getmetatable","metatables not allowed"},
+        {"rawget",   "rawget not allowed"},
+        {"rawset",   "rawset not allowed"},
+        {"pcall",    "pcall not allowed"},
+        {"xpcall",   "xpcall not allowed"},
+        {"print",    "print not allowed (SQL has no side effects)"},
+    };
+    return M;
+}
+
+// Whitelist of SQL-transpilable functions: Lua name -> SQL function template.
+// Templates use $1, $2, ... as argument placeholders.
+struct FnMap { int min_args; int max_args; const char* sql_tmpl; };  // argument-count bounds (presumably inclusive - confirm at the call site) plus the SQL template
+
+const std::unordered_map<std::string, FnMap>& fn_whitelist() {
+    static const std::unordered_map<std::string, FnMap> M = {  // function-local static: built once, returned by reference
+        // math.*
+        {"math.floor", {1, 1, "floor($1)"}},
+        {"math.ceil",  {1, 1, "ceiling($1)"}},
+        {"math.abs",   {1, 1, "abs($1)"}},
+        {"math.sqrt",  {1, 1, "sqrt($1)"}},
+        {"math.sin",   {1, 1, "sin($1)"}},
+        {"math.cos",   {1, 1, "cos($1)"}},
+        {"math.log",   {1, 1, "ln($1)"}},
+        {"math.exp",   {1, 1, "exp($1)"}},
+        {"math.min",   {2, 2, "least($1, $2)"}},
+        {"math.max",   {2, 2, "greatest($1, $2)"}},
+        // string.*
+        {"string.upper", {1, 1, "upper($1)"}},
+        {"string.lower", {1, 1, "lower($1)"}},
+        {"string.len",   {1, 1, "length($1)"}},
+        {"string.sub",   {2, 3, "/*SUBSTRING*/"}},   // special-cased by the caller: argc 2 vs 3, template is only a marker
+        // top-level
+        {"tostring",   {1, 1, "CAST($1 AS VARCHAR)"}},
+        {"tonumber",   {1, 1, "CAST($1 AS DOUBLE)"}},
+    };
+    return M;
+}
+
+// SQL-safe identifier: a fuller version would quote only names that contain
+// special characters or clash with a keyword. Simplified here: always wrap in
+// double quotes to preserve case and allow `:` (granularity suffix).
+std::string sql_ident(const std::string& name) {
+    std::string out;
+    out.reserve(name.size() + 4);  // room for the two wrapping quotes plus a little escape slack
+    out += '"';
+    for (char c : name) {
+        if (c == '"') out += "\"\"";   // escape: an embedded double quote is doubled
+        else          out += c;
+    }
+    out += '"';
+    return out;
+}
+
+std::string sql_string_literal(const std::string& s) {  // Renders s as a single-quoted SQL string literal.
+    std::string out;
+    out.reserve(s.size() + 4);  // room for the two wrapping quotes plus a little escape slack
+    out += '\'';
+    for (char c : s) {
+        if (c == '\'') out += "''";  // escape: an embedded single quote is doubled
+        else           out += c;
+    }
+    out += '\'';
+    return out;
+}
+
+class Lexer {  // Tokenizer for the Lua expression subset; anything outside the subset stops it with an error message.
+public:
+    Lexer(const std::string& src) : src_(src) {}  // stores a reference, not a copy: src must outlive this Lexer
+
+    // Appends the tokens of src to out, ending with an EndT token, and returns true. On failure returns false and error() holds the message.
+    bool tokenize(std::vector<Tok>& out) {
+        size_t i = 0;
+        while (i < src_.size()) {
+            char c = src_[i];
+            if (std::isspace((unsigned char)c)) { ++i; continue; }
+            // Lua line comment: skip up to (not past) the newline; long-bracket block comments get no special handling
+            if (c == '-' && i + 1 < src_.size() && src_[i+1] == '-') {
+                while (i < src_.size() && src_[i] != '\n') ++i;
+                continue;
+            }
+            if (c == '[' ) {
+                // column reference [name]: everything up to the next closing bracket is taken verbatim as the name
+                size_t j = i + 1;
+                std::string name;
+                while (j < src_.size() && src_[j] != ']') {
+                    name += src_[j];
+                    ++j;
+                }
+                if (j >= src_.size()) { error_ = "unterminated [col] ref"; return false; }
+                Tok t; t.kind = Tok::ColT; t.text = name;
+                out.push_back(t);
+                i = j + 1;  // resume just after the closing bracket
+                continue;
+            }
+            if (c == '"' || c == '\'') {  // string literal, either quote style
+                char q = c;
+                ++i;
+                std::string s;
+                while (i < src_.size() && src_[i] != q) {
+                    if (src_[i] == '\\' && i + 1 < src_.size()) {  // backslash escape: decode into the token text
+                        char esc = src_[i+1];
+                        if      (esc == 'n')  s += '\n';
+                        else if (esc == 't')  s += '\t';
+                        else if (esc == '\\') s += '\\';
+                        else if (esc == '\'') s += '\'';
+                        else if (esc == '"')  s += '"';
+                        else                  s += esc;  // unknown escape: keep the escaped character itself
+                        i += 2;
+                    } else {
+                        s += src_[i++];
+                    }
+                }
+                if (i >= src_.size()) { error_ = "unterminated string literal"; return false; }
+                ++i;  // skip the closing quote
+                Tok t; t.kind = Tok::StrT; t.text = s;
+                out.push_back(t);
+                continue;
+            }
+            if (std::isdigit((unsigned char)c) || (c == '.' && i + 1 < src_.size() && std::isdigit((unsigned char)src_[i+1]))) {
+                std::string n;
+                bool seen_dot = false;  // allows at most one decimal point; exponent and hex forms are not consumed here
+                while (i < src_.size()) {
+                    char d = src_[i];
+                    if (std::isdigit((unsigned char)d)) { n += d; ++i; }
+                    else if (d == '.' && !seen_dot) { n += d; seen_dot = true; ++i; }
+                    else break;
+                }
+                Tok t; t.kind = Tok::NumT; t.text = n;
+                out.push_back(t);
+                continue;
+            }
+            if (std::isalpha((unsigned char)c) || c == '_') {  // identifier or keyword
+                std::string id;
+                while (i < src_.size() &&
+                       (std::isalnum((unsigned char)src_[i]) || src_[i] == '_')) {
+                    id += src_[i++];
+                }
+                // Reject forbidden words first, then map reserved words to their token kinds.
+                auto& F = forbidden_keywords();
+                auto fit = F.find(id);
+                if (fit != F.end()) {
+                    error_ = std::string("token '") + id + "': " + fit->second;
+                    return false;
+                }
+                Tok t;
+                if      (id == "and")   t.kind = Tok::AndT;
+                else if (id == "or")    t.kind = Tok::OrT;
+                else if (id == "not")   t.kind = Tok::NotT;
+                else if (id == "if")    t.kind = Tok::IfT;
+                else if (id == "then")  t.kind = Tok::ThenT;
+                else if (id == "else")  t.kind = Tok::ElseT;
+                else if (id == "end")   t.kind = Tok::EndKW;
+                else if (id == "true")  t.kind = Tok::TrueT;
+                else if (id == "false") t.kind = Tok::FalseT;
+                else if (id == "nil")   t.kind = Tok::NilT;
+                else                    { t.kind = Tok::IdentT; t.text = id; }  // any other word is a plain identifier
+                out.push_back(t);
+                continue;
+            }
+            // Operators and punctuation
+            auto emit = [&](Tok::Kind k, int len) {  // push a payload-free token of kind k and advance i by len
+                Tok t; t.kind = k; out.push_back(t); i += (size_t)len;
+            };
+            if (c == '+') { emit(Tok::Plus,  1); continue; }
+            if (c == '-') { emit(Tok::Minus, 1); continue; }
+            if (c == '*') { emit(Tok::Star,  1); continue; }
+            if (c == '/') { emit(Tok::Slash, 1); continue; }
+            if (c == '%') { emit(Tok::Percent,1); continue; }
+            if (c == '(') { emit(Tok::LParen, 1); continue; }
+            if (c == ')') { emit(Tok::RParen, 1); continue; }
+            if (c == ',') { emit(Tok::Comma,  1); continue; }
+            if (c == '.') {  // one dot is member access, two is concat, three (vararg) is rejected
+                if (i + 1 < src_.size() && src_[i+1] == '.') {
+                    if (i + 2 < src_.size() && src_[i+2] == '.') {
+                        error_ = "'...' vararg not allowed"; return false;
+                    }
+                    emit(Tok::ConcatT, 2); continue;
+                }
+                emit(Tok::Dot, 1); continue;
+            }
+            if (c == '=') {  // only the two-character equality operator is valid; a lone one would be assignment
+                if (i + 1 < src_.size() && src_[i+1] == '=') { emit(Tok::Eq, 2); continue; }
+                error_ = "single '=' (assignment) not allowed"; return false;
+            }
+            if (c == '~') {  // only valid as the first half of the not-equal operator
+                if (i + 1 < src_.size() && src_[i+1] == '=') { emit(Tok::Neq, 2); continue; }
+                error_ = "stray '~'"; return false;
+            }
+            if (c == '<') {
+                if (i + 1 < src_.size() && src_[i+1] == '=') { emit(Tok::Lte, 2); continue; }
+                emit(Tok::Lt, 1); continue;
+            }
+            if (c == '>') {
+                if (i + 1 < src_.size() && src_[i+1] == '=') { emit(Tok::Gte, 2); continue; }
+                emit(Tok::Gt, 1); continue;
+            }
+            if (c == '{') { error_ = "table literals '{...}' not allowed"; return false; }
+            if (c == '}') { error_ = "stray '}'"; return false; }
+            if (c == ';') { error_ = "multi-statement not allowed"; return false; }
+            if (c == '#') { error_ = "length '#' operator not allowed"; return false; }
+            if (c == ':') { error_ = "method calls ':' not allowed"; return false; }
+            error_ = std::string("unexpected character '") + c + "'";
+            return false;
+        }
+        Tok t; t.kind = Tok::EndT;  // sentinel marking end of input
+        out.push_back(t);
+        return true;
+    }
+
+    const std::string& error() const { return error_; }  // message of the last failure; empty if none was recorded
+private:
+    const std::string& src_;  // borrowed input text, see the constructor
+    std::string error_;
+};
+
+// Recursive-descent parser that turns the Lexer's token stream into a SQL
+// expression string. Each parse_* method handles one precedence level, writes
+// the SQL text for what it consumed into `out`, and returns false (with
+// error_ set) on the first problem.
+//
+// Precedence as implemented below, loosest to tightest:
+//   if/then/else/end < or < and < not < comparison < '..' < + - < * / % < unary '-'
+// NOTE(review): in Lua `not` binds tighter than the comparison operators, so
+// `not a == b` means `(not a) == b` there but is emitted as NOT (a = b) here.
+// Confirm this divergence is intended for the transpilable subset.
+class Parser {
+public:
+    // Both vectors are held by reference and must outlive the parser.
+    Parser(const std::vector<Tok>& toks,
+           const std::vector<std::string>& headers)
+        : toks_(toks), headers_(headers) {}
+
+    // expr := ternary
+    // ternary := if/then/else | logic_or
+    bool parse_expr(std::string& out) {
+        return parse_ternary(out);
+    }
+
+    // `if c then a else b end` -> CASE WHEN c THEN a ELSE b END.
+    // The else branch is mandatory in this subset.
+    bool parse_ternary(std::string& out) {
+        if (peek(0).kind == Tok::IfT) {
+            ++pos_;
+            std::string a, b, c;
+            if (!parse_logic_or(a)) return false;
+            if (!eat(Tok::ThenT, "'then' expected after 'if'"))  return false;
+            if (!parse_ternary(b)) return false;
+            if (!eat(Tok::ElseT, "'else' expected (subset requires else branch)")) return false;
+            if (!parse_ternary(c)) return false;
+            if (!eat(Tok::EndKW, "'end' expected to close 'if'")) return false;
+            out = "CASE WHEN " + a + " THEN " + b + " ELSE " + c + " END";
+            return true;
+        }
+        return parse_logic_or(out);
+    }
+
+    // Left-associative chain of `or`.
+    bool parse_logic_or(std::string& out) {
+        if (!parse_logic_and(out)) return false;
+        while (peek(0).kind == Tok::OrT) {
+            ++pos_;
+            std::string rhs;
+            if (!parse_logic_and(rhs)) return false;
+            out = "(" + out + " OR " + rhs + ")";
+        }
+        return true;
+    }
+
+    // Left-associative chain of `and`.
+    bool parse_logic_and(std::string& out) {
+        if (!parse_not(out)) return false;
+        while (peek(0).kind == Tok::AndT) {
+            ++pos_;
+            std::string rhs;
+            if (!parse_not(rhs)) return false;
+            out = "(" + out + " AND " + rhs + ")";
+        }
+        return true;
+    }
+
+    // Prefix `not`; may be stacked (`not not x`).
+    bool parse_not(std::string& out) {
+        if (peek(0).kind == Tok::NotT) {
+            ++pos_;
+            std::string e;
+            if (!parse_not(e)) return false;
+            out = "NOT (" + e + ")";
+            return true;
+        }
+        return parse_comparison(out);
+    }
+
+    // == ~= < <= > >= mapped to their SQL spellings, left-associative.
+    bool parse_comparison(std::string& out) {
+        if (!parse_concat(out)) return false;
+        while (true) {
+            Tok::Kind k = peek(0).kind;
+            const char* op = nullptr;
+            if      (k == Tok::Eq)  op = " = ";
+            else if (k == Tok::Neq) op = " <> ";
+            else if (k == Tok::Lt)  op = " < ";
+            else if (k == Tok::Lte) op = " <= ";
+            else if (k == Tok::Gt)  op = " > ";
+            else if (k == Tok::Gte) op = " >= ";
+            else break;
+            ++pos_;
+            std::string rhs;
+            if (!parse_concat(rhs)) return false;
+            out = "(" + out + op + rhs + ")";
+        }
+        return true;
+    }
+
+    // Lua `..` -> SQL `||`.
+    bool parse_concat(std::string& out) {
+        if (!parse_additive(out)) return false;
+        while (peek(0).kind == Tok::ConcatT) {
+            ++pos_;
+            std::string rhs;
+            if (!parse_additive(rhs)) return false;
+            out = "(" + out + " || " + rhs + ")";
+        }
+        return true;
+    }
+
+    // Binary + and -.
+    bool parse_additive(std::string& out) {
+        if (!parse_multiplicative(out)) return false;
+        while (peek(0).kind == Tok::Plus || peek(0).kind == Tok::Minus) {
+            const char* op = (peek(0).kind == Tok::Plus) ? " + " : " - ";
+            ++pos_;
+            std::string rhs;
+            if (!parse_multiplicative(rhs)) return false;
+            out = "(" + out + op + rhs + ")";
+        }
+        return true;
+    }
+
+    // Binary * / %.
+    bool parse_multiplicative(std::string& out) {
+        if (!parse_unary(out)) return false;
+        while (peek(0).kind == Tok::Star || peek(0).kind == Tok::Slash || peek(0).kind == Tok::Percent) {
+            const char* op = (peek(0).kind == Tok::Star) ? " * "
+                           : (peek(0).kind == Tok::Slash) ? " / " : " % ";
+            ++pos_;
+            std::string rhs;
+            if (!parse_unary(rhs)) return false;
+            out = "(" + out + op + rhs + ")";
+        }
+        return true;
+    }
+
+    // Prefix minus; may be stacked.
+    bool parse_unary(std::string& out) {
+        if (peek(0).kind == Tok::Minus) {
+            ++pos_;
+            std::string e;
+            if (!parse_unary(e)) return false;
+            out = "(-" + e + ")";
+            return true;
+        }
+        return parse_primary(out);
+    }
+
+    // Literals, [col] references, parenthesised expressions and whitelisted
+    // function calls. Anything else is rejected with a specific message.
+    bool parse_primary(std::string& out) {
+        Tok t = peek(0);
+        if (t.kind == Tok::NumT) {
+            ++pos_;
+            out = t.text;
+            return true;
+        }
+        if (t.kind == Tok::StrT) {
+            ++pos_;
+            out = sql_string_literal(t.text);
+            return true;
+        }
+        if (t.kind == Tok::TrueT)  { ++pos_; out = "TRUE";  return true; }
+        if (t.kind == Tok::FalseT) { ++pos_; out = "FALSE"; return true; }
+        if (t.kind == Tok::NilT)   { ++pos_; out = "NULL";  return true; }
+        if (t.kind == Tok::ColT) {
+            // The column name is not checked against headers_ here; it is
+            // only quoted as an identifier.
+            ++pos_;
+            (void)headers_;  // currently not validating — caller can do that
+            out = sql_ident(t.text);
+            return true;
+        }
+        if (t.kind == Tok::LParen) {
+            ++pos_;
+            std::string e;
+            if (!parse_expr(e)) return false;
+            if (!eat(Tok::RParen, "expected ')'")) return false;
+            out = "(" + e + ")";
+            return true;
+        }
+        if (t.kind == Tok::IdentT) {
+            // Function call: identifier ("." identifier)? "(" args ")"
+            std::string name = t.text;
+            ++pos_;
+            if (peek(0).kind == Tok::Dot) {
+                ++pos_;
+                if (peek(0).kind != Tok::IdentT) {
+                    error_ = "expected identifier after '.'";
+                    return false;
+                }
+                name += "." + peek(0).text;
+                ++pos_;
+            }
+            if (peek(0).kind != Tok::LParen) {
+                error_ = "bare identifier '" + name +
+                         "' not allowed (only [col] refs + whitelisted fn calls)";
+                return false;
+            }
+            ++pos_;  // consume '('
+            std::vector<std::string> args;
+            if (peek(0).kind != Tok::RParen) {
+                while (true) {
+                    std::string a;
+                    if (!parse_expr(a)) return false;
+                    args.push_back(a);
+                    if (peek(0).kind == Tok::Comma) { ++pos_; continue; }
+                    break;
+                }
+            }
+            if (!eat(Tok::RParen, "expected ')' closing function args")) return false;
+            // Validate against whitelist
+            auto& W = fn_whitelist();
+            auto wit = W.find(name);
+            if (wit == W.end()) {
+                error_ = "function '" + name +
+                         "' not in SQL transpile whitelist (math.*, string.upper/lower/len/sub, tostring, tonumber)";
+                return false;
+            }
+            const FnMap& fm = wit->second;
+            if ((int)args.size() < fm.min_args || (int)args.size() > fm.max_args) {
+                std::ostringstream os;
+                os << "function '" << name << "' takes " << fm.min_args;
+                if (fm.max_args != fm.min_args) os << ".." << fm.max_args;
+                os << " args, got " << args.size();
+                error_ = os.str();
+                return false;
+            }
+            // Special cases
+            if (name == "string.sub") {
+                // Lua: string.sub(s, i [, j]) — i/j 1-based, inclusive.
+                // SQL: substring(s, i, count) with count = j - i + 1.
+                if (args.size() == 2) {
+                    // No j: take everything from i to the end via the
+                    // two-argument substring form.
+                    out = "substring(" + args[0] + ", " + args[1] + ")";
+                } else {
+                    out = "substring(" + args[0] + ", " + args[1] +
+                          ", (" + args[2] + ") - (" + args[1] + ") + 1)";
+                }
+                return true;
+            }
+            // Generic case: expand $1..$N placeholders in the template.
+            // This is done in ONE pass over the template so that text coming
+            // from an argument is never rescanned. The previous per-argument
+            // find/replace would rewrite a "$2" that happened to appear inside
+            // argument 1 (e.g. in a string literal), and "$1" also matched the
+            // prefix of "$10".
+            const std::string& tmpl = fm.sql_tmpl;
+            std::string s;
+            s.reserve(tmpl.size());
+            size_t k = 0;
+            while (k < tmpl.size()) {
+                const bool is_placeholder =
+                    tmpl[k] == '$' && k + 1 < tmpl.size() &&
+                    tmpl[k + 1] >= '0' && tmpl[k + 1] <= '9';
+                if (!is_placeholder) {
+                    s += tmpl[k++];
+                    continue;
+                }
+                size_t j = k + 1;
+                size_t idx = 0;
+                while (j < tmpl.size() && tmpl[j] >= '0' && tmpl[j] <= '9') {
+                    idx = idx * 10 + (size_t)(tmpl[j] - '0');
+                    ++j;
+                }
+                if (idx >= 1 && idx <= args.size()) {
+                    s += args[idx - 1];
+                } else {
+                    // Placeholder with no matching argument: keep it verbatim,
+                    // as the previous implementation did.
+                    s.append(tmpl, k, j - k);
+                }
+                k = j;
+            }
+            out = s;
+            return true;
+        }
+        error_ = std::string("unexpected token in expression");
+        return false;
+    }
+
+    // Consumes the current token if it has kind `k`; otherwise records `msg`
+    // as the error and leaves the position untouched.
+    bool eat(Tok::Kind k, const char* msg) {
+        if (peek(0).kind != k) { error_ = msg; return false; }
+        ++pos_;
+        return true;
+    }
+
+    // Token at pos_ + off. Reads past the end yield the last token (the
+    // Lexer's EndT terminator). An empty token vector yields a static EndT
+    // sentinel instead of calling back() on an empty vector.
+    const Tok& peek(int off) const {
+        if (toks_.empty()) {
+            static const Tok end_sentinel = [] {
+                Tok e;
+                e.kind = Tok::EndT;
+                return e;
+            }();
+            return end_sentinel;
+        }
+        size_t i = pos_ + (size_t)off;
+        if (i >= toks_.size()) return toks_.back();
+        return toks_[i];
+    }
+
+    // True once every token before the EndT terminator has been consumed.
+    bool at_end() const { return peek(0).kind == Tok::EndT; }
+    const std::string& error() const { return error_; }
+
+private:
+    const std::vector<Tok>&            toks_;
+    const std::vector<std::string>&    headers_;
+    size_t                             pos_ = 0;
+    std::string                        error_;
+};
+
+} // anon
+
+// Lexes and parses `formula` and returns the resulting SQL expression text.
+// On any failure (lexer error, parser error, or leftover tokens after a
+// complete expression) returns "" and stores the reason in `error_out`.
+// `error_out` is cleared on entry, so it is empty on success.
+std::string transpile_expr(const std::string& formula,
+                            const std::vector<std::string>& in_headers,
+                            std::string& error_out) {
+    error_out.clear();
+
+    std::vector<Tok> tokens;
+    Lexer lexer(formula);
+    const bool lexed = lexer.tokenize(tokens);
+    if (!lexed) {
+        error_out = lexer.error();
+        return std::string();
+    }
+
+    Parser parser(tokens, in_headers);
+    std::string sql;
+    if (parser.parse_expr(sql)) {
+        // A valid expression must consume every token up to the terminator.
+        if (parser.at_end()) return sql;
+        error_out = "unexpected trailing tokens after expression";
+    } else {
+        error_out = parser.error();
+    }
+    return std::string();
+}
+
+// Returns true when `formula` transpiles to a non-empty SQL expression.
+// Runs the transpiler with an empty header list; on failure the reason is
+// left in `error_out`.
+bool is_transpilable(const std::string& formula, std::string& error_out) {
+    const std::vector<std::string> no_headers;
+    const std::string sql = transpile_expr(formula, no_headers, error_out);
+    if (!error_out.empty()) return false;
+    return !sql.empty();
+}
+
+// ============================================================================
+// TQL State -> SQL DuckDB emitter.
+// ============================================================================
+
+namespace {
+
+// Maps an Aggregation to the SQL aggregate expression text that computes it.
+// Count ignores the column; every other function is applied to the quoted
+// column. The fixed percentiles (Median, P25, ...) and the parametrised
+// Percentile are all emitted as quantile_cont(col, q).
+// NOTE(review): `a.arg` is passed through as the quantile argument unchanged —
+// confirm callers supply a 0..1 fraction rather than a 0..100 percentile.
+std::string emit_agg_expr(const Aggregation& a) {
+    switch (a.fn) {
+        case AggFn::Count:      return "COUNT(*)";
+        case AggFn::Sum:        return "SUM(" + sql_ident(a.col) + ")";
+        case AggFn::Avg:        return "AVG(" + sql_ident(a.col) + ")";
+        case AggFn::Min:        return "MIN(" + sql_ident(a.col) + ")";
+        case AggFn::Max:        return "MAX(" + sql_ident(a.col) + ")";
+        case AggFn::Distinct:   return "COUNT(DISTINCT " + sql_ident(a.col) + ")";
+        case AggFn::Stddev:     return "STDDEV(" + sql_ident(a.col) + ")";
+        case AggFn::Median:     return "quantile_cont(" + sql_ident(a.col) + ", 0.5)";
+        case AggFn::P25:        return "quantile_cont(" + sql_ident(a.col) + ", 0.25)";
+        case AggFn::P75:        return "quantile_cont(" + sql_ident(a.col) + ", 0.75)";
+        case AggFn::P90:        return "quantile_cont(" + sql_ident(a.col) + ", 0.90)";
+        case AggFn::P99:        return "quantile_cont(" + sql_ident(a.col) + ", 0.99)";
+        case AggFn::Percentile: {
+            // Format through a classic-locale stream rather than snprintf:
+            // printf-family formatting follows the C locale, so under a
+            // comma-decimal locale "%g" would emit "0,95" and yield invalid
+            // SQL. Default stream formatting (precision 6, no floatfield)
+            // matches "%g", so output is unchanged in the "C" locale.
+            std::ostringstream os;
+            os.imbue(std::locale::classic());
+            os << a.arg;
+            return std::string("quantile_cont(") + sql_ident(a.col) + ", " + os.str() + ")";
+        }
+    }
+    return "/* unknown agg */ NULL";
+}
+
+// Emits the SQL expression for a breakout spec. A breakout without a date
+// granularity is just the quoted column; otherwise the column is wrapped in
+// date_trunc('<granularity token>', col).
+// NOTE(review): for the week granularity the truncation boundary (Monday?)
+// depends on the SQL engine — confirm against the DuckDB documentation.
+std::string emit_breakout_expr(const std::string& bk) {
+    std::string column;
+    const DateGranularity gran = parse_breakout_granularity(bk, column);
+    if (gran != DateGranularity::None) {
+        std::string expr = "date_trunc('";
+        expr += date_granularity_token(gran);
+        expr += "', ";
+        expr += sql_ident(column);
+        expr += ")";
+        return expr;
+    }
+    return sql_ident(column);
+}
+
+// Maps a filter Op to its SQL operator text, padded with spaces on both
+// sides. Contains, StartsWith and EndsWith all use LIKE; they differ only in
+// the pattern built for the right-hand side. Any value not listed falls back
+// to " = ".
+const char* sql_op(Op op) {
+    switch (op) {
+        case Op::Neq:
+            return " <> ";
+        case Op::Gt:
+            return " > ";
+        case Op::Gte:
+            return " >= ";
+        case Op::Lt:
+            return " < ";
+        case Op::Lte:
+            return " <= ";
+        case Op::NotContains:
+            return " NOT LIKE ";
+        case Op::Contains:
+        case Op::StartsWith:
+        case Op::EndsWith:
+            return " LIKE ";
+        case Op::Eq:
+            break;
+    }
+    return " = ";
+}
+
+// Builds the right-hand side of a filter predicate. The value is always bound
+// as a positional parameter: it is appended to `params` and a "?" placeholder
+// is returned.
+//
+// For the LIKE-based operators the bound value is the pattern:
+//   Contains / NotContains -> %value%
+//   StartsWith             -> value%
+//   EndsWith               -> %value
+// LIKE metacharacters inside the user value ('%', '_') and the escape
+// character itself ('\') are backslash-escaped so they match literally, and
+// the returned text then carries an ESCAPE clause: "? ESCAPE '\'". When the
+// value contains none of those characters the output is exactly the plain
+// "?" with the unescaped pattern, as before.
+std::string emit_filter_rhs(const Filter& f, std::vector<std::string>& params) {
+    const bool contains    = (f.op == Op::Contains || f.op == Op::NotContains);
+    const bool lead_wild   = contains || f.op == Op::EndsWith;
+    const bool trail_wild  = contains || f.op == Op::StartsWith;
+
+    if (!lead_wild && !trail_wild) {
+        // Plain comparison: bind the value untouched.
+        params.push_back(f.value);
+        return "?";
+    }
+
+    std::string pattern;
+    bool needs_escape = false;
+    if (lead_wild) pattern += '%';
+    for (char ch : f.value) {
+        if (ch == '%' || ch == '_' || ch == '\\') {
+            pattern += '\\';
+            needs_escape = true;
+        }
+        pattern += ch;
+    }
+    if (trail_wild) pattern += '%';
+
+    params.push_back(pattern);
+    return needs_escape ? "? ESCAPE '\\'" : "?";
+}
+
+// Builds the stage-0 ("Raw") CTE `t0` and stores "WITH t0 AS (...)" in e.sql:
+//   SELECT <main cols> [, derived cols] [, joined cols]
+//   FROM <main table> [JOIN ...] [WHERE ...] [ORDER BY ...]
+//
+// `tables` supplies the schema; the main table is tables[main_idx]. Derived
+// columns are transpiled to SQL; one whose formula is outside the subset is
+// left out of the SELECT and reported in e.warnings. Filter values are bound
+// through e.params.
+//
+// Soft problems never abort the emit; each is reported in e.warnings and the
+// offending piece is skipped so the generated SQL stays valid:
+//   - join whose source is not in `tables`, or that has no ON keys
+//   - retype-only derived column whose source index is out of range
+//   - filter whose column index is out of range
+//   - filter or sort that targets a derived column that was not emitted
+// Returns false (with e.error set) only when main_idx is out of range.
+bool emit_stage0(const State& st, const std::vector<TableInput>& tables,
+                  int main_idx, SqlEmit& e) {
+    if (main_idx < 0 || main_idx >= (int)tables.size()) {
+        e.error = "main table out of range";
+        return false;
+    }
+    const TableInput& main_t = tables[(size_t)main_idx];
+    const Stage* s0 = st.stages.empty() ? nullptr : &st.stages[0];
+
+    // SELECT list builder: inserts the separator only between items, so an
+    // empty header list cannot produce a leading ", ".
+    std::string select_list;
+    auto add_select = [&select_list](const std::string& item) {
+        if (!select_list.empty()) select_list += ", ";
+        select_list += item;
+    };
+
+    // Original columns of the main table.
+    for (size_t i = 0; i < main_t.headers.size(); ++i) {
+        add_select(sql_ident(main_t.headers[i]));
+    }
+
+    // Filter.col indexes the effective headers (original + derived, in
+    // order). Every derived column keeps its slot so indices stay aligned;
+    // eff_emitted records whether the column actually made it into the SELECT.
+    std::vector<std::string> eff_headers = main_t.headers;
+    std::vector<char> eff_emitted(main_t.headers.size(), 1);
+
+    // Derived columns of stage 0.
+    if (s0) {
+        for (const auto& d : s0->derived) {
+            eff_headers.push_back(d.name);
+            eff_emitted.push_back(0);
+            if (d.source_col >= 0 && d.formula.empty()) {
+                // Pure retype: alias of the source column.
+                if (d.source_col < (int)main_t.headers.size()) {
+                    add_select(sql_ident(main_t.headers[(size_t)d.source_col])
+                               + " AS " + sql_ident(d.name));
+                    eff_emitted.back() = 1;
+                } else {
+                    e.warnings.push_back("derived col '" + d.name +
+                                         "' source col idx out of range");
+                }
+                continue;
+            }
+            std::string err;
+            std::string expr = transpile_expr(d.formula, main_t.headers, err);
+            if (!err.empty()) {
+                std::string msg = "derived col '" + d.name +
+                                  "' formula out of SQL subset: " + err;
+                e.warnings.push_back(msg);
+                // Column is left out of the SQL output.
+                continue;
+            }
+            add_select(expr + " AS " + sql_ident(d.name));
+            eff_emitted.back() = 1;
+        }
+    }
+
+    // True when `name` is known only as a derived column that was dropped.
+    auto dropped_only = [&](const std::string& name) {
+        bool seen_dropped = false;
+        for (size_t i = 0; i < eff_headers.size(); ++i) {
+            if (eff_headers[i] != name) continue;
+            if (eff_emitted[i]) return false;
+            seen_dropped = true;
+        }
+        return seen_dropped;
+    };
+
+    std::string from = sql_ident(main_t.name);
+
+    // Joins
+    for (const auto& jn : st.joins) {
+        const TableInput* right = nullptr;
+        for (const auto& ti : tables) {
+            if (ti.name == jn.source) { right = &ti; break; }
+        }
+        if (!right) {
+            e.warnings.push_back("join source '" + jn.source + "' not in tables");
+            continue;
+        }
+        if (jn.on.size() == 0) {
+            // Without keys the clause would end in a dangling "ON".
+            e.warnings.push_back("join source '" + jn.source + "' has no ON keys");
+            continue;
+        }
+        const char* strat = "LEFT JOIN";
+        switch (jn.strategy) {
+            case JoinStrategy::Left:  strat = "LEFT JOIN";  break;
+            case JoinStrategy::Inner: strat = "INNER JOIN"; break;
+            case JoinStrategy::Right: strat = "RIGHT JOIN"; break;
+            case JoinStrategy::Full:  strat = "FULL OUTER JOIN"; break;
+        }
+        from += "\n  " + std::string(strat) + " " + sql_ident(right->name)
+              + " AS " + sql_ident(jn.alias) + " ON ";
+        for (size_t k = 0; k < jn.on.size(); ++k) {
+            if (k > 0) from += " AND ";
+            from += sql_ident(main_t.name) + "." + sql_ident(jn.on[k].first)
+                 + " = " + sql_ident(jn.alias) + "." + sql_ident(jn.on[k].second);
+        }
+        // Add the right table's columns to the SELECT, named "alias.col".
+        // An empty field list means every column of the right table.
+        if (jn.fields.empty()) {
+            for (const auto& rh : right->headers) {
+                std::string aliased = jn.alias + "." + rh;
+                add_select(sql_ident(jn.alias) + "." + sql_ident(rh)
+                           + " AS " + sql_ident(aliased));
+            }
+        } else {
+            for (const auto& fld : jn.fields) {
+                std::string aliased = jn.alias + "." + fld;
+                add_select(sql_ident(jn.alias) + "." + sql_ident(fld)
+                           + " AS " + sql_ident(aliased));
+            }
+        }
+    }
+    // Same fallback emit_stage_n uses when there is nothing to select.
+    if (select_list.empty()) select_list = "*";
+
+    // Stage 0 WHERE: resolve each filter's column index to a column name.
+    std::string where_clause;
+    if (s0) {
+        for (size_t fi = 0; fi < s0->filters.size(); ++fi) {
+            const Filter& f = s0->filters[fi];
+            if (f.col < 0 || f.col >= (int)eff_headers.size()) {
+                e.warnings.push_back("stage0 filter col idx out of range");
+                continue;
+            }
+            if (!eff_emitted[(size_t)f.col]) {
+                // The column is absent from the SELECT; referencing it would
+                // make the whole query fail.
+                e.warnings.push_back("stage0 filter on col '" +
+                                     eff_headers[(size_t)f.col] +
+                                     "' skipped: col not emitted to SQL");
+                continue;
+            }
+            std::string col = sql_ident(eff_headers[(size_t)f.col]);
+            if (!where_clause.empty()) where_clause += " AND ";
+            where_clause += col + sql_op(f.op) + emit_filter_rhs(f, e.params);
+        }
+    }
+
+    // Stage 0 sort
+    std::string order_clause;
+    if (s0) {
+        for (size_t si = 0; si < s0->sorts.size(); ++si) {
+            const SortClause& sc = s0->sorts[si];
+            if (dropped_only(sc.col)) {
+                e.warnings.push_back("stage0 sort on col '" + sc.col +
+                                     "' skipped: col not emitted to SQL");
+                continue;
+            }
+            if (!order_clause.empty()) order_clause += ", ";
+            order_clause += sql_ident(sc.col) + (sc.desc ? " DESC" : " ASC");
+        }
+    }
+
+    std::string cte = "t0 AS (\n  SELECT " + select_list + "\n  FROM " + from;
+    if (!where_clause.empty()) cte += "\n  WHERE " + where_clause;
+    if (!order_clause.empty()) cte += "\n  ORDER BY " + order_clause;
+    cte += "\n)";
+    e.sql = "WITH " + cte;
+    return true;
+}
+
+// Appends the CTE for stage N (N >= 1) to e.sql:
+//   ,\n t<N> AS (SELECT breakouts + aggregations FROM t<N-1>
+//               [GROUP BY ...] [ORDER BY ...])
+// With no breakouts and no aggregations the SELECT list is "*".
+//
+// Filters on stages >= 1 are NOT emitted: Filter.col is an index into the
+// previous stage's output headers, which are not available here. So that the
+// caller does not silently get unfiltered results, a warning is pushed to
+// e.warnings whenever the stage carries filters.
+// TODO v2: take the previous stage's headers as input and emit a WHERE.
+// Always returns true.
+bool emit_stage_n(const Stage& stg, int n, SqlEmit& e) {
+    std::string prev = "t" + std::to_string(n - 1);
+    std::string cur  = "t" + std::to_string(n);
+
+    // SELECT list: breakouts (wrapped in their granularity expression when
+    // they have one) followed by the aggregations.
+    std::string select_list;
+    for (size_t i = 0; i < stg.breakouts.size(); ++i) {
+        if (i > 0) select_list += ", ";
+        select_list += emit_breakout_expr(stg.breakouts[i])
+                    + " AS " + sql_ident(stg.breakouts[i]);
+    }
+    for (size_t i = 0; i < stg.aggregations.size(); ++i) {
+        if (!select_list.empty()) select_list += ", ";
+        std::string alias = aggregation_alias(stg.aggregations[i]);
+        select_list += emit_agg_expr(stg.aggregations[i]) + " AS " + sql_ident(alias);
+    }
+    if (select_list.empty()) select_list = "*";
+
+    // WHERE: unsupported for this stage (see header comment). Report it
+    // rather than dropping the filters without a trace.
+    if (!stg.filters.empty()) {
+        e.warnings.push_back("stage " + std::to_string(n) + ": " +
+                             std::to_string(stg.filters.size()) +
+                             " filter(s) not emitted to SQL (filters on stages >= 1 unsupported)");
+    }
+
+    // GROUP BY: only when there are breakouts. The expression is re-emitted
+    // instead of referencing the SELECT alias.
+    std::string group_clause;
+    for (size_t i = 0; i < stg.breakouts.size(); ++i) {
+        if (i > 0) group_clause += ", ";
+        group_clause += emit_breakout_expr(stg.breakouts[i]);
+    }
+
+    // ORDER BY
+    std::string order_clause;
+    for (size_t i = 0; i < stg.sorts.size(); ++i) {
+        if (i > 0) order_clause += ", ";
+        order_clause += sql_ident(stg.sorts[i].col) + (stg.sorts[i].desc ? " DESC" : " ASC");
+    }
+
+    std::string cte = ",\n" + cur + " AS (\n  SELECT " + select_list
+                    + "\n  FROM " + prev;
+    if (!group_clause.empty()) cte += "\n  GROUP BY " + group_clause;
+    if (!order_clause.empty()) cte += "\n  ORDER BY " + order_clause;
+    cte += "\n)";
+    e.sql += cte;
+    return true;
+}
+
+} // anon
+
+// Emits the full SQL text for stages 0..target of `state` as a chain of CTEs
+// (t0, t1, ...) followed by "SELECT * FROM t<target>;".
+// `up_to_stage < 0` selects state.active_stage; the target is then clamped to
+// the valid stage range. On failure the returned SqlEmit has `error` set and
+// no final SELECT is appended.
+SqlEmit emit_sql(const State& state,
+                  const std::vector<TableInput>& tables,
+                  int up_to_stage) {
+    SqlEmit result;
+    if (state.stages.empty()) {
+        result.error = "state has no stages";
+        return result;
+    }
+    if (tables.empty()) {
+        result.error = "no input tables provided";
+        return result;
+    }
+
+    // Pick the target stage and clamp it into [0, last stage].
+    const int last_stage = (int)state.stages.size() - 1;
+    int target = up_to_stage;
+    if (target < 0) target = state.active_stage;
+    if (target < 0) target = 0;
+    if (target > last_stage) target = last_stage;
+
+    // Main table comes from state.main_source, defaulting to tables[0] when
+    // it cannot be resolved.
+    int main_idx = resolve_main_idx(tables, state.main_source);
+    if (main_idx < 0) main_idx = 0;
+
+    bool ok = emit_stage0(state, tables, main_idx, result);
+    for (int stage_no = 1; ok && stage_no <= target; ++stage_no) {
+        ok = emit_stage_n(state.stages[(size_t)stage_no], stage_no, result);
+    }
+    if (ok) {
+        result.sql += "\nSELECT * FROM t" + std::to_string(target) + ";\n";
+    }
+    return result;
+}
+
+} // namespace tql_to_sql
@@ -0,0 +1,41 @@
+// tql_to_sql: emite SQL DuckDB equivalente a una pipeline TQL State.
+// Pure. Sin DuckDB linkado. Solo string emit + validacion.
+// Ver issue 0080 + docs/TQL.md (seccion "SQL transpile subset").
+#pragma once
+
+#include "data_table_logic.h"
+#include <string>
+#include <vector>
+
+namespace tql_to_sql {
+
+// Result of a SQL emit: the generated text plus everything the caller needs
+// in order to execute it or report on it.
+struct SqlEmit {
+    std::string                 sql;       // SELECT/CTE chain (DuckDB dialect)
+    std::vector<std::string>    params;    // positional bound values, one per '?'
+    std::vector<std::string>    warnings;  // soft issues (col not found, etc.)
+    std::string                 error;     // non-empty means the emit failed
+};
+
+// Pure: emits DuckDB SQL equivalent to stages 0..active of `state`.
+// `tables` provides the schema (headers/types/name) of each TableInput. The
+// caller is responsible for loading the tables into DuckDB under those names.
+// `up_to_stage = -1` => state.active_stage.
+SqlEmit emit_sql(const data_table::State& state,
+                  const std::vector<data_table::TableInput>& tables,
+                  int up_to_stage = -1);
+
+// Pure: checks that `formula` (the Lua body of a derived column) lies within
+// the SQL-transpilable subset. Returns true when it does; otherwise returns
+// false and puts the concrete reason in `error_out`.
+// See docs/TQL.md#sql-transpile-subset.
+bool is_transpilable(const std::string& formula, std::string& error_out);
+
+// Pure: transpiles a formula in the Lua subset into a SQL expression. When
+// the formula is outside the subset, returns "" and fills `error_out`, so a
+// prior is_transpilable check is not required.
+// `in_headers` is the list of input column names for `[col]` references.
+// NOTE(review): the parser currently does not validate `[col]` names against
+// this list; it only quotes them as SQL identifiers.
+std::string transpile_expr(const std::string& formula,
+                            const std::vector<std::string>& in_headers,
+                            std::string& error_out);
+
+} // namespace tql_to_sql
@@ -16,6 +16,10 @@ using data_table::ColumnType;
 using data_table::ViewMode;
 using data_table::ViewConfig;
 using data_table::parse_number;
+using data_table::nearest_index_2d;
+using data_table::pie_angle;
+using data_table::pie_slice_at_angle;
+using data_table::heatmap_cell_at;

 static int find_header(const StageOutput& out, const std::string& name) {
    if (name.empty()) return -1;
@@ -152,7 +156,8 @@ std::vector<double> finite(const std::vector<double>& v) {
 }

 bool render_bar_like(const StageOutput& out, ViewMode mode,
-                     const ViewConfig& cfg, ImVec2 size) {
+                     const ViewConfig& cfg, ImVec2 size,
+                     int* clicked_row_out = nullptr) {
    int cat_col = resolve_cat(out, cfg, first_category_col(out));
    auto nums = collect_numeric_filtered(out, cfg, 8);
    if (cat_col < 0 || nums.empty()) {
@@ -225,6 +230,15 @@ bool render_bar_like(const StageOutput& out, ViewMode mode,
            ImPlot::PlotBars(nums[0].name.c_str(), ticks.data(), ys.data(), n, 0.67, spc);
        }
    }
+    // Hit-test fase 10: idx = round(plot.{x|y}) en single-series mode.
+    if (clicked_row_out &&
+        mode != ViewMode::GroupedBar && mode != ViewMode::StackedBar &&
+        ImPlot::IsPlotHovered() && ImGui::IsMouseClicked(ImGuiMouseButton_Left)) {
+        ImPlotPoint p = ImPlot::GetPlotMousePos();
+        double target = horiz ? p.y : p.x;
+        int idx = (int)(target + 0.5);
+        if (idx >= 0 && idx < n) *clicked_row_out = idx;
+    }
    ImPlot::EndPlot();
    return true;
 }
@@ -302,7 +316,8 @@ bool render_line_like(const StageOutput& out, ViewMode mode,
    return true;
 }

-bool render_scatter(const StageOutput& out, const ViewConfig& cfg, ImVec2 size) {
+bool render_scatter(const StageOutput& out, const ViewConfig& cfg, ImVec2 size,
+                     int* clicked_row_out = nullptr) {
    // Soporte cfg.x_col + cfg.y_cols[0]
    int xc = find_header(out, cfg.x_col);
    int yc = !cfg.y_cols.empty() ? find_header(out, cfg.y_cols[0]) : -1;
@@ -329,11 +344,20 @@ bool render_scatter(const StageOutput& out, const ViewConfig& cfg, ImVec2 size)
        ImPlot::PlotScatter("##s", nums[0].vals.data(), nums[1].vals.data(),
                             (int)nums[0].vals.size());
    }
+    if (clicked_row_out &&
+        ImPlot::IsPlotHovered() && ImGui::IsMouseClicked(ImGuiMouseButton_Left)) {
+        ImPlotPoint p = ImPlot::GetPlotMousePos();
+        int idx = nearest_index_2d(p.x, p.y,
+                                    nums[0].vals.data(), nums[1].vals.data(),
+                                    (int)nums[0].vals.size());
+        if (idx >= 0) *clicked_row_out = idx;
+    }
    ImPlot::EndPlot();
    return true;
 }

-bool render_bubble(const StageOutput& out, const ViewConfig& cfg, ImVec2 size) {
+bool render_bubble(const StageOutput& out, const ViewConfig& cfg, ImVec2 size,
+                    int* clicked_row_out = nullptr) {
    int xc = find_header(out, cfg.x_col);
    int yc = !cfg.y_cols.empty() ? find_header(out, cfg.y_cols[0]) : -1;
    int sc = resolve_size(out, cfg, -1);
@@ -354,6 +378,14 @@ bool render_bubble(const StageOutput& out, const ViewConfig& cfg, ImVec2 size) {
                      axflag(cfg), axflag(cfg));
    ImPlot::PlotBubbles("##b", nums[0].vals.data(), nums[1].vals.data(),
                         nums[2].vals.data(), (int)nums[0].vals.size());
+    if (clicked_row_out &&
+        ImPlot::IsPlotHovered() && ImGui::IsMouseClicked(ImGuiMouseButton_Left)) {
+        ImPlotPoint p = ImPlot::GetPlotMousePos();
+        int idx = nearest_index_2d(p.x, p.y,
+                                    nums[0].vals.data(), nums[1].vals.data(),
+                                    (int)nums[0].vals.size());
+        if (idx >= 0) *clicked_row_out = idx;
+    }
    ImPlot::EndPlot();
    return true;
 }
@@ -404,7 +436,8 @@ bool render_hist2d(const StageOutput& out, const ViewConfig& cfg, ImVec2 size) {
    return true;
 }

-bool render_heatmap(const StageOutput& out, const ViewConfig& cfg, ImVec2 size) {
+bool render_heatmap(const StageOutput& out, const ViewConfig& cfg, ImVec2 size,
+                     int* clicked_row_out = nullptr) {
    auto nums = collect_numeric_filtered(out, cfg, 64);
    if (nums.empty()) { info_text("Need numeric columns"); return false; }
    int cols = (int)nums.size();
@@ -424,11 +457,22 @@ bool render_heatmap(const StageOutput& out, const ViewConfig& cfg, ImVec2 size)
    maybe_fit(cfg);
    if (!ImPlot::BeginPlot("##heatmap", size, 0)) return false;
    ImPlot::PlotHeatmap("##hm", mat.data(), rows, cols, mn, mx, nullptr);
+    if (clicked_row_out &&
+        ImPlot::IsPlotHovered() && ImGui::IsMouseClicked(ImGuiMouseButton_Left)) {
+        ImPlotPoint p = ImPlot::GetPlotMousePos();
+        // ImPlot heatmap Y se pinta de top a bottom; plot mouse_y va igual
+        // (default scale 0..rows). Mapeo directo.
+        int rr, cc;
+        heatmap_cell_at(p.x, p.y, rows, cols, rr, cc);
+        if (rr >= 0) *clicked_row_out = rr;
+        (void)cc;
+    }
    ImPlot::EndPlot();
    return true;
 }

-bool render_pie(const StageOutput& out, const ViewConfig& cfg, bool donut, ImVec2 size) {
+bool render_pie(const StageOutput& out, const ViewConfig& cfg, bool donut, ImVec2 size,
+                 int* clicked_row_out = nullptr) {
    int cat = resolve_cat(out, cfg, first_category_col(out));
    auto nums = collect_numeric_filtered(out, cfg, 1);
    if (cat < 0 || nums.empty()) { info_text("Need 1 category + 1 numeric"); return false; }
@@ -455,11 +499,24 @@ bool render_pie(const StageOutput& out, const ViewConfig& cfg, bool donut, ImVec
        // Draw inner hole as solid circle by overlaying a smaller pie of one slice transparent.
        // Simpler: just visually it's a circle with text. Use no extra primitive for now.
    }
+    if (clicked_row_out &&
+        ImPlot::IsPlotHovered() && ImGui::IsMouseClicked(ImGuiMouseButton_Left)) {
+        ImPlotPoint p = ImPlot::GetPlotMousePos();
+        double dx = p.x - 0.5, dy = p.y - 0.5;
+        double dist2 = dx*dx + dy*dy;
+        double inner = donut ? (radius * 0.5) : 0.0;
+        if (dist2 <= radius * radius && dist2 >= inner * inner) {
+            double ang = pie_angle(0.5, 0.5, p.x, p.y);
+            int idx = pie_slice_at_angle(ang, values.data(), n);
+            if (idx >= 0) *clicked_row_out = idx;
+        }
+    }
    ImPlot::EndPlot();
    return true;
 }

-bool render_funnel(const StageOutput& out, const ViewConfig& cfg, ImVec2 size) {
+bool render_funnel(const StageOutput& out, const ViewConfig& cfg, ImVec2 size,
+                    int* clicked_row_out = nullptr) {
    int cat = resolve_cat(out, cfg, first_category_col(out));
    auto nums = collect_numeric_filtered(out, cfg, 1);
    if (cat < 0 || nums.empty()) { info_text("Need 1 category + 1 numeric"); return false; }
@@ -492,6 +549,17 @@ bool render_funnel(const StageOutput& out, const ViewConfig& cfg, ImVec2 size) {
    ImPlot::SetupAxisTicks(ImAxis_Y1, ticks.data(), n, labels.data(), false);
    ImPlot::PlotBars(nums[0].name.c_str(), ys.data(), ticks.data(), n, 0.85,
                     ImPlotSpec(ImPlotProp_Flags, ImPlotBarsFlags_Horizontal));
+    if (clicked_row_out &&
+        ImPlot::IsPlotHovered() && ImGui::IsMouseClicked(ImGuiMouseButton_Left)) {
+        ImPlotPoint p = ImPlot::GetPlotMousePos();
+        int tick_idx = (int)(p.y + 0.5);
+        // ticks[i] = n-1-i. Invertir para idx en orden sorted descendiente.
+        int sorted_pos = (n - 1) - tick_idx;
+        if (sorted_pos >= 0 && sorted_pos < n) {
+            // idx[sorted_pos] da indice de row original en out.
+            *clicked_row_out = idx[sorted_pos];
+        }
+    }
    ImPlot::EndPlot();
    return true;
 }
@@ -763,7 +831,9 @@ bool render_radar(const StageOutput& out, const ViewConfig& cfg, ImVec2 size) {
 } // anon

 bool render(const StageOutput& out, ViewMode mode,
-            const ViewConfig& cfg, ImVec2 size) {
+            const ViewConfig& cfg, ImVec2 size,
+            int* clicked_row_out) {
+    if (clicked_row_out) *clicked_row_out = -1;
    if (out.rows == 0 || out.cols == 0) {
        info_text("No data");
        return false;
@@ -773,21 +843,21 @@ bool render(const StageOutput& out, ViewMode mode,
        case ViewMode::Bar:
        case ViewMode::Column:
        case ViewMode::GroupedBar:
-        case ViewMode::StackedBar:   return render_bar_like(out, mode, cfg, size);
+        case ViewMode::StackedBar:   return render_bar_like(out, mode, cfg, size, clicked_row_out);
        case ViewMode::Line:
        case ViewMode::Area:
        case ViewMode::Stairs:       return render_line_like(out, mode, cfg, size);
-        case ViewMode::Scatter:      return render_scatter(out, cfg, size);
-        case ViewMode::Bubble:       return render_bubble(out, cfg, size);
+        case ViewMode::Scatter:      return render_scatter(out, cfg, size, clicked_row_out);
+        case ViewMode::Bubble:       return render_bubble(out, cfg, size, clicked_row_out);
        case ViewMode::Histogram:    return render_histogram(out, cfg, size);
        case ViewMode::Histogram2D:  return render_hist2d(out, cfg, size);
-        case ViewMode::Heatmap:      return render_heatmap(out, cfg, size);
+        case ViewMode::Heatmap:      return render_heatmap(out, cfg, size, clicked_row_out);
        case ViewMode::BoxPlot:      return render_boxplot(out, cfg, size);
        case ViewMode::Stem:         return render_stem(out, cfg, size);
        case ViewMode::ErrorBars:    return render_errorbars(out, cfg, size);
-        case ViewMode::Pie:          return render_pie(out, cfg, false, size);
-        case ViewMode::Donut:        return render_pie(out, cfg, true,  size);
-        case ViewMode::Funnel:       return render_funnel(out, cfg, size);
+        case ViewMode::Pie:          return render_pie(out, cfg, false, size, clicked_row_out);
+        case ViewMode::Donut:        return render_pie(out, cfg, true,  size, clicked_row_out);
+        case ViewMode::Funnel:       return render_funnel(out, cfg, size, clicked_row_out);
        case ViewMode::Waterfall:    return render_waterfall(out, cfg, size);
        case ViewMode::KPI:          return render_kpi_single(out, cfg);
        case ViewMode::KPIGrid:      return render_kpi_grid(out, cfg);
@@ -14,10 +14,15 @@ namespace viz {
 //
 // `size`: ImVec2(-1,-1) usa todo el espacio disponible.
 // `out`: output del stage activo (headers, types, cells flat row-major).
+// `clicked_row_out`: si != nullptr, el render escribira el indice de row del
+// `StageOutput` clicado por user. -1 si no hubo click drillable. Fase 10
+// (issue 0079): habilitado para bar/column/pie/donut/funnel/scatter/bubble/
+// heatmap. Resto de modos: no hit-test, queda en -1.
 bool render(const data_table::StageOutput& out,
            data_table::ViewMode mode,
            const data_table::ViewConfig& cfg,
-            ImVec2 size = ImVec2(-1, -1));
+            ImVec2 size = ImVec2(-1, -1),
+            int* clicked_row_out = nullptr);

 // Helper expuesto: encuentra primera col numerica. -1 si ninguna.
 int first_numeric_col(const data_table::StageOutput& out);
@@ -0,0 +1,212 @@
+// data_table_types — types compartidos del stack TQL (Table Query Language).
+// Promovido al registry desde cpp/apps/primitives_gallery/playground/tables/.
+// Ver issue 0081 + docs/TQL.md. Pure value types + enums.
+#pragma once
+
+#include <string>
+#include <utility>
+#include <vector>
+
+namespace data_table {
+
+// ----------------------------------------------------------------------------
+// Filter operators.
+// ----------------------------------------------------------------------------
+enum class Op {
+    Eq, Neq, Gt, Gte, Lt, Lte,
+    Contains, NotContains, StartsWith, EndsWith
+};
+
+// ----------------------------------------------------------------------------
+// Column type. Declared by the caller or auto-detected.
+// ----------------------------------------------------------------------------
+enum class ColumnType {
+    Auto, String, Int, Float, Bool, Date, Json
+};
+
+// ----------------------------------------------------------------------------
+// Derived column: immutable. Two modes:
+//   1) Pure retype: source_col >= 0, formula == "". Cells come from the source.
+//   2) Formula:     source_col == -1, formula non-empty. Evaluated by Lua.
+// ----------------------------------------------------------------------------
+struct DerivedColumn {
+    int         source_col = -1;
+    ColumnType  type       = ColumnType::String;
+    std::string name;
+    std::string formula;        // "" = pure retype; anything else = Lua body
+    int         lua_id  = -1;   // reference inside lua_engine; -1 if not compiled
+    std::string compile_error;
+};
+
+// ----------------------------------------------------------------------------
+// Filter: column index into eff_headers + operator + value.
+//
+// `col` and `op` carry zero-valued defaults so that a default-initialized
+// Filter (for example the `added` member of a default-constructed DrillStep)
+// never holds indeterminate values. The defaults equal what value-
+// initialization (`Filter{}`) already produced, and the struct remains an
+// aggregate (C++14 and later), so `Filter{col, op, value}` keeps working.
+// ----------------------------------------------------------------------------
+struct Filter {
+    int         col = 0;        // column index
+    Op          op  = Op::Eq;   // operator applied between the cell and `value`
+    std::string value;          // right-hand operand, stored as text
+};
+
+// ----------------------------------------------------------------------------
+// ColorRule: conditional cell coloring (UI helper).
+//
+// `col` and `color` carry zero-valued defaults so that a default-initialized
+// ColorRule never holds indeterminate values. The defaults equal what value-
+// initialization (`ColorRule{}`) already produced, and the struct remains an
+// aggregate (C++14 and later), so `ColorRule{col, equals, color}` keeps working.
+// ----------------------------------------------------------------------------
+struct ColorRule {
+    int          col   = 0;   // column index the rule applies to
+    std::string  equals;      // cell text that triggers the rule
+    unsigned int color = 0;   // color value applied when the rule matches
+};
+
+// ----------------------------------------------------------------------------
+// Aggregations (TQL stages 1+).
+// ----------------------------------------------------------------------------
+// Aggregation function selector used by `Aggregation::fn`.
+enum class AggFn {
+    Count, Sum, Avg, Min, Max, Distinct, Stddev,
+    Median, P25, P75, P90, P99, Percentile
+};
+
+// One aggregation: function + input column + optional argument + output alias.
+struct Aggregation {
+    AggFn       fn = AggFn::Count;
+    std::string col;         // ignored for Count
+    double      arg = 0.0;   // used by Percentile (0..1)
+    std::string alias;       // empty -> auto-generated via aggregation_alias()
+};
+
+// Sort clause: column name plus direction (`desc` == true means descending).
+struct SortClause {
+    std::string col;
+    bool        desc = false;
+};
+
+// Stage: one TQL layer. Stage 0 = Raw (no breakouts/aggregations).
+// Stages 1+ may group. Each stage consumes the output of the previous one.
+struct Stage {
+    std::vector<Filter>          filters;
+    std::vector<DerivedColumn>   derived;       // expressions of this stage
+    std::vector<std::string>     breakouts;     // column names from the INPUT of this stage
+    std::vector<Aggregation>     aggregations;
+    std::vector<SortClause>      sorts;
+};
+
+// Output of compute_stage. Owns `cell_backing` (new strings for aggregated
+// results) and `cells` (row-major pointers into the backing or into the
+// original `in_cells` for passthrough).
+struct StageOutput {
+    std::vector<std::string>  cell_backing;
+    std::vector<const char*>  cells;
+    int                       rows = 0;
+    int                       cols = 0;
+    std::vector<std::string>  headers;
+    std::vector<ColumnType>   types;
+};
+
+// ----------------------------------------------------------------------------
+// ViewMode: kind of visualization to render over the active stage's output.
+// ----------------------------------------------------------------------------
+enum class ViewMode {
+    Table,
+    // Bars
+    Bar, Column, GroupedBar, StackedBar,
+    // Lines / area
+    Line, Area, Stairs,
+    // Points
+    Scatter, Bubble,
+    // Distribution
+    Histogram, Histogram2D, Heatmap, BoxPlot,
+    // Stems / signals
+    Stem, ErrorBars,
+    // Composition
+    Pie, Donut, Funnel, Waterfall,
+    // Single values
+    KPI, KPIGrid,
+    // Specialized
+    Candlestick, Radar,
+};
+
+// ----------------------------------------------------------------------------
+// Joins (MBQL-style). See issue 0078.
+// ----------------------------------------------------------------------------
+// Join strategy selector used by `Join::strategy`.
+enum class JoinStrategy { Left, Inner, Right, Full };
+
+// Extra table passed to render() for joins. Owned externally (by the caller).
+struct TableInput {
+    std::string                 name;       // stable identifier (matches Join.source)
+    std::vector<std::string>    headers;
+    std::vector<ColumnType>     types;
+    const char* const*          cells = nullptr;  // row-major, headers.size() columns x `rows` rows
+    int                         rows  = 0;
+    int                         cols  = 0;
+};
+
+// Join clause: joins the current table with `source` on the `on` pairs,
+// prefixing the right-hand columns with `alias.`.
+struct Join {
+    std::string                                          alias;
+    std::string                                          source;
+    std::vector<std::pair<std::string, std::string>>     on;        // {left_col, right_col}
+    JoinStrategy                                         strategy = JoinStrategy::Left;
+    std::vector<std::string>                             fields;    // empty = all columns of the right side
+};
+
+// ----------------------------------------------------------------------------
+// ViewConfig: manual overrides of auto-detection for the active view.
+// ----------------------------------------------------------------------------
+struct ViewConfig {
+    std::string                 x_col;        // single: scatter, line, hist2d
+    std::vector<std::string>    y_cols;       // 1..N: line/area/bar/etc
+    std::string                 size_col;     // bubble
+    std::string                 cat_col;      // bar/pie/funnel/box override
+    unsigned int                primary_color = 0;     // 0 = ImPlot auto
+    int                         hist_bins     = 0;     // 0 = Sturges
+    float                       pie_radius    = 0.0f;  // 0 = default
+    bool                        show_legend   = true;
+    bool                        show_markers  = false; // line/area markers
+    bool                        locked        = false; // disable pan/zoom
+    mutable bool                fit_request   = false; // consumed by viz::render
+};
+
+// VizPanel: additional visualization over the same StageOutput.
+struct VizPanel {
+    ViewMode    display = ViewMode::Bar;
+    ViewConfig  config;
+    // mutable: can be updated through a const VizPanel.
+    mutable ViewMode last_non_table = ViewMode::Bar;
+};
+
+// ----------------------------------------------------------------------------
+// State: stage pipeline + global visualization settings.
+// ----------------------------------------------------------------------------
+struct State {
+    std::vector<Stage>          stages;
+    int                         active_stage = 0;
+    ViewMode                    display = ViewMode::Table;
+    ViewConfig                  viz_config;
+    std::vector<VizPanel>       extra_panels;
+    std::vector<Join>           joins;     // applied before stages[0]
+    std::string                 main_source;  // name of a TableInput; empty -> tables[0]
+
+    std::vector<ColorRule>      color_rules;
+    std::vector<bool>           col_visible;
+    std::vector<int>            col_order;
+
+    // Helpers (defined in compute_stage.cpp).
+    Stage&       raw();
+    const Stage& raw() const;
+    Stage&       active();
+    const Stage& active_const() const;
+    void         ensure_stage0();
+};
+
+// ----------------------------------------------------------------------------
+// Extended drill (phase 10). See issue 0079.
+// ----------------------------------------------------------------------------
+// Date truncation granularity; `None` means no granularity is selected.
+enum class DateGranularity { None, Year, Month, Week, Day, Hour };
+
+// Named filter presets.
+// NOTE(review): presumably expanded into Filter entries elsewhere — confirm against the caller.
+enum class FilterPreset { Last7d, Last30d, Last90d, ExcludeNulls, NonZero };
+
+// Drill step recorded for the undo/redo history (phase 10).
+struct DrillStep {
+    int     target_stage      = -1;   // stage the filter was added to
+    int     filter_pos        = -1;   // index within target_stage.filters
+    int     prev_active_stage = 0;    // active_stage before the drill
+    Filter  added;                    // filter kept for redo
+};
+
+} // namespace data_table
@@ -0,0 +1,96 @@
+#include "gfx/gpu_check.h"
+#include "gfx/gl_loader.h"
+
+#include <cstdio>
+#include <cstring>
+#include <string>
+
+// CUDA runtime version via compile-time macro.
+// cuda_runtime.h exposes CUDART_VERSION encoded as major*1000 + minor*10
+// (e.g. 12040 for CUDA 12.4).
+// The header is only included when it is available; otherwise
+// cuda_runtime_version = "".
+#if defined(__has_include) && __has_include(<cuda_runtime.h>)
+    #include <cuda_runtime.h>
+    #define FN_HAS_CUDA_RUNTIME 1
+#endif
+
+namespace fn::gfx {
+
+// Wraps glGetString: returns the queried string, or "" when GL yields nullptr.
+static std::string safe_gl_string(GLenum name) {
+    const char* text = reinterpret_cast<const char*>(glGetString(name));
+    return text ? std::string(text) : std::string();
+}
+
+// Returns true when the GL_VERSION string of the current context reports
+// version 4.3 or newer; false when glGetString yields nullptr.
+static bool check_gl_version_43() {
+    const char* text = reinterpret_cast<const char*>(glGetString(GL_VERSION));
+    if (text == nullptr) return false;
+
+    // GL_VERSION looks like "major.minor ..." or "OpenGL ES major.minor ...";
+    // step over the ES prefix when it is present.
+    static const char kEsPrefix[] = "OpenGL ES ";
+    const std::size_t prefix_len = sizeof(kEsPrefix) - 1;
+    if (std::strncmp(text, kEsPrefix, prefix_len) == 0) text += prefix_len;
+
+    // Fields that fail to parse keep their initial value of 0.
+    int version[2] = {0, 0};
+    // NOLINTNEXTLINE(cert-err34-c)
+    std::sscanf(text, "%d.%d", &version[0], &version[1]);
+
+    if (version[0] != 4) return version[0] > 4;
+    return version[1] >= 3;
+}
+
+// Fills `out` with the capabilities of the current OpenGL context plus the
+// CUDA runtime version detected at compile time.
+//
+// `out` is reset to defaults first. Returns false (leaving only the GL string
+// fields touched) when GL_VENDOR comes back empty, which is how a missing
+// context shows up here; returns true otherwise.
+bool gpu_check_caps(GpuCaps& out) {
+    out = GpuCaps{}; // reset so stale values from a previous call never survive
+
+    out.gl_vendor   = safe_gl_string(GL_VENDOR);
+    out.gl_renderer = safe_gl_string(GL_RENDERER);
+    out.gl_version  = safe_gl_string(GL_VERSION);
+
+    if (out.gl_vendor.empty()) {
+        // No active GL context (or the driver returned nullptr for GL_VENDOR).
+        return false;
+    }
+
+    // Compute shaders and shader storage buffers: GL 4.3+ or the matching ARB
+    // extension. The version is evaluated once, and the extension string is
+    // only queried when the version alone does not settle the answer. That
+    // avoids a redundant query and, on 4.3+ core-profile contexts, avoids
+    // raising GL_INVALID_ENUM from glGetString(GL_EXTENSIONS).
+    const bool is_gl43 = check_gl_version_43();
+    out.has_compute_shader = is_gl43;
+    out.has_storage_buffer = is_gl43;
+    if (!is_gl43) {
+        // NOTE(review): on core-profile contexts below 4.3 this query returns
+        // nullptr; enumerating with glGetStringi would be needed there.
+        const char* exts =
+            reinterpret_cast<const char*>(glGetString(GL_EXTENSIONS));
+        if (exts) {
+            out.has_compute_shader =
+                std::strstr(exts, "GL_ARB_compute_shader") != nullptr;
+            out.has_storage_buffer =
+                std::strstr(exts, "GL_ARB_shader_storage_buffer_object") != nullptr;
+        }
+    }
+
+    // Workgroup limits (only when compute shaders are supported); these are
+    // indexed queries, one per axis (0=X, 1=Y, 2=Z).
+    if (out.has_compute_shader) {
+        for (GLuint axis = 0; axis < 3; ++axis) {
+            glGetIntegeri_v(GL_MAX_COMPUTE_WORK_GROUP_COUNT, axis,
+                            &out.max_compute_workgroup_count[axis]);
+            glGetIntegeri_v(GL_MAX_COMPUTE_WORK_GROUP_SIZE, axis,
+                            &out.max_compute_workgroup_size[axis]);
+        }
+    }
+
+    // CUDA runtime version (compile-time detection).
+#if defined(FN_HAS_CUDA_RUNTIME)
+    {
+        // CUDART_VERSION = major*1000 + minor*10, e.g. 12040 for CUDA 12.4.
+        int cuda_ver = CUDART_VERSION;
+        int major = cuda_ver / 1000;
+        int minor = (cuda_ver % 1000) / 10;
+        char buf[16];
+        std::snprintf(buf, sizeof(buf), "%d.%d", major, minor);
+        out.cuda_runtime_version = buf;
+    }
+#else
+    out.cuda_runtime_version = "";
+#endif
+
+    return true;
+}
+
+} // namespace fn::gfx
@@ -0,0 +1,38 @@
+#pragma once
+
+#include <string>
+
+namespace fn::gfx {
+
+// GpuCaps collects OpenGL and CUDA capabilities of the active context.
+// Every string field is empty ("") when the value is not available.
+struct GpuCaps {
+    // OpenGL — an active GL context is required before calling gpu_check_caps.
+    std::string gl_vendor;    // glGetString(GL_VENDOR)   e.g. "NVIDIA Corporation"
+    std::string gl_renderer;  // glGetString(GL_RENDERER) e.g. "NVIDIA GeForce RTX 3080/PCIe/SSE2"
+    std::string gl_version;   // glGetString(GL_VERSION)  e.g. "4.6.0 NVIDIA 550.54.15"
+
+    // Compute shader limits (GL_MAX_COMPUTE_WORK_GROUP_COUNT/SIZE).
+    // Index 0=X 1=Y 2=Z. Value 0 when compute shaders are not available.
+    int max_compute_workgroup_count[3] = {0, 0, 0};
+    int max_compute_workgroup_size[3]  = {0, 0, 0};
+
+    bool has_compute_shader  = false; // GL_VERSION >= 4.3 or the ARB_compute_shader extension
+    bool has_storage_buffer  = false; // GL_VERSION >= 4.3 or the ARB_shader_storage_buffer_object extension
+
+    // CUDA — empty when the CUDA runtime was not detected at compile time.
+    // Format: "12.4" (major.minor) or "" when not available.
+    std::string cuda_runtime_version;
+};
+
+// gpu_check_caps fills `out` with the capabilities of the active OpenGL context.
+//
+// REQUIREMENT: call it after the GL context has been initialized and, on
+// Windows, after fn::gfx::gl_loader_init(). Calling it without an active
+// context is undefined behavior (glGetString returns nullptr).
+//
+// Returns true when at least the GL vendor could be read (context active).
+// Returns false when gl_vendor stays empty (context not active or faulty driver).
+bool gpu_check_caps(GpuCaps& out);
+
+} // namespace fn::gfx
@@ -0,0 +1,86 @@
+---
+name: gpu_check
+kind: function
+lang: cpp
+domain: gfx
+version: "1.0.0"
+purity: impure
+signature: "bool fn::gfx::gpu_check_caps(GpuCaps& out)"
+description: "Rellena GpuCaps con las capacidades del contexto OpenGL activo: vendor, renderer, version, limites de compute workgroup, flags has_compute_shader/has_storage_buffer, y version CUDA runtime (deteccion en compile-time via CUDART_VERSION). Requiere contexto GL activo. Retorna false si el contexto no esta disponible."
+tags: [gpu, opengl, cuda, caps, hardware, probe, gfx, compute, infra]
+uses_functions: ["gl_loader_cpp_gfx"]
+uses_types: []
+returns: []
+returns_optional: false
+error_type: "error_go_core"
+imports: [gfx/gpu_check.h, gfx/gl_loader.h, cuda_runtime.h, cstring, string]
+tested: false
+tests: []
+test_file_path: ""
+file_path: "cpp/functions/gfx/gpu_check.cpp"
+framework: opengl
+params:
+  - name: out
+    desc: "Referencia a GpuCaps que se rellena con las capacidades detectadas. Se resetea al inicio de la llamada."
+output: "true si el contexto GL esta activo y gl_vendor no esta vacio; false si no hay contexto GL activo o el driver devuelve nullptr para GL_VENDOR."
+---
+
+# gpu_check
+
+Probing de capacidades GPU en runtime: OpenGL strings, compute shader support y CUDA.
+
+## Uso tipico
+
+```cpp
+#include "gfx/gpu_check.h"
+#include "gfx/gl_loader.h"
+
+// Dentro de render(), despues del primer frame (contexto GL activo):
+fn::gfx::GpuCaps caps;
+if (fn::gfx::gpu_check_caps(caps)) {
+    printf("GPU: %s\n", caps.gl_renderer.c_str());
+    printf("Compute shaders: %s\n", caps.has_compute_shader ? "yes" : "no");
+    if (!caps.cuda_runtime_version.empty())
+        printf("CUDA runtime: %s\n", caps.cuda_runtime_version.c_str());
+} else {
+    printf("No GL context active\n");
+}
+```
+
+## Estructura GpuCaps
+
+```cpp
+struct GpuCaps {
+    std::string gl_vendor;                  // "NVIDIA Corporation"
+    std::string gl_renderer;                // "NVIDIA GeForce RTX 3080/PCIe/SSE2"
+    std::string gl_version;                 // "4.6.0 NVIDIA 550.54.15"
+    int max_compute_workgroup_count[3];     // [65535, 65535, 65535] tipico NVIDIA
+    int max_compute_workgroup_size[3];      // [1024, 1024, 64] tipico
+    bool has_compute_shader;                // GL 4.3+ o ARB_compute_shader
+    bool has_storage_buffer;                // GL 4.3+ o ARB_shader_storage_buffer_object
+    std::string cuda_runtime_version;       // "12.4" o "" si no compilado con CUDA
+};
+```
+
+## CUDA detection
+
+La version CUDA se detecta en **compile time** via el macro `CUDART_VERSION` de `<cuda_runtime.h>`. Si la app no esta compilada con el CUDA toolkit, `cuda_runtime_version` sera `""`. Para detection en runtime del toolkit del sistema, usar `cuda_toolkit_check_bash_infra`.
+
+## Requisito de contexto GL
+
+Llamar siempre despues de crear el contexto GL. En apps que usan `fn::run_app`, el contexto esta activo desde el primer frame del `render()` callback. En Windows, `fn::gfx::gl_loader_init()` debe haberse llamado antes para que los punteros de funcion esten resueltos.
+
+## Uso previsto (fn doctor cpp-apps)
+
+Esta funcion sera invocada por el audit de `fn doctor cpp-apps` para verificar que las apps C++ del registry tienen acceso a compute shaders cuando declaran dependencias de `gpu_compute_program`, `gpu_dispatch`, etc.
+
+## CMakeLists.txt
+
+```cmake
+add_imgui_app(mi_app
+    main.cpp
+    ${CMAKE_SOURCE_DIR}/cpp/functions/gfx/gpu_check.cpp
+)
+# CUDA opcional: si la app compila con CUDA toolkit el header cuda_runtime.h
+# estara disponible y FN_HAS_CUDA_RUNTIME se activara automaticamente.
+```
@@ -0,0 +1,20 @@
+---
+name: AggFn
+lang: cpp
+domain: core
+version: "1.0.0"
+algebraic: sum
+definition: |
+  enum class AggFn {
+      Count, Sum, Avg, Min, Max, Distinct, Stddev,
+      Median, P25, P75, P90, P99, Percentile
+  };
+description: "Funcion de agregacion soportada. Pickup via UI combo + SQL emit via tql_to_sql. Percentile usa Aggregation.arg en [0,1]."
+tags: [tql, aggregation, sum-type]
+uses_types: []
+file_path: "cpp/functions/core/data_table_types.h"
+---
+
+## Notas
+
+Mapeo SQL DuckDB: Count → `COUNT(*)`, Sum/Avg/Min/Max/Stddev → ops nativas, Distinct → `COUNT(DISTINCT col)`, Median/P25/P75/P90/P99/Percentile → `quantile_cont(col, p)`.
@@ -0,0 +1,22 @@
+---
+name: Aggregation
+lang: cpp
+domain: core
+version: "1.0.0"
+algebraic: product
+definition: |
+  struct Aggregation {
+      AggFn       fn;
+      std::string col;
+      double      arg;
+      std::string alias;
+  };
+description: "Funcion de agregacion en Stage 1+. fn = Count/Sum/Avg/Min/Max/Distinct/Stddev/Median/P25/P75/P90/P99/Percentile. arg = parametro (p para percentile)."
+tags: [tql, aggregation, agg, product-type]
+uses_types: [AggFn_cpp_core]
+file_path: "cpp/functions/core/data_table_types.h"
+---
+
+## Notas
+
+`alias` vacio dispara `aggregation_alias(a)` auto: `count`, `sum_<col>`, `distinct_<col>`, `p90_<col>` etc. SQL mapping en `tql_to_sql`: `COUNT(*)`, `SUM("col")`, `quantile_cont("col", p)`.
@@ -0,0 +1,21 @@
+---
+name: ColorRule
+lang: cpp
+domain: core
+version: "1.0.0"
+algebraic: product
+definition: |
+  struct ColorRule {
+      int          col;
+      std::string  equals;
+      unsigned int color;
+  };
+description: "Regla de pintado condicional para tabla UI. Si cells[row][col] == equals, fondo = color (RGBA packed)."
+tags: [tql, color, ui-hint, product-type]
+uses_types: []
+file_path: "cpp/functions/core/data_table_types.h"
+---
+
+## Notas
+
+Solo afecta render visual. Round-trip en TQL via `columns.<name>.color_rules`. Vacio = sin color override.
@@ -0,0 +1,28 @@
+---
+name: ColumnType
+lang: cpp
+domain: core
+version: "1.0.0"
+algebraic: sum
+definition: |
+  enum class ColumnType {
+      Auto, String, Int, Float, Bool, Date, Json
+  };
+description: "Tipo de columna del modelo TQL. `Auto` dispara auto-detect; el resto fuerza el tipo declarado. Base de toda la pipeline data_table."
+tags: [tql, data-table, types, sum-type]
+uses_types: []
+file_path: "cpp/functions/core/data_table_types.h"
+---
+
+## Notas
+
+Sum type / enum-class. Convivimos con `effective_type()` que resuelve `Auto` → auto-detect via sample. El resto fuerza el tipo declarado por el caller.
+
+Tabla de iconos UTF-8 Tabler para cada variante en `column_type_icon(t)`. Mapeo SQL ↔ ColumnType en `tql_to_sql` (issue 0080).
+
+## Usado por
+
+- `compute_stage_cpp_core` — input/output types per stage
+- `tql_emit_cpp_core` / `tql_apply_cpp_core` — emit/parse TQL columns block
+- `tql_to_sql_cpp_core` — mapping a SQL DuckDB types
+- `data_table_cpp_viz` — UI render por columna
@@ -0,0 +1,19 @@
+---
+name: DateGranularity
+lang: cpp
+domain: core
+version: "1.0.0"
+algebraic: sum
+definition: |
+  enum class DateGranularity { None, Year, Month, Week, Day, Hour };
+description: "Granularidad de truncado de fechas para breakouts TQL. Sufijo `:token` en breakout string (ej. 'ts:month')."
+tags: [tql, date, granularity, sum-type, mbql]
+uses_types: []
+file_path: "cpp/functions/core/data_table_types.h"
+---
+
+## Notas
+
+Auto-detect via `auto_date_granularity(min_ymd, max_ymd)`: >2y→Year, >60d→Month, >14d→Week, resto→Day. SQL emit DuckDB: `date_trunc('month'|'year'|...,col)`.
+
+Week trunca a lunes ISO (Hinnant algo).
@@ -0,0 +1,26 @@
+---
+name: DerivedColumn
+lang: cpp
+domain: core
+version: "1.0.0"
+algebraic: product
+definition: |
+  struct DerivedColumn {
+      int         source_col;
+      ColumnType  type;
+      std::string name;
+      std::string formula;
+      int         lua_id;
+      std::string compile_error;
+  };
+description: "Col custom dentro de un Stage. Modo 1: retipo (source_col >= 0, formula vacia). Modo 2: formula Lua (source_col == -1, eval por lua_engine sandbox)."
+tags: [tql, derived, formula, lua, product-type]
+uses_types: [ColumnType_cpp_core]
+file_path: "cpp/functions/core/data_table_types.h"
+---
+
+## Notas
+
+`formula` se evalua por row via `lua_engine` con refs `[col]` disponibles. Para SQL transpile (fase 11), la formula debe estar dentro del Lua subset; si no, `tql_to_sql` emite un warning y omite la col.
+
+`lua_id` cachea la formula compilada en lua_engine entre eval calls.
@@ -0,0 +1,30 @@
+---
+name: Filter
+lang: cpp
+domain: core
+version: "1.0.0"
+algebraic: product
+definition: |
+  struct Filter {
+      int         col;
+      Op          op;
+      std::string value;
+  };
+description: "Predicado TQL: col idx + Op + value. Aplicado dentro de un Stage por compute_stage. col es idx en headers efectivos del INPUT del stage."
+tags: [tql, filter, predicate, product-type]
+uses_types: [Op_cpp_core]
+file_path: "cpp/functions/core/data_table_types.h"
+---
+
+## Notas
+
+`col` es indice en `in_headers` del stage donde aplica (no en el dataset original — esto cambio en el refactor a stages). Para drill-down usar `make_drill_filter(col_idx, value)`.
+
+`value` es string siempre — `compare()` decide numerico vs lexical segun parseo. Range filters (op_in_range, op_between) no estan modelados; usar dos Filters consecutivos.
+
+## Usado por
+
+- `Stage_cpp_core` (lista de filters)
+- `apply_filters`, `compute_stage_cpp_core`
+- `make_drill_filter`, `build_preset_filters`
+- `tql_to_sql_cpp_core` → SQL WHERE clauses con `?` placeholders
@@ -0,0 +1,25 @@
+---
+name: Join
+lang: cpp
+domain: core
+version: "1.0.0"
+algebraic: product
+definition: |
+  struct Join {
+      std::string                                          alias;
+      std::string                                          source;
+      std::vector<std::pair<std::string, std::string>>     on;
+      JoinStrategy                                         strategy;
+      std::vector<std::string>                             fields;
+  };
+description: "Join MBQL-style entre main_t y source. on = pares {left_col, right_col} multi-key. strategy = Left/Inner/Right/Full. fields vacio = all cols del derecho."
+tags: [tql, join, mbql, product-type]
+uses_types: [JoinStrategy_cpp_core]
+file_path: "cpp/functions/core/data_table_types.h"
+---
+
+## Notas
+
+Materializado por `join_tables_cpp_core` antes de stages[0]. Cols del derecho se prefijan con `alias.col` para preservar headers del main. SQL emit: `LEFT/INNER/RIGHT/FULL OUTER JOIN source AS alias ON main.l = alias.r AND ...`.
+
+Multi-key: `on = {{l1,r1}, {l2,r2}}` → `ON main.l1 = alias.r1 AND main.l2 = alias.r2`.
@@ -0,0 +1,17 @@
+---
+name: JoinStrategy
+lang: cpp
+domain: core
+version: "1.0.0"
+algebraic: sum
+definition: |
+  enum class JoinStrategy { Left, Inner, Right, Full };
+description: "Estrategia de join MBQL-style. 4 variantes estandar SQL. SQL mapping directo a LEFT/INNER/RIGHT/FULL OUTER JOIN."
+tags: [tql, join, strategy, sum-type]
+uses_types: []
+file_path: "cpp/functions/core/data_table_types.h"
+---
+
+## Notas
+
+Round-trip TQL: tokens `"left"/"inner"/"right"/"full"`. Fallback de parse: cualquier token desconocido (ej. `"nope"`) → Left.
@@ -0,0 +1,36 @@
+---
+name: Op
+lang: cpp
+domain: core
+version: "1.0.0"
+algebraic: sum
+definition: |
+  enum class Op {
+      Eq, Neq, Gt, Gte, Lt, Lte,
+      Contains, NotContains, StartsWith, EndsWith
+  };
+description: "Operador de filtro TQL. 6 ops de comparacion + 4 ops de string. Numericos ordenan numericamente cuando ambos lados parsean."
+tags: [tql, filter, operator, sum-type]
+uses_types: []
+file_path: "cpp/functions/core/data_table_types.h"
+---
+
+## Notas
+
+Tabla operadores permitidos por `ColumnType` via `ops_for_type(t)`:
+
+| Tipo | Ops |
+|---|---|
+| Int / Float / Date | Eq, Neq, Gt, Gte, Lt, Lte |
+| Bool | Eq, Neq |
+| Json | Eq, Neq, Contains, NotContains |
+| String | Eq, Neq, Contains, NotContains, StartsWith, EndsWith |
+
+Mapeo SQL en `tql_to_sql_cpp_core`: Contains → `LIKE '%v%'`, StartsWith → `LIKE 'v%'`, etc.
+
+## Usado por
+
+- `Filter_cpp_core`
+- `compute_stage_cpp_core` (via apply_filters)
+- `tql_emit_cpp_core` / `tql_apply_cpp_core`
+- `tql_to_sql_cpp_core`
@@ -0,0 +1,20 @@
+---
+name: SortClause
+lang: cpp
+domain: core
+version: "1.0.0"
+algebraic: product
+definition: |
+  struct SortClause {
+      std::string col;
+      bool        desc;
+  };
+description: "Clausula de orden por nombre de col. Multi-sort = vector ordenado por prioridad. desc=true para descendente."
+tags: [tql, sort, order, product-type]
+uses_types: []
+file_path: "cpp/functions/core/data_table_types.h"
+---
+
+## Notas
+
+Sort por nombre (no idx) — sobrevive a renombrado de cols + a stages 1+ donde idx no aplica. Aplicacion via `apply_sorts`. Round-trip TQL: `sort = { {"asc"|"desc", "col"}, ... }`.
@@ -0,0 +1,33 @@
+---
+name: Stage
+lang: cpp
+domain: core
+version: "1.0.0"
+algebraic: product
+definition: |
+  struct Stage {
+      std::vector<Filter>          filters;
+      std::vector<DerivedColumn>   derived;
+      std::vector<std::string>     breakouts;
+      std::vector<Aggregation>     aggregations;
+      std::vector<SortClause>      sorts;
+  };
+description: "Layer del pipeline TQL. Stage 0 = Raw (filters + derived + sort). Stage 1+ pueden agrupar (breakouts + aggregations + sort). Consumida por compute_stage."
+tags: [tql, stage, pipeline, product-type, mbql]
+uses_types: [Filter_cpp_core, Op_cpp_core]
+file_path: "cpp/functions/core/data_table_types.h"
+---
+
+## Notas
+
+Inspirado en MBQL `:filter` / `:breakout` / `:aggregation` / `:order-by`. Diferencia clave: TQL chain N stages explicitos, cada uno consume el output del anterior. MBQL usa `:source-query` recursivo.
+
+Breakout strings pueden llevar sufijo `:granularity` para cols Date (fase 10): `"ts:month"`, `"ts:week"`, etc. Ver `parse_breakout_granularity()`.
+
+## Usado por
+
+- `State_cpp_core` (lista de stages)
+- `compute_stage_cpp_core` (executes a single Stage)
+- `compute_pipeline_cpp_core` (chains stages 0..N)
+- `tql_emit_cpp_core` / `tql_apply_cpp_core` (round-trip Lua)
+- `tql_to_sql_cpp_core` → CTE chain DuckDB
@@ -0,0 +1,26 @@
+---
+name: StageOutput
+lang: cpp
+domain: core
+version: "1.0.0"
+algebraic: product
+definition: |
+  struct StageOutput {
+      std::vector<std::string>  cell_backing;
+      std::vector<const char*>  cells;
+      int                       rows;
+      int                       cols;
+      std::vector<std::string>  headers;
+      std::vector<ColumnType>   types;
+  };
+description: "Output materializado de compute_stage. cell_backing posee strings nuevos (aggregations); cells es row-major de ptrs a backing o a in_cells original."
+tags: [tql, stage, output, product-type]
+uses_types: [ColumnType_cpp_core]
+file_path: "cpp/functions/core/data_table_types.h"
+---
+
+## Notas
+
+Lifetime: cell_backing es owner — cells solo es valido mientras StageOutput viva. Para passthrough (sin agregaciones), cells apunta a in_cells del caller (sin backing local).
+
+Reservar capacidad upfront en cell_backing evita realloc que invalida punteros.
@@ -0,0 +1,40 @@
+---
+name: State
+lang: cpp
+domain: core
+version: "1.0.0"
+algebraic: product
+definition: |
+  struct State {
+      std::vector<Stage>     stages;
+      int                    active_stage;
+      ViewMode               display;
+      ViewConfig             viz_config;
+      std::vector<VizPanel>  extra_panels;
+      std::vector<Join>      joins;
+      std::string            main_source;
+      std::vector<ColorRule> color_rules;
+      std::vector<bool>      col_visible;
+      std::vector<int>       col_order;
+  };
+description: "Estado completo de una query TQL: pipeline de stages + joins + viz config + UI tweaks. Round-trip a Lua via tql_emit/tql_apply."
+tags: [tql, state, pipeline, product-type]
+uses_types: [Stage_cpp_core, Filter_cpp_core, Op_cpp_core]
+file_path: "cpp/functions/core/data_table_types.h"
+---
+
+## Notas
+
+State es el documento canonico de una query del usuario. Atomico — toda mutacion pasa por helpers pure (`apply_drill_step`, `drill_up`, etc.).
+
+`active_stage` = idx del stage cuyo output se renderiza. Filters/sorts del Raw siempre se aplican antes; joins se materializan ANTES de stages[0].
+
+Helpers `raw()`, `active()` garantizan `stages[0]` existe (lazy init en `ensure_stage0`).
+
+## Usado por
+
+- `data_table_cpp_viz` (UI render principal)
+- `compute_pipeline_cpp_core` (resuelve hasta active_stage)
+- `tql_emit_cpp_core` / `tql_apply_cpp_core` (Lua serializacion)
+- `tql_to_sql_cpp_core` → SQL DuckDB CTE chain
+- `apply_drill_step` / `undo_drill_step` / `drill_up`
@@ -0,0 +1,33 @@
+---
+name: TableInput
+lang: cpp
+domain: core
+version: "1.0.0"
+algebraic: product
+definition: |
+  struct TableInput {
+      std::string               name;
+      std::vector<std::string>  headers;
+      std::vector<ColumnType>   types;
+      const char* const*        cells;
+      int                       rows;
+      int                       cols;
+  };
+description: "Tabla materializada en memoria pasada a data_table::render(). Owner externo. Multiple tables = main + joinables (fase 9 issue 0078)."
+tags: [tql, table, joins, mbql, product-type]
+uses_types: [Op_cpp_core]
+file_path: "cpp/functions/core/data_table_types.h"
+---
+
+## Notas
+
+`name` es el identificador estable que matchea `Join.source` cuando se aplica un join. `cells` es row-major (rows * cols `const char*`). Apuntadores estables durante todo el frame de render.
+
+Cells son strings — auto_detect_type infiere ColumnType si `types[i] == Auto`. Numericos se parsean por celda en compare/agg via `parse_number()`.
+
+## Usado por
+
+- `data_table_cpp_viz::render(tables, state)`
+- `resolve_main_idx` (matchea state.main_source)
+- `join_tables_cpp_core` (right table)
+- `tql_to_sql_cpp_core` (schema para emitir SELECT FROM `name`)
@@ -0,0 +1,29 @@
+---
+name: ViewConfig
+lang: cpp
+domain: viz
+version: "1.0.0"
+algebraic: product
+definition: |
+  struct ViewConfig {
+      std::string                 x_col;
+      std::vector<std::string>    y_cols;
+      std::string                 size_col;
+      std::string                 cat_col;
+      unsigned int                primary_color;
+      int                         hist_bins;
+      float                       pie_radius;
+      bool                        show_legend;
+      bool                        show_markers;
+      bool                        locked;
+      mutable bool                fit_request;
+  };
+description: "Overrides manuales de auto-detect para ViewMode. Cols vacias dejan al dispatcher elegir. primary_color=0 usa palette ImPlot."
+tags: [tql, viz, config, product-type]
+uses_types: []
+file_path: "cpp/functions/core/data_table_types.h"
+---
+
+## Notas
+
+`fit_request` mutable bool consumido por `viz::render` (one-shot trigger para `ImPlot::SetNextAxesToFit`). `locked` deshabilita pan/zoom del usuario.
@@ -0,0 +1,29 @@
+---
+name: ViewMode
+lang: cpp
+domain: viz
+version: "1.0.0"
+algebraic: sum
+definition: |
+  enum class ViewMode {
+      Table,
+      Bar, Column, GroupedBar, StackedBar,
+      Line, Area, Stairs,
+      Scatter, Bubble,
+      Histogram, Histogram2D, Heatmap, BoxPlot,
+      Stem, ErrorBars,
+      Pie, Donut, Funnel, Waterfall,
+      KPI, KPIGrid,
+      Candlestick, Radar
+  };
+description: "Modo de visualizacion ImPlot del stage activo. 24 variantes cubriendo bars/lines/distribution/composition/specialized. Dispatcher en viz::render."
+tags: [tql, viz, imgui, implot, sum-type]
+uses_types: []
+file_path: "cpp/functions/core/data_table_types.h"
+---
+
+## Notas
+
+Tokens lowercase via `view_mode_token`/`view_mode_from_token` para TQL emit/apply. Helpers `view_mode_needs_numeric/category/aggregation` guían UI (combo selectable solo si schema compatible).
+
+`Table` siempre disponible (fallback render por defecto). Demas requieren al menos cols compatibles. Click-to-drill (fase 10): Bar/Column/Scatter/Bubble/Pie/Donut/Funnel/Heatmap.
@@ -0,0 +1,21 @@
+---
+name: VizPanel
+lang: cpp
+domain: viz
+version: "1.0.0"
+algebraic: product
+definition: |
+  struct VizPanel {
+      ViewMode    display;
+      ViewConfig  config;
+      mutable ViewMode last_non_table;
+  };
+description: "Viz adicional sobre el mismo StageOutput. State tiene panel principal (display+viz_config) + vector<VizPanel> extras."
+tags: [tql, viz, panel, product-type]
+uses_types: [ViewMode_cpp_viz, ViewConfig_cpp_viz]
+file_path: "cpp/functions/core/data_table_types.h"
+---
+
+## Notas
+
+`last_non_table` memoria del ultimo display !=Table para toggle Table↔View rapido en UI. Mutable porque se actualiza durante render (no rompe const correctness).
@@ -1,9 +1,10 @@
 ---
 id: 0078
 title: tables playground — joins MBQL-style (fase 9)
-status: pending
+status: done
 priority: medium
 created: 2026-05-12
+closed: 2026-05-12
 related_components: [cpp/apps/primitives_gallery/playground/tables, lua_engine, tql]
 ---

@@ -1,9 +1,10 @@
 ---
 id: 0079
 title: tables playground — drill-through extendido (fase 10)
-status: pending
+status: done
 priority: medium
 created: 2026-05-12
+closed: 2026-05-12
 related_components: [cpp/apps/primitives_gallery/playground/tables]
 ---

@@ -1,77 +1,238 @@
 ---
 id: 0080
-title: tables playground — LLM API "Ask AI" (fase 11)
-status: pending
+title: tables playground — LLM "Ask AI" + TQL/SQL emit (fase 11)
+status: partial
 priority: medium
 created: 2026-05-12
-related_components: [cpp/apps/primitives_gallery/playground/tables]
+updated: 2026-05-13
+notes: pure layer + LLM client + Ask AI modal DONE. DuckDB adapter v2 (opcional, build flag FN_TQL_DUCKDB=1)
+related_components: [cpp/apps/primitives_gallery/playground/tables, lua_engine, tql, duckdb]
 ---

 ## Contexto

-Fase 11 del roadmap del tables playground. El user escribe en lenguaje natural
-una pregunta sobre los datos ("show me top 10 langs by total size"). El LLM
-recibe el TQL actual + schema + pregunta, devuelve nuevo TQL. App aplica via
-`tql::apply` y renderiza.
+Fase 11 del roadmap del tables playground. Dos capacidades que se construyen juntas porque comparten infra (prompt schema, runtime adapter, tests round-trip):
+
+1. **LLM "Ask AI"** — usuario o agente pregunta en lenguaje natural, modelo devuelve un nuevo TQL (o SQL DuckDB si esta linkado).
+2. **TQL → SQL (DuckDB) emitter** — permite a agentes escribir SQL contra el mismo modelo de datos. Ejecutable si la app linkó DuckDB; si no, solo emite el string.
+
+Diseño one-way: **TQL → SQL si**, **SQL → TQL no**. Razon documentada en investigacion Metabase MBQL ↔ SQL: la traduccion inversa es lossy (CTEs, window fns, set ops, lateral, correlated subqueries no caben en MBQL/TQL). Patron canonico Malloy/Cube/LookML/Metabase = compile-down one-way.

 ## Cambios

-### 1. UI
+### 1. UI "Ask AI"

 - Boton "Ask AI" en toolbar (al lado de "+ Viz").
- Modal con:
+- Modal:
  - InputText multiline para la pregunta.
-  - Boton "Send" + spinner durante la llamada.
-  - Diff side-by-side: TQL actual vs TQL propuesto (texto con highlight).
+  - Toggle output mode: `TQL` (default) | `SQL (DuckDB)` (visible solo si app fue compilada con `FN_TQL_DUCKDB=1`).
+  - Boton "Send" + spinner.
+  - Diff side-by-side: actual vs propuesto (texto highlight).
  - Botones "Apply" / "Reject" / "Edit before apply".

 ### 2. Backend LLM

- Provider: Anthropic Claude (API key desde `pass anthropic/api-key`).
- Endpoint: `https://api.anthropic.com/v1/messages`.
- Model: `claude-sonnet-4-6` por defecto. Configurable via env `FN_LLM_MODEL`.
- Cliente HTTP: cURL via popen (sin deps nuevas) o libcurl si ya esta linkada.
+- Provider: Anthropic Claude. API key via `pass anthropic/api-key`.
+- Endpoint: `https://api.anthropic.com/v1/messages`. Model: `claude-sonnet-4-6`. Override env `FN_LLM_MODEL`.
+- Cliente HTTP: cURL via popen (sin deps nuevas).
 - Prompt template incluye:
  - Esquema TQL (de `docs/TQL.md`).
+  - **Si SQL mode**: dialecto DuckDB + funciones DuckDB relevantes (date_trunc, regexp_replace, etc.).
  - Cols disponibles del stage 0 (name, type) + cols joinables.
+  - **Grammar Lua subset** (ver §4) cuando aplique.
  - Funciones Lua disponibles (de `lua_engine`).
  - TQL actual.
  - Pregunta del user.
- Response: extraer ```lua``` block del markdown, strip prose.
+- Response: extraer ```lua``` (TQL) o ```sql``` block del markdown, strip prose.

-### 3. Validacion + safety
+### 3. TQL → SQL DuckDB emitter

- Antes de aplicar: `tql::apply` con dry-run (parsea sin mutar State). Si fail, mostrar error + boton "Ask AI again with this error".
- Lua sandbox ya cubre side effects en formulas — el TQL en si es declarativo, no ejecuta nada peligroso.
+Nuevo modulo `tql_to_sql.{h,cpp}` (pure). Funciones:

-### 4. Streaming
+```cpp
+struct SqlEmit {
+    std::string sql;            // SELECT ... statement
+    std::vector<std::string> params;  // bound values (?-placeholders)
+    std::vector<std::string> warnings;
+    std::string error;          // si emit fallo (subset out of bounds)
+};

- Stream tokens via SSE (`stream=true` en Anthropic API).
- Mostrar texto en vivo en el modal.
- Cuando termina, parsear lua block final.
+// Pure: emite SQL DuckDB equivalente a la pipeline State (stages 0..active).
+// `tables` provee el schema de cada TableInput (no los cells — el caller
+// decide como hidratar las tablas en DuckDB).
+SqlEmit emit_sql(const State& state, const std::vector<TableInput>& tables,
+                  int up_to_stage = -1 /* default = active_stage */);
+```

-### 5. Persistencia conversation
+Mapeo MBQL-style:
+- Stage 0 = CTE base `t0` con `SELECT cols + derived FROM main_t [LEFT/INNER/RIGHT/FULL JOIN joinables ON ...]`.
+- Stage N = CTE `tN` con `SELECT breakouts, aggregations FROM tN-1 [WHERE filters] [GROUP BY breakouts] [ORDER BY sorts]`.
+- Final query `SELECT * FROM t<active>`.

- UiState guarda lista de turns (pregunta + TQL propuesto + resultado apply).
- "Ask AI" siguiente turn incluye history previa.
- Boton "Reset chat" limpia.
- NO persistido en TQL (es UI state).
+Stage emit detalle:
+- `filter Op::Eq col = "v"` → `WHERE col = ?` con `params.push_back(v)` (DuckDB acepta `$1`/`?`).
+- `breakout "ts:month"` → `date_trunc('month', ts) AS "ts:month"`. Granularity sufijo → DuckDB `date_trunc`.
+- `aggregation count` → `COUNT(*) AS count`.
+- `aggregation p95(col)` → `quantile_cont(col, 0.95) AS p95_col`.
+- `aggregation distinct col` → `COUNT(DISTINCT col) AS distinct_col`.
+- `sort {desc, col}` → `ORDER BY col DESC`.
+- Joins: 4 strategies mapean directo a `LEFT/INNER/RIGHT/FULL JOIN ... ON l.k = r.k`.
+- Derived cols: transpiladas via Lua subset (§4). Si formula fuera de subset → `SqlEmit.error = "lua formula 'X' out of subset: <razon>"`.

-### 6. Coste / rate limit
+Salida es **string SQL valido DuckDB**. No ejecuta — eso es responsabilidad del adapter opcional (§5).
+
+### 4. Lua subset transpilable a SQL — GRAMATICA
+
+Documentar en `docs/TQL.md` seccion nueva "SQL transpile subset".
+
+**Reglas duras: Lua sigue siendo potente y sin limites en runtime general.** El subset solo aplica si el caller pide `tql_to_sql::emit_sql()`. Fuera del subset → error claro en tiempo de emit, NO en tiempo de eval. El playground sigue ejecutando Lua arbitrario sin restriccion.
+
+**Subset permitido (transpila a SQL):**
+
+| Lua | SQL DuckDB |
+|---|---|
+| Literales: numero, string `"x"`, bool `true/false`, `nil` | `1.5`, `'x'`, `TRUE/FALSE`, `NULL` |
+| Col ref: `[colname]` | `colname` (identifier quoted si necesario) |
+| Aritmetica: `+ - * / % - (unary)` | mismas |
+| Comparacion: `== ~= < <= > >=` | `= <> < <= > >=` |
+| Logica: `and or not` | `AND OR NOT` |
+| String concat: `..` | `\|\|` |
+| Ternary: `if A then B else C end` | `CASE WHEN A THEN B ELSE C END` |
+| Ternary inline: `(A and B) or C` (pattern comun Lua; equivalente solo si B nunca es `false`/`nil`) | `CASE WHEN A THEN B ELSE C END` |
+| `math.floor/ceil/abs/round/sqrt/sin/cos/log` | `floor/ceiling/abs/round/sqrt/sin/cos/ln` |
+| `math.min(a,b)/max(a,b)` | `least(a,b)/greatest(a,b)` |
+| `string.upper/lower/len(s)` | `upper(s)/lower(s)/length(s)` |
+| `string.sub(s, i, j)` | `substring(s, i, j-i+1)` |
+| `tostring(x)/tonumber(x)` | `CAST(x AS VARCHAR)/CAST(x AS DOUBLE)` |
+| Paréntesis y precedencia | mismas |
+
+**Fuera de subset (error compile-time):**
+
+- Closures: `function() ... end`
+- Loops: `for/while/repeat`
+- Locals: `local x = ...`
+- Tables: `{...}`, `t[k]`, `t.field`, `table.*`
+- Multi-return / vararg
+- `string.gsub/find/match/format` (mapeo manual posible v2)
+- IO: `io.*`, `os.*`, `print`
+- Coroutines, metatables, debug
+- Recursion, multi-statement bodies
+
+**Error message ejemplo:**
+
+```
+SQL transpile error en derived col 'fullname':
+  formula = "[first] .. ' ' .. table.concat(parts, ',')"
+  causa: 'table.concat' no esta en SQL transpile subset
+  ver docs/TQL.md#sql-transpile-subset
+  workaround: usar TQL puro (sin SQL emit) o reescribir formula con `..`
+```
+
+**Helper:** `tql_to_sql::is_transpilable(formula, error_out)` pure fn que valida una formula sin emitir.
+
+### 5. DuckDB adapter (opcional)
+
+Build flag `FN_TQL_DUCKDB=1` en `cpp/CMakeLists.txt` opta-in. Vendor DuckDB header-only o lib (depende de tamaño). Default OFF — playground sigue compilando sin DuckDB.
+
+API adapter:
+
+```cpp
+namespace tql_duckdb {
+struct Result {
+    StageOutput out;        // materializado como TableInput compatible
+    std::string error;
+    double duration_ms = 0;
+};
+// Hidrata `tables` como views temp + ejecuta sql + materializa resultado.
+Result execute(const std::string& sql,
+                const std::vector<std::string>& params,
+                const std::vector<TableInput>& tables);
+}
+```
+
+Apps que lo usen (registry_dashboard, sqlite_api): linkean DuckDB + invocan adapter cuando user/agent pide SQL output. Playground por defecto NO linka — `Ask AI` solo ofrece SQL mode si `#ifdef FN_TQL_DUCKDB`.
+
+### 6. Validacion + safety
+
+- Antes de aplicar TQL del LLM: `tql::apply` dry-run. Si fail, mostrar error + "Ask AI again with this error".
+- Antes de ejecutar SQL del LLM: parsing DuckDB en sandbox read-only (DuckDB connection sin `INSERT/UPDATE/DELETE/DROP`, attach read-only).
+- Lua sandbox ya cubre side effects en formulas TQL.
+
+### 7. Streaming
+
+- Stream tokens via SSE (`stream=true` Anthropic).
+- Texto en vivo en modal.
+- Cuando termina, parse lua/sql block final.
+
+### 8. Persistencia conversacion
+
+- UiState guarda lista de turns (pregunta + output propuesto + apply result + engine usado TQL/SQL).
+- Siguiente "Ask AI" turn incluye history previa.
+- Boton "Reset chat".
+- NO persistido en TQL (UI state efimero).
+
+### 9. Coste / rate limit

 - Mostrar tokens estimados antes de enviar (rough char count / 4).
 - Cap input a 8000 tokens.
- Error handling: 429 / 5xx -> mensaje + reintentar.
+- Error handling: 429 / 5xx → mensaje + reintentar.

 ## Tests

- Mockear HTTP response con cURL stub.
- Test: prompt build incluye schema + TQL + pregunta en formato esperado.
- Test: response parse extrae lua block correctamente.
- Test: tql::apply sobre output del LLM funciona end-to-end con dataset sintetico.
+### Pure (sin red, sin DuckDB linkado)
+
+- **Lua subset validator:** `is_transpilable` true para casos subset, false con error claro para fuera de subset (closures, loops, table.*, string.gsub, etc.).
+- **TQL → SQL emit golden tests** (~20 casos):
+  - stage 0 simple filter + sort → `SELECT ... WHERE ... ORDER BY ...`
+  - stage 1 group + count → CTE chain con GROUP BY
+  - granularity sufijo `:month` → `date_trunc('month', ts)`
+  - join 4 strategies con multi-key
+  - derived cols subset → CASE/expressions
+  - derived cols fuera subset → `SqlEmit.error` no vacio + warning
+  - aggregation p25/p50/p75/p99 → `quantile_cont(col, p)`
+  - empty pipeline → `SELECT * FROM t0`
+- **Prompt build:** el prompt incluye schema + TQL + pregunta en formato esperado (mockear HTTP).
+- **Response parse:** extrae lua/sql block correctamente.
+
+### Round-trip (requiere DuckDB linkado)
+
+Solo corren si `FN_TQL_DUCKDB=1`:
+- TQL → emit SQL → ejecutar DuckDB → resultado coincide bit-a-bit con `compute_stage` pure sobre los mismos cells.
+- Casos: filter, group+agg, join inner, multi-stage chain, breakout granularity month/week, derived col `[a] + [b] * 2`.
+
+### LLM (red real, opt-in)
+
+- Test integration con `ANTHROPIC_API_KEY` real (`make test-llm`): pregunta simple → recibe TQL valido → apply OK.
+- Mock test (CI): cURL stub responde con JSON predefinido → parser extrae bloque OK.

 ## No-objetivos

- Generacion de visualizaciones nuevas via LLM (la viz la elige TQL `display`, suficiente).
- Acciones del LLM mas alla de modificar TQL (sin acceso a I/O del sistema).
- Multi-provider (OpenAI / local) — fase futura. Hardcode Anthropic primero.
+- **SQL → TQL**: no se implementa. Documentado en doc + en mensajes de error del Ask AI ("no soportamos SQL como input, use TQL").
+- **Multi-provider** (OpenAI, local): fase futura. Anthropic hardcoded v1.
+- **Generacion de viz desde LLM** mas alla de `display` token: la viz la elige TQL existente.
+- **Lua subset extension** (string.gsub, regex, table.*): postpone v2 si demanda real.
+- **DuckDB write ops**: solo SELECT/CTE. Apps que quieran INSERT/UPDATE lo hacen fuera del playground.
+
+## Flujo agente (resumen)
+
+```
+Agente -> "muestrame top 10 langs por total size"
+LLM (TQL default) -> emite TQL { stages = {...} }
+tql::apply -> State + dry-run OK
+User clickea Apply -> compute_stage en memoria
+
+Agente -> "lo mismo pero como SQL"
+[Si FN_TQL_DUCKDB=1 y app linkó adapter]
+LLM (SQL mode toggled) -> emite SELECT ... DuckDB
+tql_duckdb::execute(sql, params, tables) -> resultado materializado
+[Si NO linkado] -> error "SQL mode requiere DuckDB. Compila con FN_TQL_DUCKDB=1"
+```
+
+## Riesgos
+
+- **Subset Lua restrictivo en SQL emit**: usuarios usan Lua arbitrario en playground → al pedir SQL falla. Mitigacion: error message claro + sugerencia workaround.
+- **DuckDB tamaño**: lib ~10MB. Solo se paga si app opta-in con build flag.
+- **Dialect drift DuckDB**: funciones SQL pueden cambiar entre versiones. Pinear DuckDB version en CMake.
+- **LLM hallucinations**: TQL invalido → dry-run rechaza con error. Loop "Ask AI again with this error" recupera.
+- **API key leak**: la integracion con `pass` mantiene la key fuera del repo. Ningun build flag debe imprimir la key.
+- **Coste tokens**: prompt grande (schema + grammar + TQL). Cap input + warning visual.
@@ -496,3 +496,87 @@ StageOutput compute_stage(const char* const* in_cells, int in_rows, int in_cols,
 | Multi-sort drag-reorder | Phase 4 |

 Ver `cpp/apps/primitives_gallery/playground/tables/` para la implementacion del playground.
+
+---
+
+## SQL transpile subset (fase 11 — issue 0080)
+
+TQL emite SQL DuckDB equivalente para que agentes LLM puedan generar TQL o SQL contra los mismos datos. Modulo `tql_to_sql.{h,cpp}` provee `emit_sql(State, tables)`. Mapeo MBQL-style con CTE chain `t0..tN`.
+
+### Lua subset transpilable
+
+Lua sigue **potente y sin limites en runtime general** (formula eval en derived cols TQL puro). El subset SOLO aplica al pedir `tql_to_sql::emit_sql()`. Fuera del subset → error compile-time con causa concreta + workaround.
+
+**Permitido (transpila a SQL DuckDB):**
+
+| Lua | SQL DuckDB | Ejemplo |
+|---|---|---|
+| Literales numero/string/bool/nil | mismas (`'x'`, `TRUE`, `NULL`) | `42`, `"hola"`, `nil` |
+| Col ref: `[colname]` | `"colname"` (quoted) | `[size_kb]` → `"size_kb"` |
+| Aritmetica: `+ - * / % - (unary)` | mismas | `[a] + [b] * 2` → `("a" + ("b" * 2))` |
+| Comparacion: `== ~= < <= > >=` | `= <> < <= > >=` | `[n] >= 10` → `("n" >= 10)` |
+| Logica: `and or not` | `AND OR NOT` | `[a] and [b]` → `("a" AND "b")` |
+| String concat: `..` | `\|\|` | `[a] .. "_" .. [b]` → `("a" \|\| '_' \|\| "b")` |
+| Ternary: `if A then B else C end` | `CASE WHEN A THEN B ELSE C END` | obligatorio `else` |
+| `math.floor/ceil/abs/sqrt/sin/cos/log/exp` | `floor/ceiling/abs/sqrt/sin/cos/ln/exp` | `math.floor([x])` |
+| `math.min(a,b)/max(a,b)` | `least(a,b)/greatest(a,b)` | `math.min([a], 100)` |
+| `string.upper/lower/len(s)` | `upper(s)/lower(s)/length(s)` | `string.upper([name])` |
+| `string.sub(s, i [, j])` | `substring(s, i [, j-i+1])` | `string.sub([s], 1, 3)` |
+| `tostring(x)/tonumber(x)` | `CAST(x AS VARCHAR)/CAST(x AS DOUBLE)` | `tonumber([n])` |
+| Parentesis y precedencia Lua | mismas | `(a + b) * c` |
+
+**Fuera de subset (error compile-time):**
+
+- Closures: `function() ... end`
+- Loops: `for/while/repeat`
+- Locals: `local x = ...`
+- Tables: `{...}`, `t[k]`, `t.field`, `table.*`
+- Multi-return, vararg `...`
+- `string.gsub/find/match/format/byte/char/rep`
+- IO/OS/debug: `io.*`, `os.*`, `debug.*`, `package`, `require`, `print`
+- Coroutines, metatables, `pcall/xpcall`, `rawget/rawset`
+- Recursion, multi-statement bodies (`;`)
+- Length operator `#`
+- Method calls `:`
+- Ternary sin else: `if A then B end` (subset requiere ambas ramas)
+
+### Error message ejemplo
+
+```
+SQL transpile error en derived col 'fullname':
+  formula = "[first] .. ' ' .. string.gsub([last], 'X', 'Y')"
+  causa: function 'string.gsub' not in SQL transpile whitelist
+  ver docs/TQL.md#sql-transpile-subset
+  workaround: usar TQL puro (sin SQL emit) o reescribir formula
+```
+
+### Stage → SQL mapeo
+
+| TQL element | SQL DuckDB |
+|---|---|
+| Stage 0 Raw | CTE `t0 AS (SELECT cols+derived FROM main_t [JOIN ...] [WHERE filters] [ORDER BY sorts])` |
+| Stage N>=1 | CTE `tN AS (SELECT breakouts+aggs FROM tN-1 [GROUP BY ...] [ORDER BY ...])` |
+| breakout `"col"` | `"col"` |
+| breakout `"col:month"` | `date_trunc('month', "col")` |
+| breakout `"col:year/week/day/hour"` | `date_trunc('year/week/day/hour', "col")` |
+| Aggregation Count | `COUNT(*)` |
+| Aggregation Sum/Avg/Min/Max/Stddev | `SUM/AVG/MIN/MAX/STDDEV("col")` |
+| Aggregation Distinct | `COUNT(DISTINCT "col")` |
+| Aggregation Median/P25/P75/P90/P99 | `quantile_cont("col", p)` |
+| Aggregation Percentile p | `quantile_cont("col", p)` |
+| Filter Op::Eq/Neq/Gt/Gte/Lt/Lte | `"col" = ?` etc (params bound) |
+| Filter Op::Contains | `"col" LIKE '%v%'` (param `%v%`) |
+| Filter Op::StartsWith / EndsWith | `LIKE 'v%'` / `LIKE '%v'` |
+| Sort `{desc, "col"}` | `ORDER BY "col" DESC` |
+| Join Left/Inner/Right/Full | `LEFT/INNER/RIGHT/FULL OUTER JOIN ... ON ...` |
+| Join multi-key `on={{l1,r1},{l2,r2}}` | `ON l.l1 = r.r1 AND l.l2 = r.r2` |
+| Join fields | cols `alias.field AS "alias.field"` |
+| `main_source` | `FROM "main_source_name"` |
+
+### Doctrina (Metabase-style)
+
+- **One-way:** TQL → SQL OK. SQL → TQL no soportado. Razon: traduccion inversa lossy (CTEs, window fns, set ops, lateral, correlated subqueries no caben en TQL).
+- **Output:** SQL string siempre emitible. Ejecucion requiere DuckDB linkado (build flag `FN_TQL_DUCKDB=1`, opcional).
+- **Agente flow:** TQL default. SQL solo si app linko DuckDB. UI Ask AI muestra toggle SQL solo cuando disponible.
+
+Ver issue 0080 + `tql_to_sql.{h,cpp}` para implementacion.
@@ -0,0 +1,155 @@
+package core
+
+import (
+	"bufio"
+	"context"
+	"fmt"
+	"io"
+	"os"
+	"os/exec"
+	"sync"
+	"syscall"
+	"time"
+)
+
+// StreamEvent is a single line captured from the subprocess's stdout or
+// stderr.
+type StreamEvent struct {
+	// Stream names the pipe the line came from: "stdout" or "stderr".
+	Stream string
+	// Line is the captured text without its trailing newline.
+	Line string
+	// Time is the timestamp at which the line was received.
+	Time time.Time
+}
+
+// StreamResult is the final outcome of the subprocess. It is delivered on the
+// result channel once both pipes have reached EOF and the process has
+// terminated.
+type StreamResult struct {
+	// ExitCode is the exit status reported for the process.
+	ExitCode int
+	// Err carries a failure other than a plain non-zero exit status.
+	Err error
+	// DurationMs is the elapsed run time in milliseconds.
+	DurationMs int64
+}
+
+// SubprocessStream starts name with args as a subprocess and returns two
+// channels:
+//   - events: receives one StreamEvent per line written to stdout/stderr. It
+//     is closed after both pipes reached EOF, the process was reaped and the
+//     result was sent.
+//   - result: receives exactly one StreamResult and is then closed.
+//
+// A non-empty env is appended to os.Environ(); with an empty env cmd.Env is
+// left unset. stdin may be nil.
+//
+// Cancelling ctx sends SIGTERM to the child's process group; if the child has
+// not been reaped 2 seconds later, the group receives SIGKILL. A ctx that is
+// already done when the function runs is reported as a start failure.
+//
+// Failures before the process runs (ctx already done, pipe creation, start)
+// are reported with ExitCode -1 and a wrapped Err. A non-zero exit status is
+// reported through ExitCode with a nil Err, unless ctx was cancelled, in
+// which case Err is ctx.Err().
+//
+// A line longer than 1 MiB cannot be delivered: scanning of that stream stops
+// and the remainder of the stream is read and discarded so the child never
+// blocks on a full pipe.
+//
+// The caller MUST consume events until it is closed, or cancel ctx, to avoid
+// blocking the internal goroutines.
+func SubprocessStream(
+	ctx context.Context,
+	name string,
+	args []string,
+	env []string,
+	stdin io.Reader,
+) (<-chan StreamEvent, <-chan StreamResult) {
+	events := make(chan StreamEvent, 64)
+	results := make(chan StreamResult, 1)
+
+	go func() {
+		defer close(events)
+		defer close(results)
+
+		start := time.Now()
+
+		// Refuse to spawn when ctx is already done. exec.CommandContext used
+		// to perform this check inside Start; it is done explicitly here
+		// because the command below is deliberately not bound to ctx.
+		if err := ctx.Err(); err != nil {
+			results <- StreamResult{ExitCode: -1, Err: fmt.Errorf("start: %w", err), DurationMs: 0}
+			return
+		}
+
+		// exec.Command instead of exec.CommandContext: a context-bound command
+		// is killed (SIGKILL, leader only) as soon as ctx is done, which would
+		// defeat the SIGTERM + grace period implemented by the supervisor
+		// goroutine below.
+		cmd := exec.Command(name, args...)
+
+		// Environment: parent environment plus the extra entries.
+		if len(env) > 0 {
+			cmd.Env = append(os.Environ(), env...)
+		}
+
+		if stdin != nil {
+			cmd.Stdin = stdin
+		}
+
+		// Own process group, so signals sent to -pid also reach descendants.
+		cmd.SysProcAttr = &syscall.SysProcAttr{Setpgid: true}
+
+		stdoutPipe, err := cmd.StdoutPipe()
+		if err != nil {
+			results <- StreamResult{ExitCode: -1, Err: fmt.Errorf("stdout pipe: %w", err), DurationMs: 0}
+			return
+		}
+		stderrPipe, err := cmd.StderrPipe()
+		if err != nil {
+			results <- StreamResult{ExitCode: -1, Err: fmt.Errorf("stderr pipe: %w", err), DurationMs: 0}
+			return
+		}
+
+		if err := cmd.Start(); err != nil {
+			results <- StreamResult{ExitCode: -1, Err: fmt.Errorf("start: %w", err), DurationMs: 0}
+			return
+		}
+
+		// Supervisor: on ctx cancellation send SIGTERM to the group, wait up
+		// to 2s, then SIGKILL. It stays alive until the child has been reaped
+		// (procDone), so a child that closed its stdio but keeps running is
+		// still terminated on cancellation.
+		procDone := make(chan struct{})
+		go func() {
+			select {
+			case <-procDone:
+				return
+			case <-ctx.Done():
+			}
+			pgid := -cmd.Process.Pid
+			_ = syscall.Kill(pgid, syscall.SIGTERM)
+			grace := time.NewTimer(2 * time.Second)
+			defer grace.Stop()
+			select {
+			case <-grace.C:
+				_ = syscall.Kill(pgid, syscall.SIGKILL)
+			case <-procDone:
+			}
+		}()
+
+		// send forwards one line to the caller; once ctx is done the line may
+		// be dropped instead of blocking on a caller that stopped reading.
+		send := func(stream, line string) {
+			ev := StreamEvent{Stream: stream, Line: line, Time: time.Now()}
+			select {
+			case events <- ev:
+			case <-ctx.Done():
+			}
+		}
+
+		// Read stdout and stderr concurrently.
+		const bufSize = 1024 * 1024 // 1 MB for long lines (sd-cli progress, etc.)
+		var wg sync.WaitGroup
+
+		scanPipe := func(r io.Reader, stream string) {
+			defer wg.Done()
+			sc := bufio.NewScanner(r)
+			sc.Buffer(make([]byte, bufSize), bufSize)
+			for sc.Scan() {
+				send(stream, sc.Text())
+			}
+			// The scanner gives up on a token larger than its buffer or on a
+			// read error. Keep draining so the child cannot block forever on
+			// a pipe nobody reads, which would hang cmd.Wait below.
+			if sc.Err() != nil {
+				_, _ = io.Copy(io.Discard, r)
+			}
+		}
+
+		wg.Add(2)
+		go scanPipe(stdoutPipe, "stdout")
+		go scanPipe(stderrPipe, "stderr")
+
+		// All pipe reads must finish before Wait, which closes the pipes.
+		wg.Wait()
+
+		exitCode := 0
+		var waitErr error
+		if err := cmd.Wait(); err != nil {
+			waitErr = err
+			if exitErr, ok := err.(*exec.ExitError); ok {
+				exitCode = exitErr.ExitCode()
+				waitErr = nil // a non-zero exit status is not a spawn error
+			}
+		}
+		close(procDone) // child reaped: stop the supervisor
+
+		// If the context was cancelled, report it as a cancellation error.
+		if ctx.Err() != nil && waitErr == nil {
+			waitErr = ctx.Err()
+		}
+
+		results <- StreamResult{
+			ExitCode:   exitCode,
+			Err:        waitErr,
+			DurationMs: time.Since(start).Milliseconds(),
+		}
+	}()
+
+	return events, results
+}
@@ -0,0 +1,69 @@
+---
+name: subprocess_stream
+kind: function
+lang: go
+domain: core
+version: "1.0.0"
+purity: impure
+signature: "func SubprocessStream(ctx context.Context, name string, args []string, env []string, stdin io.Reader) (<-chan StreamEvent, <-chan StreamResult)"
+description: "Lanza un subproceso y retorna dos canales: uno con StreamEvent (linea de stdout/stderr con timestamp) y otro con un unico StreamResult (ExitCode, Err, DurationMs). Cancelar ctx envia SIGTERM al proceso; si no termina en 2s, SIGKILL."
+tags: [subprocess, exec, stream, stdout, stderr, process, concurrency, io, primitiva]
+uses_functions: []
+uses_types: []
+returns: []
+returns_optional: false
+error_type: "error_go_core"
+imports: [bufio, context, fmt, io, os, os/exec, sync, syscall, time]
+params:
+  - name: ctx
+    desc: "Contexto de cancelacion. Al cancelar, el proceso recibe SIGTERM; si no muere en 2s, SIGKILL. Usar context.WithTimeout para acotar duracion maxima."
+  - name: name
+    desc: "Nombre o path del ejecutable a lanzar (ej. 'echo', '/usr/bin/python3')."
+  - name: args
+    desc: "Argumentos del proceso. Puede ser nil o vacio."
+  - name: env
+    desc: "Variables de entorno adicionales en formato 'KEY=VALUE'. Se concatenan con os.Environ(). Puede ser nil."
+  - name: stdin
+    desc: "Stdin del proceso. Puede ser nil si el proceso no necesita entrada."
+output: "Dos canales: events (<-chan StreamEvent) cerrado cuando ambos pipes EOF; result (<-chan StreamResult) con exactamente un valor cuando el proceso termina. El caller DEBE consumir events hasta cierre o cancelar ctx para evitar bloquear goroutines internas."
+tested: true
+tests:
+  - "echo stdout llega como evento y ExitCode 0"
+  - "stderr llega como evento con stream stderr"
+  - "exit code no-cero se reporta en StreamResult"
+  - "ctx cancelado termina el proceso"
+  - "multiples lineas stdout"
+test_file_path: "functions/core/subprocess_stream_test.go"
+file_path: "functions/core/subprocess_stream.go"
+---
+
+## Ejemplo
+
+```go
+ctx, cancel := context.WithTimeout(context.Background(), 30*time.Second)
+defer cancel()
+
+events, results := core.SubprocessStream(ctx, "grep", []string{"-rn", "TODO", "."}, nil, nil)
+
+for ev := range events {
+    switch ev.Stream {
+    case "stdout":
+        fmt.Println(ev.Line)
+    case "stderr":
+        fmt.Fprintln(os.Stderr, "[stderr]", ev.Line)
+    }
+}
+
+res := <-results
+if res.ExitCode != 0 || res.Err != nil {
+    log.Printf("grep exit=%d err=%v duration=%dms", res.ExitCode, res.Err, res.DurationMs)
+}
+```
+
+## Notas
+
+- El canal `events` tiene buffer de 64. Si el caller deja de consumir y el buffer se llena, las goroutinas internas se bloquean hasta que haya espacio o el ctx sea cancelado.
+- El scanner de cada pipe tiene un buffer de 1 MB para tolerar lineas muy largas (progreso de CLIs tipo sd-cli, barras ANSI largas).
+- Los structs `StreamEvent` y `StreamResult` se declaran en el mismo archivo para que el paquete `core` los exporte sin imports adicionales.
+- Generaliza el patron de `claude_stream_go_core` desacoplando el lanzamiento de subprocesos del protocolo especifico de claude (NDJSON/stream-json). `claude_stream_go_core` puede reimplementarse internamente usando esta funcion como primitiva.
+- `cmd.SysProcAttr = &syscall.SysProcAttr{Setpgid: true}` crea un process group propio; SIGTERM/SIGKILL se envian con `Kill(-pgid, sig)` para matar tambien los procesos hijo del hijo.
@@ -0,0 +1,132 @@
+package core
+
+import (
+	"context"
+	"testing"
+	"time"
+)
+
+// TestSubprocessStream exercises SubprocessStream against small real
+// commands (echo, sh, sleep): event delivery for stdout and stderr, exit-code
+// reporting, context cancellation and multi-line output.
+func TestSubprocessStream(t *testing.T) {
+	// withDeadline returns the 5 second safety context every subtest runs under.
+	withDeadline := func() (context.Context, context.CancelFunc) {
+		return context.WithTimeout(context.Background(), 5*time.Second)
+	}
+
+	// run starts the command with no extra env and no stdin, drains the event
+	// channel until it is closed and then reads the single result value.
+	run := func(ctx context.Context, name string, args ...string) ([]StreamEvent, StreamResult) {
+		events, results := SubprocessStream(ctx, name, args, nil, nil)
+		var collected []StreamEvent
+		for ev := range events {
+			collected = append(collected, ev)
+		}
+		return collected, <-results
+	}
+
+	t.Run("echo stdout llega como evento y ExitCode 0", func(t *testing.T) {
+		ctx, cancel := withDeadline()
+		defer cancel()
+
+		got, res := run(ctx, "echo", "hola")
+
+		if res.ExitCode != 0 {
+			t.Errorf("ExitCode = %d, want 0 (err: %v)", res.ExitCode, res.Err)
+		}
+		if res.Err != nil {
+			t.Errorf("unexpected Err: %v", res.Err)
+		}
+		if len(got) != 1 {
+			t.Fatalf("got %d events, want 1", len(got))
+		}
+		first := got[0]
+		if first.Stream != "stdout" {
+			t.Errorf("Stream = %q, want %q", first.Stream, "stdout")
+		}
+		if first.Line != "hola" {
+			t.Errorf("Line = %q, want %q", first.Line, "hola")
+		}
+	})
+
+	t.Run("stderr llega como evento con stream stderr", func(t *testing.T) {
+		ctx, cancel := withDeadline()
+		defer cancel()
+
+		// The shell redirects echo's output to stderr.
+		got, res := run(ctx, "sh", "-c", "echo error_msg >&2")
+
+		if res.ExitCode != 0 {
+			t.Errorf("ExitCode = %d, want 0", res.ExitCode)
+		}
+		if len(got) != 1 {
+			t.Fatalf("got %d events, want 1", len(got))
+		}
+		first := got[0]
+		if first.Stream != "stderr" {
+			t.Errorf("Stream = %q, want %q", first.Stream, "stderr")
+		}
+		if first.Line != "error_msg" {
+			t.Errorf("Line = %q, want %q", first.Line, "error_msg")
+		}
+	})
+
+	t.Run("exit code no-cero se reporta en StreamResult", func(t *testing.T) {
+		ctx, cancel := withDeadline()
+		defer cancel()
+
+		_, res := run(ctx, "sh", "-c", "exit 42")
+
+		if res.ExitCode != 42 {
+			t.Errorf("ExitCode = %d, want 42", res.ExitCode)
+		}
+		if res.Err != nil {
+			t.Errorf("unexpected Err: %v", res.Err)
+		}
+	})
+
+	t.Run("ctx cancelado termina el proceso", func(t *testing.T) {
+		parent, cancel := withDeadline()
+		defer cancel()
+
+		// Long-running process under a context that expires almost immediately.
+		short, cancelShort := context.WithTimeout(parent, 100*time.Millisecond)
+		defer cancelShort()
+
+		_, res := run(short, "sleep", "60")
+
+		// After cancellation the result must carry a non-zero exit code or an error.
+		if res.ExitCode == 0 && res.Err == nil {
+			t.Error("expected non-zero exit or ctx error after cancellation")
+		}
+		if res.DurationMs > 3000 {
+			t.Errorf("took %d ms, expected < 3000 (should have been killed)", res.DurationMs)
+		}
+	})
+
+	t.Run("multiples lineas stdout", func(t *testing.T) {
+		ctx, cancel := withDeadline()
+		defer cancel()
+
+		got, _ := run(ctx, "sh", "-c", "printf 'a\nb\nc\n'")
+
+		var lines []string
+		for _, ev := range got {
+			if ev.Stream == "stdout" {
+				lines = append(lines, ev.Line)
+			}
+		}
+
+		if len(lines) != 3 {
+			t.Fatalf("got %d stdout lines, want 3: %v", len(lines), lines)
+		}
+		for i, w := range []string{"a", "b", "c"} {
+			if lines[i] != w {
+				t.Errorf("line[%d] = %q, want %q", i, lines[i], w)
+			}
+		}
+	})
+}
@@ -0,0 +1,238 @@
+package infra
+
+import (
+	"context"
+	"fmt"
+	"os"
+	"os/exec"
+	"path/filepath"
+	"strings"
+	"time"
+)
+
+// MlEnvCheck holds the result of a single ML environment probe.
+// Every probe helper in this file returns one value of this type and
+// AuditMlEnv collects them into MlEnvReport.Checks.
+type MlEnvCheck struct {
+	Name    string `json:"name"`              // check identifier, e.g. "nvcc", "python_venv", "torch"
+	Status  string `json:"status"`            // one of "ok", "missing", "warning", "unknown"
+	Version string `json:"version,omitempty"` // version string if detected; left out of the JSON when empty
+	Detail  string `json:"detail,omitempty"`  // human-readable extra info; left out of the JSON when empty
+}
+
+// MlEnvReport is the full ML environment audit result returned by AuditMlEnv.
+type MlEnvReport struct {
+	Gpus        []GpuInfo    `json:"gpus"`         // GPUs returned by GetGpuInfo; an empty slice when that call fails
+	Checks      []MlEnvCheck `json:"checks"`       // one entry per probe, in the order the probes ran
+	OverallOK   bool         `json:"overall_ok"`   // true when a GPU was found and no non-optional check is "missing"
+	GeneratedAt int64        `json:"generated_at"` // Unix timestamp in seconds, taken when the audit starts
+}
+
+// AuditMlEnv audits the ML environment rooted at registryRoot.
+//
+// It queries the GPUs through GetGpuInfo and then runs, in this order, the
+// probes for nvidia-smi, nvcc, the Python venv under
+// <registryRoot>/python/.venv, the Python packages torch, diffusers,
+// transformers, huggingface_hub and stable_diffusion_cpp_python, the sd and
+// llama-cli binaries, and the image-generation vault directory.
+//
+// OverallOK is true only when at least one GPU was reported and no check has
+// status "missing", except for the optional checks
+// stable_diffusion_cpp_python, sd_cli and llama_cpp. Every other check,
+// including nvidia_smi, nvcc and imagegen_vault, clears OverallOK when it is
+// "missing". The statuses "warning" and "unknown" never clear it.
+//
+// The returned error is always nil in the current implementation; a failure
+// of GetGpuInfo is recorded as an empty GPU list rather than returned.
+func AuditMlEnv(registryRoot string) (MlEnvReport, error) {
+	// Timestamp is taken before any probe runs.
+	startedAt := time.Now().Unix()
+
+	// Checks whose absence is tolerated when computing OverallOK.
+	optional := map[string]bool{
+		"stable_diffusion_cpp_python": true,
+		"sd_cli":                      true,
+		"llama_cpp":                   true,
+	}
+
+	// GPU inventory; a query error is treated the same as "no GPU found".
+	gpus, gpuErr := GetGpuInfo()
+	if gpuErr != nil {
+		gpus = []GpuInfo{}
+	}
+
+	// Driver tooling, CUDA compiler and the venv itself.
+	checks := []MlEnvCheck{
+		probeCommand("nvidia_smi", "nvidia-smi", []string{"--version"}, 5),
+		probeNvcc(),
+		probeVenv(registryRoot),
+	}
+
+	// Python packages are imported with the venv interpreter.
+	venvPy := filepath.Join(registryRoot, "python", ".venv", "bin", "python3")
+	packages := []string{"torch", "diffusers", "transformers", "huggingface_hub", "stable_diffusion_cpp_python"}
+	for _, pkg := range packages {
+		checks = append(checks, probePythonPackage(venvPy, pkg))
+	}
+
+	// Optional CLIs (sd.cpp, llama.cpp) and the local model vault.
+	checks = append(checks,
+		probeCommand("sd_cli", "sd", []string{"--version"}, 5),
+		probeCommand("llama_cpp", "llama-cli", []string{"--version"}, 5),
+		probeImagegenVault(),
+	)
+
+	healthy := len(gpus) > 0
+	for _, check := range checks {
+		if check.Status == "missing" && !optional[check.Name] {
+			healthy = false
+		}
+	}
+
+	return MlEnvReport{
+		Gpus:        gpus,
+		Checks:      checks,
+		OverallOK:   healthy,
+		GeneratedAt: startedAt,
+	}, nil
+}
+
+// probeCommand checks whether a binary is available in PATH by running it with
+// the given args and recording any version output.
+func probeCommand(name, binary string, args []string, timeoutSec int) MlEnvCheck {
+	ctx, cancel := context.WithTimeout(context.Background(), time.Duration(timeoutSec)*time.Second)
+	defer cancel()
+
+	path, err := exec.LookPath(binary)
+	if err != nil {
+		return MlEnvCheck{Name: name, Status: "missing", Detail: fmt.Sprintf("%s not found in PATH", binary)}
+	}
+
+	out, err := exec.CommandContext(ctx, path, args...).CombinedOutput()
+	version := strings.TrimSpace(string(out))
+	if len(version) > 120 {
+		version = version[:120]
+	}
+	if err != nil {
+		return MlEnvCheck{Name: name, Status: "warning", Version: version, Detail: fmt.Sprintf("exit error: %v", err)}
+	}
+	return MlEnvCheck{Name: name, Status: "ok", Version: version}
+}
+
+// probeNvcc extracts the CUDA toolkit version from nvcc --version output.
+func probeNvcc() MlEnvCheck {
+	ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
+	defer cancel()
+
+	path, err := exec.LookPath("nvcc")
+	if err != nil {
+		return MlEnvCheck{Name: "nvcc", Status: "missing", Detail: "nvcc not found in PATH (CUDA toolkit not installed)"}
+	}
+
+	out, err := exec.CommandContext(ctx, path, "--version").CombinedOutput()
+	if err != nil {
+		return MlEnvCheck{Name: "nvcc", Status: "warning", Detail: fmt.Sprintf("nvcc --version failed: %v", err)}
+	}
+
+	// Extract version from line like: "Cuda compilation tools, release 12.4, V12.4.99"
+	version := ""
+	for _, line := range strings.Split(string(out), "\n") {
+		if strings.Contains(line, "release") {
+			parts := strings.Split(line, ",")
+			for _, p := range parts {
+				p = strings.TrimSpace(p)
+				if strings.HasPrefix(p, "release") {
+					version = strings.TrimSpace(strings.TrimPrefix(p, "release"))
+					break
+				}
+			}
+			break
+		}
+	}
+	if version == "" {
+		version = strings.TrimSpace(string(out))
+		if len(version) > 80 {
+			version = version[:80]
+		}
+	}
+	return MlEnvCheck{Name: "nvcc", Status: "ok", Version: version}
+}
+
+// probeVenv checks that the Python venv exists and is functional.
+func probeVenv(registryRoot string) MlEnvCheck {
+	py := filepath.Join(registryRoot, "python", ".venv", "bin", "python3")
+	if _, err := os.Stat(py); os.IsNotExist(err) {
+		return MlEnvCheck{Name: "python_venv", Status: "missing", Detail: fmt.Sprintf("not found: %s", py)}
+	}
+
+	ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
+	defer cancel()
+
+	out, err := exec.CommandContext(ctx, py, "--version").CombinedOutput()
+	version := strings.TrimSpace(string(out))
+	if err != nil {
+		return MlEnvCheck{Name: "python_venv", Status: "warning", Version: version, Detail: fmt.Sprintf("python3 --version failed: %v", err)}
+	}
+	return MlEnvCheck{Name: "python_venv", Status: "ok", Version: version}
+}
+
+// probePythonPackage imports a package in the venv Python and extracts __version__.
+func probePythonPackage(venvPy, pkg string) MlEnvCheck {
+	// Map package name → import name (for packages with different import names).
+	importName := pkg
+	switch pkg {
+	case "stable_diffusion_cpp_python":
+		importName = "stable_diffusion_cpp"
+	case "huggingface_hub":
+		importName = "huggingface_hub"
+	}
+
+	// Check that the venv python binary exists first.
+	if _, err := os.Stat(venvPy); os.IsNotExist(err) {
+		return MlEnvCheck{Name: pkg, Status: "unknown", Detail: "python_venv not available"}
+	}
+
+	ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
+	defer cancel()
+
+	script := fmt.Sprintf("import %s; v = getattr(%s, '__version__', None); print(v or 'unknown')", importName, importName)
+	out, err := exec.CommandContext(ctx, venvPy, "-c", script).CombinedOutput()
+	output := strings.TrimSpace(string(out))
+
+	if err != nil {
+		// Module not found → missing; other errors → warning.
+		detail := output
+		if len(detail) > 200 {
+			detail = detail[:200]
+		}
+		if strings.Contains(output, "ModuleNotFoundError") || strings.Contains(output, "No module named") {
+			return MlEnvCheck{Name: pkg, Status: "missing", Detail: fmt.Sprintf("%s not installed", importName)}
+		}
+		return MlEnvCheck{Name: pkg, Status: "warning", Detail: detail}
+	}
+	return MlEnvCheck{Name: pkg, Status: "ok", Version: output}
+}
+
+// probeImagegenVault checks that ~/vaults/imagegen_models exists and lists subdirs.
+func probeImagegenVault() MlEnvCheck {
+	home, err := os.UserHomeDir()
+	if err != nil {
+		return MlEnvCheck{Name: "imagegen_vault", Status: "unknown", Detail: "cannot determine home directory"}
+	}
+	vaultPath := filepath.Join(home, "vaults", "imagegen_models")
+	entries, err := os.ReadDir(vaultPath)
+	if os.IsNotExist(err) {
+		return MlEnvCheck{Name: "imagegen_vault", Status: "missing", Detail: fmt.Sprintf("vault not found: %s", vaultPath)}
+	}
+	if err != nil {
+		return MlEnvCheck{Name: "imagegen_vault", Status: "warning", Detail: fmt.Sprintf("cannot read vault: %v", err)}
+	}
+
+	subdirs := []string{}
+	for _, e := range entries {
+		if e.IsDir() {
+			subdirs = append(subdirs, e.Name())
+		}
+	}
+	detail := fmt.Sprintf("subdirs: %s", strings.Join(subdirs, ", "))
+	if len(subdirs) == 0 {
+		detail = "vault exists but is empty"
+	}
+	return MlEnvCheck{Name: "imagegen_vault", Status: "ok", Detail: detail}
+}
@@ -0,0 +1,67 @@
+---
+name: audit_ml_env
+kind: function
+lang: go
+domain: infra
+version: "1.0.0"
+purity: impure
+signature: "func AuditMlEnv(registryRoot string) (MlEnvReport, error)"
+description: "Audita el entorno ML del sistema: GPUs NVIDIA, toolkit CUDA, venv Python, paquetes clave (torch, diffusers, transformers, huggingface_hub), herramientas CLI (sd, llama-cli) y el vault de modelos. Retorna un MlEnvReport con OverallOK=true solo si hay al menos 1 GPU y los checks criticos estan en ok/warning."
+tags: [ml, cuda, gpu, nvidia, audit, doctor, infra, torch, diffusers]
+uses_functions: [get_gpu_info_go_infra]
+uses_types: [gpu_info_go_infra]
+returns: []
+returns_optional: false
+error_type: "error_go_core"
+imports: [context, fmt, os, os/exec, path/filepath, strings, time]
+tested: true
+tests:
+  - "report no nil y tiene checks"
+  - "generated_at es positivo"
+  - "checks tiene al menos 4 entradas"
+  - "gpus puede ser vacio en CI"
+test_file_path: "functions/infra/audit_ml_env_test.go"
+file_path: "functions/infra/audit_ml_env.go"
+params:
+  - name: registryRoot
+    desc: "Ruta absoluta a la raiz del fn_registry. Se usa para localizar python/.venv/bin/python3 y probar paquetes instalados."
+output: "MlEnvReport con Gpus (puede estar vacio si no hay NVIDIA), Checks con estado por herramienta/paquete, OverallOK y GeneratedAt (unix timestamp)."
+---
+
+## Checks realizados
+
+| Check | Tipo | Critico |
+|---|---|---|
+| `nvidia_smi` | binary in PATH | si (`missing` pone `OverallOK=false`) |
+| `nvcc` | CUDA toolkit version | si (`missing` pone `OverallOK=false`) |
+| `python_venv` | exists + `python3 --version` | si |
+| `torch` | `import torch; __version__` | si |
+| `diffusers` | `import diffusers; __version__` | si |
+| `transformers` | `import transformers; __version__` | si |
+| `huggingface_hub` | `import huggingface_hub; __version__` | si |
+| `stable_diffusion_cpp_python` | `import stable_diffusion_cpp` | no (opcional) |
+| `sd_cli` | `sd --version` in PATH | no (opcional) |
+| `llama_cpp` | `llama-cli --version` in PATH | no (opcional) |
+| `imagegen_vault` | `~/vaults/imagegen_models` exists | si (`missing` pone `OverallOK=false`) |
+
+## Ejemplo
+
+```go
+root := "/home/lucas/fn_registry"
+report, err := AuditMlEnv(root)
+if err != nil {
+    log.Fatal(err)
+}
+for _, c := range report.Checks {
+    fmt.Printf("%-40s %s  %s\n", c.Name, c.Status, c.Version)
+}
+fmt.Printf("OverallOK: %v\n", report.OverallOK)
+```
+
+## Notas
+
+- Cada check tiene timeout de 5 segundos para no bloquear en entornos sin GPU.
+- `stable_diffusion_cpp_python`, `sd_cli` y `llama_cpp` son opcionales: si estan missing, `OverallOK` no se ve afectado.
+- `OverallOK` requiere al menos 1 GPU NVIDIA detectada via `GetGpuInfo()`.
+- No escribe nada en disco. Read-only.
+- Se expone como `fn doctor ml` via cmd/fn/doctor.go.
@@ -0,0 +1,53 @@
+package infra
+
+import (
+	"testing"
+)
+
+// TestAuditMlEnv runs AuditMlEnv once per subtest against the real machine
+// and checks only properties that hold with or without a GPU.
+func TestAuditMlEnv(t *testing.T) {
+	// Relative path handed to AuditMlEnv as the registry root: two levels
+	// above the directory the test runs in.
+	registryRoot := "../.."
+
+	cases := []struct {
+		name   string
+		verify func(t *testing.T, report MlEnvReport)
+	}{
+		{
+			name: "report no nil y tiene checks",
+			verify: func(t *testing.T, report MlEnvReport) {
+				if report.Checks == nil {
+					t.Fatal("report.Checks is nil")
+				}
+			},
+		},
+		{
+			name: "generated_at es positivo",
+			verify: func(t *testing.T, report MlEnvReport) {
+				if report.GeneratedAt <= 0 {
+					t.Errorf("GeneratedAt should be positive unix timestamp, got %d", report.GeneratedAt)
+				}
+			},
+		},
+		{
+			name: "checks tiene al menos 4 entradas",
+			verify: func(t *testing.T, report MlEnvReport) {
+				if len(report.Checks) < 4 {
+					t.Errorf("expected at least 4 checks, got %d", len(report.Checks))
+				}
+			},
+		},
+		{
+			name: "gpus puede ser vacio en CI",
+			verify: func(t *testing.T, report MlEnvReport) {
+				// An empty GPU list is fine (CI without a GPU); a nil slice is not.
+				if report.Gpus == nil {
+					t.Error("report.Gpus should be a non-nil slice (can be empty)")
+				}
+			},
+		},
+	}
+
+	for _, tc := range cases {
+		tc := tc
+		t.Run(tc.name, func(t *testing.T) {
+			report, err := AuditMlEnv(registryRoot)
+			if err != nil {
+				t.Fatalf("AuditMlEnv returned error: %v", err)
+			}
+			tc.verify(t, report)
+		})
+	}
+}
@@ -0,0 +1,60 @@
+package infra
+
+import (
+	"encoding/csv"
+	"errors"
+	"fmt"
+	"os/exec"
+	"strconv"
+	"strings"
+)
+
+// GetGpuInfo queries NVIDIA GPUs via nvidia-smi and returns one GpuInfo per
+// CSV line of its output.
+//
+// Returns:
+//   - an empty, non-nil slice and a nil error when nvidia-smi is not found
+//     in PATH or exits with a non-zero status (treated as "no NVIDIA GPU");
+//   - a nil slice and an error when nvidia-smi cannot be run for any other
+//     reason or its output is not valid CSV;
+//   - otherwise the parsed GPUs. Lines with fewer than 6 fields are skipped
+//     and numeric fields that do not parse are left at 0.
+//
+// NOTE(review): confirm that `cuda_version` is an accepted --query-gpu field
+// on the target drivers (see `nvidia-smi --help-query-gpu`). If nvidia-smi
+// rejects the query with a non-zero exit status, this function reports it as
+// "no GPU" because every exec.ExitError is mapped to an empty result.
+//
+// NOTE(review): the command runs without a timeout, so a hung nvidia-smi
+// blocks the caller.
+func GetGpuInfo() ([]GpuInfo, error) {
+	out, err := exec.Command(
+		"nvidia-smi",
+		"--query-gpu=index,name,memory.total,memory.free,driver_version,cuda_version",
+		"--format=csv,noheader,nounits",
+	).Output()
+
+	if err != nil {
+		// nvidia-smi not installed or exiting non-zero is not an error.
+		var exitErr *exec.ExitError
+		if errors.Is(err, exec.ErrNotFound) || errors.As(err, &exitErr) {
+			return []GpuInfo{}, nil
+		}
+		return nil, fmt.Errorf("gpu_info: nvidia-smi: %w", err)
+	}
+
+	r := csv.NewReader(strings.NewReader(strings.TrimSpace(string(out))))
+	r.TrimLeadingSpace = true
+	// Accept records of any width. With the default (0) the reader demands
+	// that every record match the first one's field count, so a single
+	// malformed line would fail the whole call instead of being skipped by
+	// the length check below.
+	r.FieldsPerRecord = -1
+
+	records, err := r.ReadAll()
+	if err != nil {
+		return nil, fmt.Errorf("gpu_info: parse csv: %w", err)
+	}
+
+	gpus := make([]GpuInfo, 0, len(records))
+	for _, rec := range records {
+		// Skip lines that do not carry all six queried fields.
+		if len(rec) < 6 {
+			continue
+		}
+
+		// Conversion errors are ignored on purpose: the field stays at the
+		// value Atoi returns (0 for non-numeric text).
+		idx, _ := strconv.Atoi(strings.TrimSpace(rec[0]))
+		totalMb, _ := strconv.Atoi(strings.TrimSpace(rec[2]))
+		freeMb, _ := strconv.Atoi(strings.TrimSpace(rec[3]))
+
+		gpus = append(gpus, GpuInfo{
+			Index:         idx,
+			Name:          strings.TrimSpace(rec[1]),
+			VramTotalMb:   totalMb,
+			VramFreeMb:    freeMb,
+			DriverVersion: strings.TrimSpace(rec[4]),
+			CudaVersion:   strings.TrimSpace(rec[5]),
+		})
+	}
+
+	return gpus, nil
+}
@@ -0,0 +1,70 @@
+---
+name: get_gpu_info
+kind: function
+lang: go
+domain: infra
+version: "1.0.0"
+purity: impure
+signature: "func GetGpuInfo() ([]GpuInfo, error)"
+description: "Consulta GPUs NVIDIA via nvidia-smi y retorna un slice de GpuInfo con index, nombre, VRAM total/libre, driver y version CUDA. Si nvidia-smi no esta instalado o no hay GPU NVIDIA, retorna slice vacio y nil (ausencia de hardware no es error)."
+tags: [gpu, nvidia, cuda, hardware, infra, probe]
+uses_functions: []
+uses_types: ["gpu_info_go_infra"]
+returns: []
+returns_optional: false
+error_type: "error_go_core"
+imports: [encoding/csv, errors, fmt, os/exec, strconv, strings]
+params:
+  - name: (ninguno)
+    desc: "No toma parametros. Lee el estado del sistema via nvidia-smi."
+output: "Slice de GpuInfo con una entrada por GPU detectada. Slice vacio si no hay GPUs NVIDIA o nvidia-smi no esta instalado. Error solo si nvidia-smi existe pero falla inesperadamente al parsear la salida CSV."
+tested: true
+tests:
+  - "retorna slice vacio y nil cuando no hay GPU NVIDIA"
+  - "linea GPU RTX 3080 tipica"
+  - "dos GPUs en el CSV"
+  - "CSV vacio retorna slice vacio"
+  - "linea con menos de 6 campos se ignora"
+  - "espacios extra en los valores se eliminan"
+  - "campos del struct GpuInfo correctos"
+test_file_path: "functions/infra/get_gpu_info_test.go"
+file_path: "functions/infra/get_gpu_info.go"
+---
+
+## Ejemplo
+
+```go
+gpus, err := GetGpuInfo()
+if err != nil {
+    log.Fatal(err)
+}
+if len(gpus) == 0 {
+    fmt.Println("No NVIDIA GPUs detected")
+} else {
+    for _, g := range gpus {
+        fmt.Printf("[%d] %s  VRAM: %d/%d MiB  Driver: %s  CUDA: %s\n",
+            g.Index, g.Name, g.VramFreeMb, g.VramTotalMb,
+            g.DriverVersion, g.CudaVersion)
+    }
+}
+```
+
+## Salida nvidia-smi
+
+Ejecuta:
+```
+nvidia-smi --query-gpu=index,name,memory.total,memory.free,driver_version,cuda_version --format=csv,noheader,nounits
+```
+
+Ejemplo de salida con una GPU:
+```
+0, NVIDIA GeForce RTX 3080, 10240, 8192, 550.54.15, 12.4
+```
+
+## Notas
+
+- Requiere `nvidia-smi` en PATH (parte del driver NVIDIA).
+- La columna `cuda_version` en nvidia-smi refleja la version maxima de CUDA soportada por el driver, no la del toolkit instalado.
+- Para comprobar el toolkit CUDA instalado, usar `cuda_toolkit_check_bash_infra`.
+- En maquinas sin GPU NVIDIA retorna `([]GpuInfo{}, nil)` — el caller puede tratar esto como "sin GPU disponible".
+- No ejecutar tests automatizados para esta funcion en CI sin GPU; verificar manualmente o con mock.
@@ -0,0 +1,165 @@
+package infra
+
+import (
+	"strconv"
+	"strings"
+	"testing"
+)
+
+// TestGetGpuInfoNoGpu calls GetGpuInfo on the real machine and checks that
+// it returns no error and a non-nil slice. It is meant to hold whether or
+// not nvidia-smi / an NVIDIA GPU is present, since the "no GPU" case is
+// reported as an empty slice with a nil error.
+func TestGetGpuInfoNoGpu(t *testing.T) {
+	t.Run("retorna slice vacio y nil cuando no hay GPU NVIDIA", func(t *testing.T) {
+		found, queryErr := GetGpuInfo()
+
+		if queryErr != nil {
+			t.Errorf("GetGpuInfo() error inesperado: %v", queryErr)
+		}
+
+		// Without nvidia-smi the result must be an empty slice, never nil.
+		if found == nil {
+			t.Error("GetGpuInfo() retorno nil, se esperaba slice vacio []GpuInfo{}")
+		}
+	})
+}
+
+// parseCsvNvidiaSmi is a test-only stand-in for the parsing step of
+// GetGpuInfo. It takes the text produced by
+// nvidia-smi --format=csv,noheader,nounits and returns one GpuInfo per line.
+//
+// Lines are split on "," with strings.Split (no CSV quoting rules), every
+// field is whitespace-trimmed, lines with fewer than 6 fields are skipped
+// and numeric fields that fail to parse keep the value Atoi returns.
+// The error result is always nil.
+func parseCsvNvidiaSmi(output string) ([]GpuInfo, error) {
+	body := strings.TrimSpace(output)
+	if body == "" {
+		return []GpuInfo{}, nil
+	}
+
+	// toInt trims and converts, discarding the conversion error.
+	toInt := func(raw string) int {
+		n, _ := strconv.Atoi(strings.TrimSpace(raw))
+		return n
+	}
+
+	rows := strings.Split(body, "\n")
+	result := make([]GpuInfo, 0, len(rows))
+	for _, row := range rows {
+		cols := strings.Split(row, ",")
+		if len(cols) < 6 {
+			continue
+		}
+		result = append(result, GpuInfo{
+			Index:         toInt(cols[0]),
+			Name:          strings.TrimSpace(cols[1]),
+			VramTotalMb:   toInt(cols[2]),
+			VramFreeMb:    toInt(cols[3]),
+			DriverVersion: strings.TrimSpace(cols[4]),
+			CudaVersion:   strings.TrimSpace(cols[5]),
+		})
+	}
+	return result, nil
+}
+
+// TestParseCsvNvidiaSmi checks parseCsvNvidiaSmi against in-memory CSV
+// samples, so it needs neither a GPU nor nvidia-smi. Field values are only
+// compared for cases that expect exactly one GPU.
+func TestParseCsvNvidiaSmi(t *testing.T) {
+	tests := []struct {
+		name          string
+		csvInput      string
+		wantLen       int
+		wantIndex     int
+		wantName      string
+		wantVramTotal int
+		wantVramFree  int
+		wantDriver    string
+		wantCuda      string
+	}{
+		{
+			name:          "linea GPU RTX 3080 tipica",
+			csvInput:      "0, NVIDIA GeForce RTX 3080, 10240, 8192, 550.54.15, 12.4",
+			wantLen:       1,
+			wantIndex:     0,
+			wantName:      "NVIDIA GeForce RTX 3080",
+			wantVramTotal: 10240,
+			wantVramFree:  8192,
+			wantDriver:    "550.54.15",
+			wantCuda:      "12.4",
+		},
+		{
+			name:     "dos GPUs en el CSV",
+			csvInput: "0, GPU A, 8192, 4096, 525.0, 12.0\n1, GPU B, 24576, 20000, 525.0, 12.0",
+			wantLen:  2,
+		},
+		{
+			name:     "CSV vacio retorna slice vacio",
+			csvInput: "",
+			wantLen:  0,
+		},
+		{
+			name:     "linea con menos de 6 campos se ignora",
+			csvInput: "0, GPU, 8192",
+			wantLen:  0,
+		},
+		{
+			name:          "espacios extra en los valores se eliminan",
+			csvInput:      " 1 ,  NVIDIA RTX 4090 ,  24576 ,  20000 ,  545.0 ,  12.6 ",
+			wantLen:       1,
+			wantIndex:     1,
+			wantName:      "NVIDIA RTX 4090",
+			wantVramTotal: 24576,
+			wantVramFree:  20000,
+			wantDriver:    "545.0",
+			wantCuda:      "12.6",
+		},
+	}
+
+	for _, want := range tests {
+		t.Run(want.name, func(t *testing.T) {
+			parsed, err := parseCsvNvidiaSmi(want.csvInput)
+			if err != nil {
+				t.Fatalf("error inesperado: %v", err)
+			}
+			if len(parsed) != want.wantLen {
+				t.Fatalf("len(gpus) = %d, quería %d", len(parsed), want.wantLen)
+			}
+
+			// Per-field expectations exist only for single-GPU cases.
+			if want.wantLen != 1 {
+				return
+			}
+
+			got := parsed[0]
+			if got.Index != want.wantIndex {
+				t.Errorf("Index = %d, quería %d", got.Index, want.wantIndex)
+			}
+			if got.Name != want.wantName {
+				t.Errorf("Name = %q, quería %q", got.Name, want.wantName)
+			}
+			if got.VramTotalMb != want.wantVramTotal {
+				t.Errorf("VramTotalMb = %d, quería %d", got.VramTotalMb, want.wantVramTotal)
+			}
+			if got.VramFreeMb != want.wantVramFree {
+				t.Errorf("VramFreeMb = %d, quería %d", got.VramFreeMb, want.wantVramFree)
+			}
+			if got.DriverVersion != want.wantDriver {
+				t.Errorf("DriverVersion = %q, quería %q", got.DriverVersion, want.wantDriver)
+			}
+			if got.CudaVersion != want.wantCuda {
+				t.Errorf("CudaVersion = %q, quería %q", got.CudaVersion, want.wantCuda)
+			}
+		})
+	}
+}
+
+// TestGpuInfoStruct builds a GpuInfo literal and reads back its Index, Name,
+// VramTotalMb and VramFreeMb fields.
+func TestGpuInfoStruct(t *testing.T) {
+	t.Run("campos del struct GpuInfo correctos", func(t *testing.T) {
+		info := GpuInfo{
+			Index:         0,
+			Name:          "NVIDIA GeForce GTX 1080",
+			VramTotalMb:   8192,
+			VramFreeMb:    6144,
+			DriverVersion: "470.0",
+			CudaVersion:   "11.4",
+		}
+
+		if info.Index != 0 {
+			t.Errorf("Index = %d", info.Index)
+		}
+		if info.Name != "NVIDIA GeForce GTX 1080" {
+			t.Errorf("Name = %q", info.Name)
+		}
+		if info.VramTotalMb != 8192 {
+			t.Errorf("VramTotalMb = %d", info.VramTotalMb)
+		}
+		if info.VramFreeMb != 6144 {
+			t.Errorf("VramFreeMb = %d", info.VramFreeMb)
+		}
+	})
+}
@@ -0,0 +1,12 @@
+package infra
+
+// GpuInfo describes one GPU detected on the system: its VRAM figures and the
+// driver and CUDA versions reported for it.
+type GpuInfo struct {
+	Index         int    `json:"index"`                  // GPU index
+	Name          string `json:"name"`                   // GPU product name
+	VramTotalMb   int    `json:"vram_total_mb"`          // total VRAM
+	VramFreeMb    int    `json:"vram_free_mb"`           // free VRAM at query time
+	DriverVersion string `json:"driver_version"`         // driver version string
+	CudaVersion   string `json:"cuda_version,omitempty"` // CUDA version string; left out of the JSON when empty
+}
@@ -0,0 +1,171 @@
+package infra
+
+import (
+	"fmt"
+	"os"
+	"path/filepath"
+	"time"
+)
+
+// AggregateReport summarises the result of a VaultAggregateIndex run.
+//
+// VaultsProcessed counts vaults whose rows were committed to vault_files,
+// VaultsSkipped counts vaults for which vault_index.db could not be stat'ed,
+// TotalFiles is the number of rows written across all processed vaults, and
+// Errors collects human-readable messages for per-vault problems that did not
+// abort the run.
+type AggregateReport struct {
+	VaultsProcessed int
+	VaultsSkipped   int      // vaults without a vault_index.db
+	TotalFiles      int
+	Errors          []string // non-fatal per-vault errors
+}
+
+// VaultAggregateIndex copies the file records of every vault listed in the
+// manifests under repoRoot into the vault_files table of
+// <repoRoot>/registry.db.
+//
+// Steps:
+//  1. Open registry.db and create vault_files plus its two indexes if absent.
+//  2. Read the vault manifests with VaultManifestRead.
+//  3. For each vault that has a vault_index.db, read its files table and
+//     replace that vault's rows in vault_files inside one transaction
+//     (DELETE by vault_id, then INSERT), so a re-run never duplicates rows.
+//
+// Vaults without a vault_index.db are counted in VaultsSkipped. Any other
+// per-vault failure is appended to report.Errors exactly once and the vault is
+// left out of VaultsProcessed; its transaction is rolled back, so the rows it
+// already had in vault_files are kept. Individual rows that cannot be scanned
+// are dropped and summarised in report.Errors. If iterating the vault index
+// ends with an error the vault is not replaced at all, to avoid overwriting a
+// complete set of rows with a truncated one.
+//
+// The returned error is non-nil only when registry.db cannot be opened, a
+// schema statement fails, or the manifests cannot be read.
+func VaultAggregateIndex(repoRoot string) (AggregateReport, error) {
+	var report AggregateReport
+
+	// 1. Open registry.db
+	registryDB, err := SQLiteOpen(filepath.Join(repoRoot, "registry.db"), "")
+	if err != nil {
+		return report, fmt.Errorf("vault_aggregate_index: open registry.db: %w", err)
+	}
+	defer registryDB.Close()
+
+	// 2. Idempotent schema migration
+	for _, stmt := range []string{
+		`CREATE TABLE IF NOT EXISTS vault_files (
+    vault_id    TEXT NOT NULL,
+    vault_name  TEXT NOT NULL,
+    rel_path    TEXT NOT NULL,
+    size        INTEGER NOT NULL,
+    mtime       INTEGER NOT NULL,
+    sha256      TEXT NOT NULL,
+    mime        TEXT NOT NULL DEFAULT '',
+    ext         TEXT NOT NULL DEFAULT '',
+    bucket      TEXT NOT NULL DEFAULT '',
+    sub_bucket  TEXT NOT NULL DEFAULT '',
+    indexed_at  INTEGER NOT NULL,
+    PRIMARY KEY (vault_id, rel_path)
+);`,
+		`CREATE INDEX IF NOT EXISTS idx_vault_files_sha256 ON vault_files(sha256);`,
+		`CREATE INDEX IF NOT EXISTS idx_vault_files_vault ON vault_files(vault_id);`,
+	} {
+		if _, err := registryDB.Exec(stmt); err != nil {
+			if !isIdempotentMigrationError(err) {
+				return report, fmt.Errorf("vault_aggregate_index: schema: %w", err)
+			}
+		}
+	}
+
+	// 3. Read manifest
+	entries, err := VaultManifestRead(repoRoot)
+	if err != nil {
+		return report, fmt.Errorf("vault_aggregate_index: manifest: %w", err)
+	}
+
+	// One timestamp for the whole run, stored in indexed_at of every row.
+	now := time.Now().UTC().Unix()
+
+	type fileRow struct {
+		RelPath   string
+		Size      int64
+		Mtime     int64
+		Sha256    string
+		Mime      string
+		Ext       string
+		Bucket    string
+		SubBucket string
+	}
+
+	// readFiles loads the files table of the vault index at vaultPath. The
+	// index is closed before returning, so it is never held open while the
+	// registry transaction runs. warning is non-empty when some rows had to be
+	// dropped because they could not be scanned.
+	readFiles := func(vaultPath string) (files []fileRow, warning string, err error) {
+		vaultDB, err := VaultIndexOpen(vaultPath)
+		if err != nil {
+			return nil, "", fmt.Errorf("open index: %v", err)
+		}
+		defer vaultDB.Close()
+
+		rows, err := vaultDB.Query(
+			`SELECT rel_path, size, mtime, sha256, mime, ext, bucket, sub_bucket FROM files`,
+		)
+		if err != nil {
+			return nil, "", fmt.Errorf("query files: %v", err)
+		}
+		defer rows.Close()
+
+		badRows := 0
+		var firstBad error
+		for rows.Next() {
+			var r fileRow
+			if scanErr := rows.Scan(&r.RelPath, &r.Size, &r.Mtime, &r.Sha256, &r.Mime, &r.Ext, &r.Bucket, &r.SubBucket); scanErr != nil {
+				if firstBad == nil {
+					firstBad = scanErr
+				}
+				badRows++
+				continue
+			}
+			files = append(files, r)
+		}
+		// Next() returning false can mean "done" or "failed"; only Err() tells
+		// them apart.
+		if iterErr := rows.Err(); iterErr != nil {
+			return nil, "", fmt.Errorf("read files: %v", iterErr)
+		}
+		if badRows > 0 {
+			warning = fmt.Sprintf("scan files: skipped %d row(s), first error: %v", badRows, firstBad)
+		}
+		return files, warning, nil
+	}
+
+	// replaceFiles swaps the vault's rows in vault_files for files inside one
+	// transaction. Every failure path returns immediately, so the statement
+	// and transaction are never used again after they have been torn down.
+	replaceFiles := func(vaultID, vaultName string, files []fileRow) error {
+		tx, err := registryDB.Begin()
+		if err != nil {
+			return fmt.Errorf("begin tx: %v", err)
+		}
+		committed := false
+		defer func() {
+			if !committed {
+				tx.Rollback()
+			}
+		}()
+
+		if _, err := tx.Exec(`DELETE FROM vault_files WHERE vault_id = ?`, vaultID); err != nil {
+			return fmt.Errorf("delete: %v", err)
+		}
+
+		stmt, err := tx.Prepare(`
+INSERT INTO vault_files
+    (vault_id, vault_name, rel_path, size, mtime, sha256, mime, ext, bucket, sub_bucket, indexed_at)
+VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)`)
+		if err != nil {
+			return fmt.Errorf("prepare: %v", err)
+		}
+		defer stmt.Close()
+
+		for _, r := range files {
+			if _, err := stmt.Exec(vaultID, vaultName, r.RelPath, r.Size, r.Mtime, r.Sha256, r.Mime, r.Ext, r.Bucket, r.SubBucket, now); err != nil {
+				return fmt.Errorf("insert %s: %v", r.RelPath, err)
+			}
+		}
+
+		if err := tx.Commit(); err != nil {
+			return fmt.Errorf("commit: %v", err)
+		}
+		committed = true
+		return nil
+	}
+
+	for _, entry := range entries {
+		vaultName := entry.Name
+
+		// A vault that was never indexed is skipped, not treated as an error.
+		indexPath := filepath.Join(entry.Path, "vault_index.db")
+		if _, statErr := os.Stat(indexPath); statErr != nil {
+			report.VaultsSkipped++
+			continue
+		}
+
+		files, warning, readErr := readFiles(entry.Path)
+		if readErr != nil {
+			report.Errors = append(report.Errors, fmt.Sprintf("%s: %v", vaultName, readErr))
+			continue
+		}
+		if warning != "" {
+			report.Errors = append(report.Errors, fmt.Sprintf("%s: %s", vaultName, warning))
+		}
+
+		if replaceErr := replaceFiles(vaultIDFromEntry(entry), vaultName, files); replaceErr != nil {
+			report.Errors = append(report.Errors, fmt.Sprintf("%s: %v", vaultName, replaceErr))
+			continue
+		}
+
+		report.VaultsProcessed++
+		report.TotalFiles += len(files)
+	}
+
+	return report, nil
+}
+
+// vaultIDFromEntry builds the vault ID stored in registry.db for a manifest
+// entry: "<vault_name>_<project_id>", or just the vault name when the entry
+// has no project ID.
+func vaultIDFromEntry(e VaultManifestEntry) string {
+	id := e.Name
+	if e.ProjectID != "" {
+		id += "_" + e.ProjectID
+	}
+	return id
+}
@@ -0,0 +1,58 @@
+---
+name: vault_aggregate_index
+kind: function
+lang: go
+domain: infra
+version: "1.0.0"
+purity: impure
+signature: "func VaultAggregateIndex(repoRoot string) (AggregateReport, error)"
+description: "Agrega los índices de todos los vaults del registry en la tabla vault_files de registry.db. Lee cada vault_index.db (via VaultIndexOpen) y reemplaza las filas de forma atómica. Idempotente: re-ejecutar limpia y reescribe sin duplicar."
+tags: [vault, index, aggregate, registry]
+uses_functions:
+  - "vault_manifest_read_go_infra"
+  - "vault_index_open_go_infra"
+  - "sqlite_open_go_infra"
+uses_types:
+  - "vault_file_go_infra"
+returns: []
+returns_optional: false
+error_type: "error_go_core"
+imports:
+  - "database/sql"
+  - "fmt"
+  - "os"
+  - "path/filepath"
+  - "time"
+tested: true
+tests:
+  - "TestVaultAggregateIndex_NoVaults"
+  - "TestVaultAggregateIndex_VaultWithoutIndex"
+  - "TestVaultAggregateIndex_HappyPath"
+  - "TestVaultAggregateIndex_ReRunReplaces"
+test_file_path: "functions/infra/vault_aggregate_index_test.go"
+file_path: "functions/infra/vault_aggregate_index.go"
+params:
+  - name: repoRoot
+    desc: "Ruta absoluta a la raiz del fn_registry (contiene registry.db y projects/)."
+output: "AggregateReport con VaultsProcessed, VaultsSkipped (sin vault_index.db), TotalFiles y Errors (errores no fatales por vault). Error fatal si registry.db no se puede abrir, si falla la creación del esquema o si no se pueden leer los manifests."
+---
+
+## Ejemplo
+
+```go
+report, err := infra.VaultAggregateIndex("/home/lucas/fn_registry")
+if err != nil {
+    log.Fatal(err)
+}
+fmt.Printf("Processed: %d vaults, %d files\n", report.VaultsProcessed, report.TotalFiles)
+for _, e := range report.Errors {
+    fmt.Println("warning:", e)
+}
+```
+
+## Notas
+
+- No requiere que `registry/migrations/012_vault_files.sql` haya sido aplicado previamente (ni que el indexer lo aplique al arrancar): la función aplica la migración ella misma de forma idempotente con `CREATE TABLE IF NOT EXISTS` y `CREATE INDEX IF NOT EXISTS`.
+- Por cada vault: `DELETE WHERE vault_id = ?` + batch `INSERT` dentro de una transacción. Re-run siempre produce el mismo resultado.
+- Vaults sin `vault_index.db` se cuentan en `VaultsSkipped` y se omiten sin error.
+- El `vault_id` sigue el patrón `<vault_name>_<project_id>`, consistente con la tabla `vaults` de registry.db; si la entrada no tiene `project_id`, el `vault_id` es solo `<vault_name>`.
@@ -0,0 +1,175 @@
+package infra
+
+import (
+	"os"
+	"path/filepath"
+	"testing"
+	"time"
+)
+
+// setupAggregateTestRepo builds a throwaway repo for VaultAggregateIndex tests
+// and returns its root directory:
+//
+//	<root>/
+//	  registry.db                          (opened once via SQLiteOpen, then closed)
+//	  projects/<projectID>/vaults/vault.yaml  (one vault: vaultName -> vaultPath)
+//	  <vaultPath>/                         (created if missing)
+//
+// When withIndex is true, a vault_index.db is created in vaultPath through
+// VaultIndexOpen and a single row for data/raw/sample.csv is inserted into its
+// files table. Any setup failure aborts the test.
+func setupAggregateTestRepo(t *testing.T, vaultName, projectID, vaultPath string, withIndex bool) string {
+	t.Helper()
+	root := t.TempDir()
+
+	// Registry database.
+	registry, err := SQLiteOpen(filepath.Join(root, "registry.db"), "")
+	if err != nil {
+		t.Fatalf("create registry.db: %v", err)
+	}
+	registry.Close()
+
+	// Project manifest declaring the vault.
+	manifestDir := filepath.Join(root, "projects", projectID, "vaults")
+	if err = os.MkdirAll(manifestDir, 0755); err != nil {
+		t.Fatalf("mkdir projects: %v", err)
+	}
+	manifest := []byte("vaults:\n" +
+		"  - name: " + vaultName + "\n" +
+		"    description: test\n" +
+		"    path: " + vaultPath + "\n" +
+		"    tags: []\n")
+	if err = os.WriteFile(filepath.Join(manifestDir, "vault.yaml"), manifest, 0644); err != nil {
+		t.Fatalf("write vault.yaml: %v", err)
+	}
+
+	// Vault directory itself.
+	if err = os.MkdirAll(vaultPath, 0755); err != nil {
+		t.Fatalf("mkdir vault: %v", err)
+	}
+	if !withIndex {
+		return root
+	}
+
+	// Vault index holding exactly one file row.
+	index, err := VaultIndexOpen(vaultPath)
+	if err != nil {
+		t.Fatalf("VaultIndexOpen: %v", err)
+	}
+	stamp := time.Now().UTC().Unix()
+	if _, err = index.Exec(`INSERT INTO files (rel_path, size, mtime, sha256, mime, ext, bucket, sub_bucket, indexed_at)
+VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)`,
+		"data/raw/sample.csv", 1024, stamp, "deadbeef", "text/csv", ".csv", "data", "raw", stamp); err != nil {
+		t.Fatalf("insert test file: %v", err)
+	}
+	index.Close()
+
+	return root
+}
+
+// TestVaultAggregateIndex_NoVaults runs the aggregation on a repo that has a
+// registry.db but no vault manifests and expects an empty, error-free report.
+func TestVaultAggregateIndex_NoVaults(t *testing.T) {
+	repo := t.TempDir()
+
+	// Only registry.db exists; there is no projects/ tree.
+	db, openErr := SQLiteOpen(filepath.Join(repo, "registry.db"), "")
+	if openErr != nil {
+		t.Fatalf("create registry.db: %v", openErr)
+	}
+	db.Close()
+
+	got, runErr := VaultAggregateIndex(repo)
+	if runErr != nil {
+		t.Fatalf("unexpected error: %v", runErr)
+	}
+	if n := got.VaultsProcessed; n != 0 {
+		t.Errorf("VaultsProcessed: want 0, got %d", n)
+	}
+	if len(got.Errors) > 0 {
+		t.Errorf("Errors: want empty, got %v", got.Errors)
+	}
+}
+
+func TestVaultAggregateIndex_VaultWithoutIndex(t *testing.T) {
+	vaultDir := t.TempDir()
+	root := setupAggregateTestRepo(t, "my_vault", "my_proj", vaultDir, false /* no vault_index.db */)
+
+	report, err := VaultAggregateIndex(root)
+	if err != nil {
+		t.Fatalf("unexpected error: %v", err)
+	}
+	if report.VaultsSkipped != 1 {
+		t.Errorf("VaultsSkipped: want 1, got %d", report.VaultsSkipped)
+	}
+	if report.VaultsProcessed != 0 {
+		t.Errorf("VaultsProcessed: want 0, got %d", report.VaultsProcessed)
+	}
+}
+
+// TestVaultAggregateIndex_HappyPath aggregates one vault whose index holds a
+// single file and checks both the report and the row count in vault_files.
+func TestVaultAggregateIndex_HappyPath(t *testing.T) {
+	repo := setupAggregateTestRepo(t, "my_vault", "my_proj", t.TempDir(), true)
+
+	got, runErr := VaultAggregateIndex(repo)
+	if runErr != nil {
+		t.Fatalf("unexpected error: %v", runErr)
+	}
+	if n := got.VaultsProcessed; n != 1 {
+		t.Errorf("VaultsProcessed: want 1, got %d", n)
+	}
+	if n := got.TotalFiles; n != 1 {
+		t.Errorf("TotalFiles: want 1, got %d", n)
+	}
+
+	// The row must actually have landed in registry.db.
+	db, openErr := SQLiteOpen(filepath.Join(repo, "registry.db"), "")
+	if openErr != nil {
+		t.Fatalf("open registry.db: %v", openErr)
+	}
+	defer db.Close()
+
+	var stored int
+	countErr := db.QueryRow(`SELECT COUNT(*) FROM vault_files`).Scan(&stored)
+	if countErr != nil {
+		t.Fatalf("count vault_files: %v", countErr)
+	}
+	if stored != 1 {
+		t.Errorf("vault_files count: want 1, got %d", stored)
+	}
+}
+
+// TestVaultAggregateIndex_ReRunReplaces aggregates a vault, adds a second file
+// to its index, aggregates again, and expects exactly two rows in vault_files
+// (the first run's row replaced, not duplicated).
+func TestVaultAggregateIndex_ReRunReplaces(t *testing.T) {
+	vaultDir := t.TempDir()
+	repo := setupAggregateTestRepo(t, "my_vault", "my_proj", vaultDir, true)
+
+	// Run 1: one file in the index.
+	if _, firstErr := VaultAggregateIndex(repo); firstErr != nil {
+		t.Fatalf("first run: %v", firstErr)
+	}
+
+	// Grow the vault index by one more file.
+	index, reopenErr := VaultIndexOpen(vaultDir)
+	if reopenErr != nil {
+		t.Fatalf("reopen vault index: %v", reopenErr)
+	}
+	stamp := time.Now().UTC().Unix()
+	if _, insErr := index.Exec(`INSERT INTO files (rel_path, size, mtime, sha256, mime, ext, bucket, sub_bucket, indexed_at)
+VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)`,
+		"data/raw/extra.csv", 512, stamp, "cafebabe", "text/csv", ".csv", "data", "raw", stamp); insErr != nil {
+		t.Fatalf("insert second file: %v", insErr)
+	}
+	index.Close()
+
+	// Run 2: both files must be reported.
+	got, secondErr := VaultAggregateIndex(repo)
+	if secondErr != nil {
+		t.Fatalf("second run: %v", secondErr)
+	}
+	if n := got.TotalFiles; n != 2 {
+		t.Errorf("TotalFiles: want 2, got %d", n)
+	}
+
+	// No duplicates: the table holds exactly two rows.
+	db, openErr := SQLiteOpen(filepath.Join(repo, "registry.db"), "")
+	if openErr != nil {
+		t.Fatalf("open registry.db: %v", openErr)
+	}
+	defer db.Close()
+
+	var stored int
+	countErr := db.QueryRow(`SELECT COUNT(*) FROM vault_files`).Scan(&stored)
+	if countErr != nil {
+		t.Fatalf("count vault_files: %v", countErr)
+	}
+	if stored != 2 {
+		t.Errorf("vault_files count after re-run: want 2, got %d", stored)
+	}
+}
@@ -0,0 +1,68 @@
+package infra
+
+import "sort"
+
+// VaultFileChange holds the before/after state of a file whose content changed.
+// VaultDiff produces one for each RelPath that appears in both snapshots with
+// different Sha256 values.
+type VaultFileChange struct {
+	// RelPath is the path shared by Prev and Curr.
+	RelPath string
+	// Prev is the entry taken from the previous snapshot.
+	Prev    VaultFile
+	// Curr is the entry taken from the current snapshot.
+	Curr    VaultFile
+}
+
+// VaultDiffReport is the result of comparing two VaultFile slices.
+// Files are matched by RelPath. VaultDiff sorts the three slices by RelPath
+// ascending; a slice is nil when it has no entries. Unchanged is only a count,
+// the unchanged files themselves are not returned.
+type VaultDiffReport struct {
+	Added     []VaultFile       // in curr but not in prev (by rel_path)
+	Removed   []VaultFile       // in prev but not in curr
+	Changed   []VaultFileChange // same rel_path, different sha256
+	Unchanged int               // files present in both with identical sha256
+}
+
+// VaultDiff compares two vault snapshots, matching files by RelPath.
+//
+// Every entry of curr is classified as Added (path absent from prev), Changed
+// (path present in prev with a different Sha256) or Unchanged (same Sha256).
+// Every entry of prev whose path is absent from curr is reported as Removed.
+// Added, Removed and Changed are sorted by RelPath ascending. Either argument
+// may be nil. No I/O is performed.
+func VaultDiff(prev, curr []VaultFile) VaultDiffReport {
+	// Lookup tables keyed by RelPath: full entries for prev (needed to fill
+	// Changed.Prev), membership only for curr.
+	before := make(map[string]VaultFile, len(prev))
+	for i := range prev {
+		before[prev[i].RelPath] = prev[i]
+	}
+	after := make(map[string]struct{}, len(curr))
+	for i := range curr {
+		after[curr[i].RelPath] = struct{}{}
+	}
+
+	var out VaultDiffReport
+
+	for _, file := range curr {
+		old, known := before[file.RelPath]
+		switch {
+		case !known:
+			out.Added = append(out.Added, file)
+		case old.Sha256 == file.Sha256:
+			out.Unchanged++
+		default:
+			out.Changed = append(out.Changed, VaultFileChange{
+				RelPath: file.RelPath,
+				Prev:    old,
+				Curr:    file,
+			})
+		}
+	}
+
+	for _, file := range prev {
+		if _, kept := after[file.RelPath]; kept {
+			continue
+		}
+		out.Removed = append(out.Removed, file)
+	}
+
+	// Order the output so the report does not depend on input order.
+	sortFiles := func(files []VaultFile) {
+		sort.Slice(files, func(a, b int) bool {
+			return files[a].RelPath < files[b].RelPath
+		})
+	}
+	sortFiles(out.Added)
+	sortFiles(out.Removed)
+	sort.Slice(out.Changed, func(a, b int) bool {
+		return out.Changed[a].RelPath < out.Changed[b].RelPath
+	})
+
+	return out
+}
@@ -0,0 +1,49 @@
+---
+name: vault_diff
+kind: function
+lang: go
+domain: infra
+version: "1.0.0"
+purity: pure
+signature: "func VaultDiff(prev, curr []VaultFile) VaultDiffReport"
+description: "Computes the diff between two vault snapshots (slices of VaultFile). Returns the Added, Removed and Changed entries plus an Unchanged count. Pure and deterministic — no I/O."
+tags: [vault, diff, comparison, pure]
+uses_functions: []
+uses_types: ["vault_file_go_infra"]
+returns: []
+returns_optional: false
+error_type: ""
+imports: ["sort"]
+tested: true
+tests:
+  - "TestVaultDiff_NoChanges"
+  - "TestVaultDiff_AllAdded"
+  - "TestVaultDiff_AllRemoved"
+  - "TestVaultDiff_ContentChanged"
+  - "TestVaultDiff_Mixed"
+test_file_path: "functions/infra/vault_diff_test.go"
+file_path: "functions/infra/vault_diff.go"
+params:
+  - name: prev
+    desc: "Snapshot anterior — slice de VaultFile del estado previo del vault (puede ser nil para diff desde cero)."
+  - name: curr
+    desc: "Snapshot actual — slice de VaultFile del estado corriente del vault (puede ser nil para diff de borrado total)."
+output: "VaultDiffReport con Added (nuevos), Removed (eliminados), Changed (mismo rel_path, sha256 distinto) y Unchanged (identicos). Todos los slices ordenados por RelPath ASC."
+---
+
+## Ejemplo
+
+```go
+prev, _ := infra.VaultInventoryScan(oldPath, "my_vault_proj", "my_vault")
+curr, _ := infra.VaultInventoryScan(newPath, "my_vault_proj", "my_vault")
+report := infra.VaultDiff(prev, curr)
+fmt.Printf("Added: %d, Removed: %d, Changed: %d, Unchanged: %d\n",
+    len(report.Added), len(report.Removed), len(report.Changed), report.Unchanged)
+```
+
+## Notas
+
+- Usa `RelPath` como clave de identidad de archivo (no nombre, no sha256).
+- Dos archivos con mismo `RelPath` pero diferente `Sha256` se consideran Changed.
+- Los slices del report se ordenan por `RelPath` ASC para salida deterministica.
+- Función pura: no toca disco ni BD.
@@ -0,0 +1,126 @@
+package infra
+
+import (
+	"testing"
+)
+
+func makeVF(relPath, sha256 string) VaultFile {
+	return VaultFile{
+		VaultID:   "test_vault",
+		VaultName: "test",
+		RelPath:   relPath,
+		Sha256:    sha256,
+	}
+}
+
+// TestVaultDiff_NoChanges diffs a snapshot against itself and expects nothing
+// added, removed or changed, with both files counted as unchanged.
+func TestVaultDiff_NoChanges(t *testing.T) {
+	snapshot := []VaultFile{
+		makeVF("data/a.csv", "aaa"),
+		makeVF("data/b.csv", "bbb"),
+	}
+
+	got := VaultDiff(snapshot, snapshot)
+
+	if n := len(got.Added); n != 0 {
+		t.Errorf("Added: want 0, got %d", n)
+	}
+	if n := len(got.Removed); n != 0 {
+		t.Errorf("Removed: want 0, got %d", n)
+	}
+	if n := len(got.Changed); n != 0 {
+		t.Errorf("Changed: want 0, got %d", n)
+	}
+	if n := got.Unchanged; n != 2 {
+		t.Errorf("Unchanged: want 2, got %d", n)
+	}
+}
+
+// TestVaultDiff_AllAdded diffs a nil previous snapshot against two files and
+// expects both to be reported as Added, in RelPath order, with nothing removed.
+func TestVaultDiff_AllAdded(t *testing.T) {
+	curr := []VaultFile{
+		makeVF("data/a.csv", "aaa"),
+		makeVF("data/b.csv", "bbb"),
+	}
+	report := VaultDiff(nil, curr)
+	// Fatal on a wrong length: the element checks below index into Added and
+	// would otherwise panic instead of reporting a clean failure.
+	if len(report.Added) != 2 {
+		t.Fatalf("Added: want 2, got %d", len(report.Added))
+	}
+	if len(report.Removed) != 0 {
+		t.Errorf("Removed: want 0, got %d", len(report.Removed))
+	}
+	if report.Added[0].RelPath != "data/a.csv" {
+		t.Errorf("Added[0]: want data/a.csv, got %s", report.Added[0].RelPath)
+	}
+	if report.Added[1].RelPath != "data/b.csv" {
+		t.Errorf("Added[1]: want data/b.csv, got %s", report.Added[1].RelPath)
+	}
+}
+
+// TestVaultDiff_AllRemoved diffs two files against a nil current snapshot and
+// expects both to be reported as Removed, in RelPath order, with nothing added.
+func TestVaultDiff_AllRemoved(t *testing.T) {
+	prev := []VaultFile{
+		makeVF("data/a.csv", "aaa"),
+		makeVF("data/b.csv", "bbb"),
+	}
+	report := VaultDiff(prev, nil)
+	// Fatal on a wrong length: the element checks below index into Removed
+	// and would otherwise panic instead of reporting a clean failure.
+	if len(report.Removed) != 2 {
+		t.Fatalf("Removed: want 2, got %d", len(report.Removed))
+	}
+	if len(report.Added) != 0 {
+		t.Errorf("Added: want 0, got %d", len(report.Added))
+	}
+	if report.Removed[0].RelPath != "data/a.csv" {
+		t.Errorf("Removed[0]: want data/a.csv, got %s", report.Removed[0].RelPath)
+	}
+	if report.Removed[1].RelPath != "data/b.csv" {
+		t.Errorf("Removed[1]: want data/b.csv, got %s", report.Removed[1].RelPath)
+	}
+}
+
+// TestVaultDiff_ContentChanged diffs one path whose hash differs between the
+// snapshots and expects a single Changed entry carrying both hashes, with
+// nothing added, removed or unchanged.
+func TestVaultDiff_ContentChanged(t *testing.T) {
+	got := VaultDiff(
+		[]VaultFile{makeVF("data/a.csv", "old_hash")},
+		[]VaultFile{makeVF("data/a.csv", "new_hash")},
+	)
+
+	if n := len(got.Changed); n != 1 {
+		t.Fatalf("Changed: want 1, got %d", n)
+	}
+	change := got.Changed[0]
+	if change.RelPath != "data/a.csv" {
+		t.Errorf("Changed[0].RelPath: want data/a.csv, got %s", change.RelPath)
+	}
+	if change.Prev.Sha256 != "old_hash" {
+		t.Errorf("Changed[0].Prev.Sha256: want old_hash, got %s", change.Prev.Sha256)
+	}
+	if change.Curr.Sha256 != "new_hash" {
+		t.Errorf("Changed[0].Curr.Sha256: want new_hash, got %s", change.Curr.Sha256)
+	}
+
+	added, removed := len(got.Added), len(got.Removed)
+	if added != 0 || removed != 0 {
+		t.Errorf("Expected no added/removed, got %d/%d", added, removed)
+	}
+	if n := got.Unchanged; n != 0 {
+		t.Errorf("Unchanged: want 0, got %d", n)
+	}
+}
+
+// TestVaultDiff_Mixed diffs snapshots that contain one unchanged, one changed,
+// one removed and one added file and checks each category of the report.
+func TestVaultDiff_Mixed(t *testing.T) {
+	before := []VaultFile{
+		makeVF("data/a.csv", "aaa"),
+		makeVF("data/b.csv", "bbb"),
+		makeVF("data/c.csv", "ccc"), // removed
+	}
+	after := []VaultFile{
+		makeVF("data/a.csv", "aaa"),     // unchanged
+		makeVF("data/b.csv", "bbb_new"), // changed
+		makeVF("data/d.csv", "ddd"),     // added
+	}
+
+	got := VaultDiff(before, after)
+
+	addedOK := len(got.Added) == 1 && got.Added[0].RelPath == "data/d.csv"
+	if !addedOK {
+		t.Errorf("Added: want [data/d.csv], got %v", got.Added)
+	}
+	removedOK := len(got.Removed) == 1 && got.Removed[0].RelPath == "data/c.csv"
+	if !removedOK {
+		t.Errorf("Removed: want [data/c.csv], got %v", got.Removed)
+	}
+	changedOK := len(got.Changed) == 1 && got.Changed[0].RelPath == "data/b.csv"
+	if !changedOK {
+		t.Errorf("Changed: want [data/b.csv], got %v", got.Changed)
+	}
+	if n := got.Unchanged; n != 1 {
+		t.Errorf("Unchanged: want 1, got %d", n)
+	}
+}
@@ -0,0 +1,230 @@
+package infra
+
+import (
+	"fmt"
+	"os"
+	"path/filepath"
+	"strings"
+	"time"
+)
+
+// VaultDoctorEntry holds the health report for a single vault, as produced by
+// auditVault. VaultName, VaultPath and ProjectID are copied from the manifest
+// entry; the remaining fields are filled in by the checks.
+type VaultDoctorEntry struct {
+	VaultName     string   `json:"vault_name"`
+	VaultPath     string   `json:"vault_path"`
+	ProjectID     string   `json:"project_id"`
+	Issues        []string `json:"issues"`         // human-readable issues; empty = healthy
+	IndexedFiles  int      `json:"indexed_files"`  // COUNT(*) of the index's files table; 0 if no vault_index.db
+	LastIndexedAt int64    `json:"last_indexed_at"` // MAX(indexed_at), unix seconds; 0 if N/A
+	DiskFiles     int      `json:"disk_files"`     // count via WalkDir (no hashing)
+	Status        string   `json:"status"`         // "ok" | "warning" | "error"
+}
+
+// VaultDoctor audits every vault returned by VaultManifestRead(repoRoot) and
+// returns one VaultDoctorEntry per vault, in manifest order. Each vault goes
+// through auditVault, which checks disk presence, layout, index existence,
+// staleness and drift.
+//
+// The function performs no writes of its own.
+// NOTE(review): auditVault opens the index through VaultIndexOpen, which is
+// defined elsewhere — confirm that opening an existing index does not modify it.
+//
+// An error is returned only when VaultManifestRead fails.
+func VaultDoctor(repoRoot string) ([]VaultDoctorEntry, error) {
+	manifest, err := VaultManifestRead(repoRoot)
+	if err != nil {
+		return nil, fmt.Errorf("vault_doctor: read manifests: %w", err)
+	}
+
+	report := make([]VaultDoctorEntry, len(manifest))
+	for i := range manifest {
+		report[i] = auditVault(manifest[i])
+	}
+	return report, nil
+}
+
+// auditVault runs the health checks for one manifest entry and returns the
+// resulting VaultDoctorEntry.
+//
+// Checks, in order:
+//  1. directory_missing   — the (symlink-resolved) path is not an existing
+//     directory; status "error", no further checks.
+//  2. layout_missing / non_standard_layout — neither data/ nor knowledge/
+//     exists at the vault root.
+//  3. index_missing       — vault_index.db cannot be stat'ed; returns early.
+//     index_open_error    — VaultIndexOpen failed; returns early.
+//     index_query_error   — the COUNT/MAX query failed; checks 4 and 5 are
+//     skipped because there is no trustworthy indexed count to compare.
+//  4. index_stale         — some file on disk is newer than MAX(indexed_at).
+//  5. index_drift         — disk file count differs from the indexed count.
+//  6. empty_vault         — no files on disk.
+//
+// Status is "ok" when no issue was recorded and "warning" otherwise, except
+// for directory_missing which is "error".
+func auditVault(e VaultManifestEntry) VaultDoctorEntry {
+	entry := VaultDoctorEntry{
+		VaultName: e.Name,
+		VaultPath: e.Path,
+		ProjectID: e.ProjectID,
+	}
+
+	// Resolve symlinks for disk checks; fall back to the declared path when
+	// resolution fails so CHECK 1 reports the problem.
+	realPath, err := filepath.EvalSymlinks(e.Path)
+	if err != nil || realPath == "" {
+		realPath = e.Path
+	}
+
+	// CHECK 1: directory_missing
+	info, statErr := os.Stat(realPath)
+	if statErr != nil || !info.IsDir() {
+		entry.Issues = append(entry.Issues, "directory_missing")
+		entry.Status = "error"
+		return entry
+	}
+
+	// COUNT disk files (cheap walk — no hashing, no mime detection)
+	diskCount := countDiskFiles(realPath)
+	entry.DiskFiles = diskCount
+
+	// CHECK 2: layout_missing / non_standard_layout
+	hasData := dirExists(filepath.Join(realPath, "data"))
+	hasKnowledge := dirExists(filepath.Join(realPath, "knowledge"))
+	if !hasData && !hasKnowledge {
+		// Other subdirectories at the root suggest an intentional layout.
+		if hasNonStandardLayout(realPath) {
+			entry.Issues = append(entry.Issues, "non_standard_layout")
+		} else {
+			entry.Issues = append(entry.Issues, "layout_missing")
+		}
+	}
+
+	// CHECK 3: index_missing
+	indexPath := filepath.Join(realPath, "vault_index.db")
+	if _, indexStatErr := os.Stat(indexPath); indexStatErr != nil {
+		entry.Issues = append(entry.Issues, "index_missing")
+		entry.setWarningStatus()
+		return entry
+	}
+
+	// Open the vault index for checks 4 and 5.
+	vdb, openErr := VaultIndexOpen(realPath)
+	if openErr != nil {
+		entry.Issues = append(entry.Issues, fmt.Sprintf("index_open_error: %v", openErr))
+		entry.setWarningStatus()
+		return entry
+	}
+	defer vdb.Close()
+
+	// Query indexed file count and max indexed_at
+	var indexedCount int
+	var maxIndexedAt int64
+	row := vdb.QueryRow(`SELECT COUNT(*), COALESCE(MAX(indexed_at), 0) FROM files`)
+	if scanErr := row.Scan(&indexedCount, &maxIndexedAt); scanErr != nil {
+		// Without a valid count, comparing against the disk would report a
+		// bogus "indexed=0" drift, so checks 4 and 5 are not run.
+		entry.Issues = append(entry.Issues, fmt.Sprintf("index_query_error: %v", scanErr))
+	} else {
+		entry.IndexedFiles = indexedCount
+		entry.LastIndexedAt = maxIndexedAt
+
+		// CHECK 4: index_stale — any file on disk newer than MAX(indexed_at)
+		if maxIndexedAt > 0 && isIndexStale(realPath, time.Unix(maxIndexedAt, 0)) {
+			entry.Issues = append(entry.Issues, "index_stale")
+		}
+
+		// CHECK 5: index_drift — disk file count != indexed count
+		if indexedCount != diskCount {
+			entry.Issues = append(entry.Issues, fmt.Sprintf("index_drift: disk=%d indexed=%d", diskCount, indexedCount))
+		}
+	}
+
+	// CHECK 6: empty_vault
+	if diskCount == 0 {
+		entry.Issues = append(entry.Issues, "empty_vault")
+	}
+
+	entry.setFinalStatus()
+	return entry
+}
+
+// setWarningStatus marks the entry as "warning" unless it is already "error".
+func (e *VaultDoctorEntry) setWarningStatus() {
+	if e.Status == "error" {
+		return
+	}
+	e.Status = "warning"
+}
+
+// setFinalStatus settles Status from the recorded Issues: an existing "error"
+// is kept, any issue yields "warning", and no issues yields "ok".
+func (e *VaultDoctorEntry) setFinalStatus() {
+	switch {
+	case e.Status == "error":
+		// Already at the highest severity; leave it alone.
+	case len(e.Issues) > 0:
+		e.Status = "warning"
+	default:
+		e.Status = "ok"
+	}
+}
+
+// countDiskFiles walks realPath and returns the number of non-directory
+// entries found, excluding:
+//   - hidden files and hidden directories (name starts with "."), at any
+//     depth below the root — this also covers .git/;
+//   - vault_index.db, vault_index.db-shm and vault_index.db-wal.
+//
+// The root itself is never filtered by name, so a vault whose directory is
+// dot-named (or passed as ".") is still walked. Entries that cannot be read
+// are ignored rather than aborting the walk.
+func countDiskFiles(realPath string) int {
+	count := 0
+	_ = filepath.WalkDir(realPath, func(path string, d os.DirEntry, err error) error {
+		if err != nil {
+			// Best-effort count: skip what cannot be read.
+			return nil
+		}
+		name := d.Name()
+		// Skip hidden entries, but not the walk root: its name says nothing
+		// about whether its contents should be counted.
+		if path != realPath && strings.HasPrefix(name, ".") {
+			if d.IsDir() {
+				return filepath.SkipDir
+			}
+			return nil
+		}
+		if d.IsDir() {
+			return nil
+		}
+		// Skip vault_index.db files
+		if name == "vault_index.db" || name == "vault_index.db-shm" || name == "vault_index.db-wal" {
+			return nil
+		}
+		count++
+		return nil
+	})
+	return count
+}
+
+// isIndexStale reports whether any non-directory entry under realPath has a
+// modification time strictly after maxTime. Hidden files and directories
+// below the root (which covers .git/) and the vault_index.db, -shm and -wal
+// files are ignored. Entries that cannot be read or stat'ed are ignored too.
+//
+// The root itself is never filtered by name, so a dot-named vault directory is
+// still inspected. Once a newer file is found the rest of the tree is pruned.
+func isIndexStale(realPath string, maxTime time.Time) bool {
+	stale := false
+	_ = filepath.WalkDir(realPath, func(path string, d os.DirEntry, err error) error {
+		if err != nil {
+			return nil
+		}
+		if stale {
+			// Answer already known: SkipDir drops this directory, or the rest
+			// of the parent directory when the entry is a file.
+			return filepath.SkipDir
+		}
+		name := d.Name()
+		if path != realPath && strings.HasPrefix(name, ".") {
+			if d.IsDir() {
+				return filepath.SkipDir
+			}
+			return nil
+		}
+		if d.IsDir() {
+			return nil
+		}
+		if name == "vault_index.db" || name == "vault_index.db-shm" || name == "vault_index.db-wal" {
+			return nil
+		}
+		fi, statErr := d.Info()
+		if statErr == nil && fi.ModTime().After(maxTime) {
+			stale = true
+			return filepath.SkipDir
+		}
+		return nil
+	})
+	return stale
+}
+
+// hasNonStandardLayout reports whether the vault root contains at least one
+// visible subdirectory other than data/ and knowledge/. Hidden directories
+// (including .git) and plain files do not count. It returns false when the
+// directory cannot be read.
+func hasNonStandardLayout(realPath string) bool {
+	children, err := os.ReadDir(realPath)
+	if err != nil {
+		return false
+	}
+	for _, child := range children {
+		if !child.IsDir() {
+			continue
+		}
+		name := child.Name()
+		// ".git" needs no separate case: the dot-prefix test already covers it.
+		if name == "data" || name == "knowledge" || strings.HasPrefix(name, ".") {
+			continue
+		}
+		return true
+	}
+	return false
+}
@@ -0,0 +1,66 @@
+---
+name: vault_doctor
+kind: function
+lang: go
+domain: infra
+version: "1.0.0"
+purity: impure
+signature: "func VaultDoctor(repoRoot string) ([]VaultDoctorEntry, error)"
+description: "Audita la salud de todos los vaults declarados en projects/*/vaults/vault.yaml. Comprueba existencia del directorio, layout estándar, presencia del índice, staleness y drift entre disco e índice. Read-only."
+tags: [vault, doctor, health, audit]
+uses_functions:
+  - "vault_manifest_read_go_infra"
+  - "vault_index_open_go_infra"
+uses_types: []
+returns: []
+returns_optional: false
+error_type: "error_go_core"
+imports:
+  - "fmt"
+  - "os"
+  - "path/filepath"
+  - "strings"
+  - "time"
+tested: true
+tests:
+  - "TestVaultDoctor_OK"
+  - "TestVaultDoctor_MissingDir"
+  - "TestVaultDoctor_NoIndex"
+  - "TestVaultDoctor_LayoutDrift"
+  - "TestVaultDoctor_EmptyVault"
+test_file_path: "functions/infra/vault_doctor_test.go"
+file_path: "functions/infra/vault_doctor.go"
+params:
+  - name: repoRoot
+    desc: "Ruta absoluta a la raiz del fn_registry (donde están projects/ y registry.db)."
+output: "Slice de VaultDoctorEntry con Status (ok/warning/error), Issues, DiskFiles, IndexedFiles y LastIndexedAt por vault. Error fatal solo si los manifests no se pueden leer."
+---
+
+## Checks aplicados
+
+| Check | Condición | Severidad |
+|---|---|---|
+| `directory_missing` | `e.Path` no existe en disco o no es un directorio | error |
+| `layout_missing` | no hay `data/` ni `knowledge/` en la raíz del vault | warning |
+| `non_standard_layout` | no hay `data/`/`knowledge/` pero sí otros subdirectorios (ej. imagegen_models) | warning |
+| `index_missing` | no existe `vault_index.db` | warning |
+| `index_stale` | algún archivo en disco tiene mtime > MAX(indexed_at) | warning |
+| `index_drift` | count disco != count en tabla `files` | warning |
+| `empty_vault` | DiskFiles == 0 (solo se evalúa cuando existe `vault_index.db` y se puede abrir) | warning |
+
+## Ejemplo
+
+```go
+entries, err := infra.VaultDoctor("/home/lucas/fn_registry")
+for _, e := range entries {
+    fmt.Printf("%-30s  %-8s  files=%d  issues=%v\n",
+        e.VaultName, e.Status, e.DiskFiles, e.Issues)
+}
+```
+
+## Notas
+
+- Función read-only: nunca escribe en disco ni en ninguna base de datos.
+- `countDiskFiles` usa `filepath.WalkDir` sin hash (cheap) — excluye `vault_index.db*`, `.git/` y ficheros ocultos.
+- `isIndexStale` también usa WalkDir; compara mtime de archivos con MAX(indexed_at) de la BD.
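+- `index_missing` se detecta con `os.Stat` antes de abrir el índice, de modo que `VaultIndexOpen` solo se invoca cuando `vault_index.db` ya existe y no llega a crear el DB; si la apertura falla se reporta `index_open_error`.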
@@ -0,0 +1,211 @@
+package infra
+
+import (
+	"os"
+	"path/filepath"
+	"testing"
+	"time"
+)
+
+// setupDoctorRepo builds a throwaway repo root that contains a single project
+// manifest, projects/<projectID>/vaults/vault.yaml, declaring one vault.
+// vaultPath is written into the manifest verbatim; callers pass an absolute
+// path, which may or may not exist on disk (the missing-directory test relies
+// on the latter). The repo root path is returned.
+func setupDoctorRepo(t *testing.T, vaultName, projectID, vaultPath string) string {
+	t.Helper()
+
+	repoRoot := t.TempDir()
+	manifestDir := filepath.Join(repoRoot, "projects", projectID, "vaults")
+	if mkErr := os.MkdirAll(manifestDir, 0755); mkErr != nil {
+		t.Fatalf("mkdir projects: %v", mkErr)
+	}
+
+	yamlBody := "vaults:\n" +
+		"  - name: " + vaultName + "\n" +
+		"    description: test vault\n" +
+		"    path: " + vaultPath + "\n" +
+		"    tags: []\n"
+	manifestFile := filepath.Join(manifestDir, "vault.yaml")
+	if wrErr := os.WriteFile(manifestFile, []byte(yamlBody), 0644); wrErr != nil {
+		t.Fatalf("write vault.yaml: %v", wrErr)
+	}
+	return repoRoot
+}
+
+func TestVaultDoctor_OK(t *testing.T) {
+	vaultDir := t.TempDir()
+
+	// Proper layout
+	if err := os.MkdirAll(filepath.Join(vaultDir, "data", "raw"), 0755); err != nil {
+		t.Fatal(err)
+	}
+	if err := os.MkdirAll(filepath.Join(vaultDir, "knowledge"), 0755); err != nil {
+		t.Fatal(err)
+	}
+
+	// Create a file with a past mtime so the index is not stale
+	samplePath := filepath.Join(vaultDir, "data", "raw", "sample.csv")
+	if err := os.WriteFile(samplePath, []byte("a,b\n1,2\n"), 0644); err != nil {
+		t.Fatal(err)
+	}
+	pastTime := time.Now().Add(-1 * time.Hour)
+	if err := os.Chtimes(samplePath, pastTime, pastTime); err != nil {
+		t.Fatal(err)
+	}
+
+	// Create vault_index.db with the file indexed after its mtime
+	vdb, err := VaultIndexOpen(vaultDir)
+	if err != nil {
+		t.Fatalf("VaultIndexOpen: %v", err)
+	}
+	futureIndexed := time.Now().Unix() // indexed_at is now — after file mtime
+	_, err = vdb.Exec(`INSERT INTO files (rel_path, size, mtime, sha256, mime, ext, bucket, sub_bucket, indexed_at)
+VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)`,
+		"data/raw/sample.csv", 8, pastTime.Unix(), "deadbeef", "text/csv", ".csv", "data", "raw", futureIndexed)
+	if err != nil {
+		t.Fatalf("insert: %v", err)
+	}
+	vdb.Close()
+
+	root := setupDoctorRepo(t, "my_vault", "my_proj", vaultDir)
+	entries, err := VaultDoctor(root)
+	if err != nil {
+		t.Fatalf("unexpected error: %v", err)
+	}
+	if len(entries) != 1 {
+		t.Fatalf("expected 1 entry, got %d", len(entries))
+	}
+	e := entries[0]
+	if e.Status != "ok" {
+		t.Errorf("Status: want ok, got %s (issues: %v)", e.Status, e.Issues)
+	}
+	if len(e.Issues) != 0 {
+		t.Errorf("Issues: want empty, got %v", e.Issues)
+	}
+	if e.DiskFiles != 1 {
+		t.Errorf("DiskFiles: want 1, got %d", e.DiskFiles)
+	}
+	if e.IndexedFiles != 1 {
+		t.Errorf("IndexedFiles: want 1, got %d", e.IndexedFiles)
+	}
+}
+
+// TestVaultDoctor_MissingDir points the manifest at a path that does not exist
+// and expects a single entry with status "error" carrying the
+// "directory_missing" issue.
+func TestVaultDoctor_MissingDir(t *testing.T) {
+	absentDir := filepath.Join(t.TempDir(), "does_not_exist")
+	repoRoot := setupDoctorRepo(t, "missing_vault", "my_proj", absentDir)
+
+	report, err := VaultDoctor(repoRoot)
+	if err != nil {
+		t.Fatalf("unexpected error: %v", err)
+	}
+	if got := len(report); got != 1 {
+		t.Fatalf("expected 1 entry, got %d", got)
+	}
+
+	entry := report[0]
+	if entry.Status != "error" {
+		t.Errorf("Status: want error, got %s", entry.Status)
+	}
+
+	sawMissing := false
+	for _, issue := range entry.Issues {
+		if issue != "directory_missing" {
+			continue
+		}
+		sawMissing = true
+		break
+	}
+	if !sawMissing {
+		t.Errorf("Expected directory_missing issue, got %v", entry.Issues)
+	}
+}
+
+// TestVaultDoctor_NoIndex builds a vault with data/raw and one file but never
+// creates vault_index.db. The single entry must have status "warning" and
+// include the "index_missing" issue.
+func TestVaultDoctor_NoIndex(t *testing.T) {
+	vaultDir := t.TempDir()
+
+	// Valid layout, deliberately without an index database.
+	rawDir := filepath.Join(vaultDir, "data", "raw")
+	if mkErr := os.MkdirAll(rawDir, 0755); mkErr != nil {
+		t.Fatal(mkErr)
+	}
+	if wrErr := os.WriteFile(filepath.Join(rawDir, "a.csv"), []byte("x"), 0644); wrErr != nil {
+		t.Fatal(wrErr)
+	}
+
+	repoRoot := setupDoctorRepo(t, "no_index_vault", "my_proj", vaultDir)
+	report, err := VaultDoctor(repoRoot)
+	if err != nil {
+		t.Fatalf("unexpected error: %v", err)
+	}
+	if got := len(report); got != 1 {
+		t.Fatalf("expected 1 entry, got %d", got)
+	}
+
+	entry := report[0]
+	if entry.Status != "warning" {
+		t.Errorf("Status: want warning, got %s", entry.Status)
+	}
+
+	sawIndexMissing := false
+	for _, issue := range entry.Issues {
+		if issue != "index_missing" {
+			continue
+		}
+		sawIndexMissing = true
+		break
+	}
+	if !sawIndexMissing {
+		t.Errorf("Expected index_missing issue, got %v", entry.Issues)
+	}
+}
+
+// TestVaultDoctor_LayoutDrift uses a vault that has neither data/ nor
+// knowledge/, only a loose file at its root. The entry must have status
+// "warning" and report either "layout_missing" or "non_standard_layout".
+func TestVaultDoctor_LayoutDrift(t *testing.T) {
+	vaultDir := t.TempDir()
+
+	// Only a stray file at the vault root; no bucket directories at all.
+	strayFile := filepath.Join(vaultDir, "something.txt")
+	if wrErr := os.WriteFile(strayFile, []byte("hi"), 0644); wrErr != nil {
+		t.Fatal(wrErr)
+	}
+
+	repoRoot := setupDoctorRepo(t, "layout_vault", "my_proj", vaultDir)
+	report, err := VaultDoctor(repoRoot)
+	if err != nil {
+		t.Fatalf("unexpected error: %v", err)
+	}
+	if got := len(report); got != 1 {
+		t.Fatalf("expected 1 entry, got %d", got)
+	}
+
+	entry := report[0]
+	if entry.Status != "warning" {
+		t.Errorf("Status: want warning, got %s", entry.Status)
+	}
+
+	sawLayoutIssue := false
+	for _, issue := range entry.Issues {
+		switch issue {
+		case "layout_missing", "non_standard_layout":
+			sawLayoutIssue = true
+		}
+	}
+	if !sawLayoutIssue {
+		t.Errorf("Expected layout_missing or non_standard_layout, got %v", entry.Issues)
+	}
+}
+
+// TestVaultDoctor_EmptyVault creates the data/ and knowledge/ directories
+// without any files, plus an index database with no rows. The entry must have
+// status "warning" and include the "empty_vault" issue.
+func TestVaultDoctor_EmptyVault(t *testing.T) {
+	vaultDir := t.TempDir()
+
+	// Both bucket directories exist but hold nothing.
+	for _, bucketDir := range []string{"data", "knowledge"} {
+		if mkErr := os.MkdirAll(filepath.Join(vaultDir, bucketDir), 0755); mkErr != nil {
+			t.Fatal(mkErr)
+		}
+	}
+
+	// Opening and closing the index leaves a vault_index.db with no file rows.
+	indexDB, err := VaultIndexOpen(vaultDir)
+	if err != nil {
+		t.Fatalf("VaultIndexOpen: %v", err)
+	}
+	indexDB.Close()
+
+	repoRoot := setupDoctorRepo(t, "empty_vault", "my_proj", vaultDir)
+	report, err := VaultDoctor(repoRoot)
+	if err != nil {
+		t.Fatalf("unexpected error: %v", err)
+	}
+	if got := len(report); got != 1 {
+		t.Fatalf("expected 1 entry, got %d", got)
+	}
+
+	entry := report[0]
+	if entry.Status != "warning" {
+		t.Errorf("Status: want warning, got %s (issues: %v)", entry.Status, entry.Issues)
+	}
+
+	sawEmpty := false
+	for _, issue := range entry.Issues {
+		if issue != "empty_vault" {
+			continue
+		}
+		sawEmpty = true
+		break
+	}
+	if !sawEmpty {
+		t.Errorf("Expected empty_vault issue, got %v", entry.Issues)
+	}
+}
@@ -0,0 +1,21 @@
+package infra
+
+// VaultFile is the inventory record for one file stored inside a vault.
+// It groups three kinds of information: which vault and path the file belongs
+// to, metadata about its content, and where it sits in the bucket layout.
+type VaultFile struct {
+	// VaultID identifies the vault, e.g. "turismo_spain_app_turismo".
+	VaultID string `json:"vault_id"`
+	// VaultName is the vault's short name, e.g. "turismo_spain".
+	VaultName string `json:"vault_name"`
+	// RelPath is the path relative to the vault root, e.g. "data/raw/foo.csv".
+	RelPath string `json:"rel_path"`
+	// Size is the file size in bytes.
+	Size int64 `json:"size"`
+	// Mtime is the modification time in unix seconds (UTC).
+	Mtime int64 `json:"mtime"`
+	// Sha256 is the content hash, hex encoded in lowercase.
+	Sha256 string `json:"sha256"`
+	// Mime is the MIME type, e.g. "text/csv".
+	Mime string `json:"mime"`
+	// Ext is the file extension including the dot, e.g. ".csv".
+	Ext string `json:"ext"`
+	// Bucket is the top-level classification: "data" or "knowledge".
+	Bucket string `json:"bucket"`
+	// SubBucket is the second-level directory inside the bucket.
+	// Known values are raw, processed and exports under data, and decisions,
+	// domains, models, benchmarks and test_documents under knowledge.
+	// It is the empty string for files placed directly at the bucket root.
+	SubBucket string `json:"sub_bucket"`
+}
@@ -0,0 +1,49 @@
+-- Inventory of every file in the vault, keyed by its vault-relative path.
+CREATE TABLE IF NOT EXISTS files (
+    rel_path    TEXT PRIMARY KEY,
+    size        INTEGER NOT NULL,
+    mtime       INTEGER NOT NULL,
+    sha256      TEXT NOT NULL,
+    mime        TEXT NOT NULL DEFAULT '',
+    ext         TEXT NOT NULL DEFAULT '',
+    bucket      TEXT NOT NULL DEFAULT '',
+    sub_bucket  TEXT NOT NULL DEFAULT '',
+    indexed_at  INTEGER NOT NULL
+);
+CREATE INDEX IF NOT EXISTS idx_files_sha256 ON files(sha256);
+CREATE INDEX IF NOT EXISTS idx_files_bucket ON files(bucket, sub_bucket);
+
+-- Full-text index over file paths and extracted text.
+-- This is a regular (content-storing) FTS5 table on purpose. With content=''
+-- (contentless) every column reads back as NULL, so a filter such as
+-- WHERE rel_path = ? never matches and rows can be neither deleted nor
+-- updated by path, which is how the index writer refreshes them.
+-- NOTE(review): CREATE ... IF NOT EXISTS does not alter an index database that
+-- was already created with the contentless definition - confirm none exist.
+CREATE VIRTUAL TABLE IF NOT EXISTS files_fts USING fts5(
+    rel_path,
+    content_text,
+    tokenize='unicode61 remove_diacritics 2'
+);
+
+-- Column and row profile of a CSV file. Rows cascade-delete with the parent
+-- files row when foreign key enforcement is enabled on the connection.
+CREATE TABLE IF NOT EXISTS csv_profiles (
+    rel_path    TEXT PRIMARY KEY,
+    cols_json   TEXT NOT NULL,
+    n_rows      INTEGER NOT NULL,
+    encoding    TEXT NOT NULL DEFAULT '',
+    date_min    TEXT,
+    date_max    TEXT,
+    profiled_at INTEGER NOT NULL,
+    FOREIGN KEY (rel_path) REFERENCES files(rel_path) ON DELETE CASCADE
+);
+
+-- Text-extraction metadata of a PDF file (same cascade rule as csv_profiles).
+CREATE TABLE IF NOT EXISTS pdf_extracts (
+    rel_path     TEXT PRIMARY KEY,
+    page_count   INTEGER NOT NULL,
+    text_len     INTEGER NOT NULL,
+    extracted_to TEXT,
+    extracted_at INTEGER NOT NULL,
+    FOREIGN KEY (rel_path) REFERENCES files(rel_path) ON DELETE CASCADE
+);
+
+-- Title, front matter and headings of a knowledge document (same cascade rule).
+CREATE TABLE IF NOT EXISTS knowledge_docs (
+    rel_path         TEXT PRIMARY KEY,
+    title            TEXT NOT NULL DEFAULT '',
+    frontmatter_json TEXT NOT NULL DEFAULT '{}',
+    headings_json    TEXT NOT NULL DEFAULT '[]',
+    parsed_at        INTEGER NOT NULL,
+    FOREIGN KEY (rel_path) REFERENCES files(rel_path) ON DELETE CASCADE
+);
@@ -0,0 +1,30 @@
+package infra
+
+import (
+	"database/sql"
+	"embed"
+	"fmt"
+	"path/filepath"
+)
+
+// vaultIndexMigrationsFS holds the SQL files matched by the embed pattern
+// below. VaultIndexOpen hands it to ApplyMigrations together with the same
+// glob pattern.
+//
+//go:embed vault_index_migrations/*.sql
+var vaultIndexMigrationsFS embed.FS
+
+// VaultIndexOpen returns a *sql.DB for <vaultPath>/vault_index.db with every
+// embedded migration applied.
+//
+// The database file is opened through SQLiteOpen (documented by this package
+// as enabling WAL mode and foreign keys, and as creating the file when it is
+// absent). The embedded vault_index_migrations/*.sql files are then run
+// through ApplyMigrations. If the migrations fail, the handle is closed before
+// the error is returned.
+//
+// On success the caller owns the handle and must Close it. Errors are wrapped
+// with the "vault_index_open" prefix.
+func VaultIndexOpen(vaultPath string) (*sql.DB, error) {
+	db, openErr := SQLiteOpen(filepath.Join(vaultPath, "vault_index.db"), "")
+	if openErr != nil {
+		return nil, fmt.Errorf("vault_index_open: %w", openErr)
+	}
+
+	migErr := ApplyMigrations(db, vaultIndexMigrationsFS, "vault_index_migrations/*.sql")
+	if migErr == nil {
+		return db, nil
+	}
+
+	// Do not hand out a half-migrated database.
+	db.Close()
+	return nil, fmt.Errorf("vault_index_open: apply migrations: %w", migErr)
+}
@@ -0,0 +1,54 @@
+---
+name: vault_index_open
+kind: function
+lang: go
+domain: infra
+version: "1.0.0"
+purity: impure
+signature: "func VaultIndexOpen(vaultPath string) (*sql.DB, error)"
+description: "Abre (o crea) vault_index.db dentro de vaultPath con WAL + FK y aplica las migraciones embebidas idempotentemente. El caller cierra la conexion."
+tags: [vault, sqlite, index, migration, infra]
+uses_functions: ["sqlite_open_go_infra", "sqlite_apply_migrations_go_infra"]
+uses_types: []
+returns: []
+returns_optional: false
+error_type: "error_go_core"
+imports: [database/sql, embed, fmt, path/filepath]
+params:
+  - name: vaultPath
+    desc: "ruta absoluta o relativa al directorio raiz del vault"
+output: "*sql.DB apuntando a <vaultPath>/vault_index.db con schema completo aplicado; el caller es responsable de cerrar"
+tested: true
+tests:
+  - "crea vault_index.db en tmpdir vacio"
+  - "segunda apertura no falla (idempotente)"
+  - "todas las tablas esperadas existen en sqlite_master"
+  - "fts5 INSERT y MATCH funcionan"
+test_file_path: "functions/infra/vault_index_open_test.go"
+file_path: "functions/infra/vault_index_open.go"
+---
+
+## Ejemplo
+
+```go
+db, err := VaultIndexOpen("/data/vaults/turismo_spain")
+if err != nil {
+    log.Fatal(err)
+}
+defer db.Close()
+```
+
+## Notas
+
+El archivo de base de datos se crea en `<vaultPath>/vault_index.db`. Las migraciones
+viven en `vault_index_migrations/*.sql` embebidas via `//go:embed` en el mismo paquete.
+
+Schema creado por `001_init.sql`:
+- `files` — inventario de archivos (PK: rel_path)
+- `files_fts` — tabla FTS5 virtual para busqueda de texto (content_text lo llenan profilers posteriores)
+- `csv_profiles` — perfil de columnas/filas para .csv (FK → files)
+- `pdf_extracts` — metadatos de extraccion de texto para .pdf (FK → files)
+- `knowledge_docs` — headings/frontmatter para .md del bucket knowledge (FK → files)
+
+`SQLiteOpen` abre con WAL mode + foreign keys. `ApplyMigrations` es idempotente:
+los errores de "already exists" y "duplicate column" se ignoran silenciosamente.
@@ -0,0 +1,107 @@
+package infra
+
+import (
+	"database/sql"
+	"os"
+	"path/filepath"
+	"testing"
+)
+
+// TestVaultIndexOpen_CreatesDB verifies that opening the index in an empty
+// temporary directory leaves a vault_index.db file on disk.
+func TestVaultIndexOpen_CreatesDB(t *testing.T) {
+	t.Run("crea vault_index.db en tmpdir vacio", func(t *testing.T) {
+		dir := t.TempDir()
+		db, err := VaultIndexOpen(dir)
+		if err != nil {
+			t.Fatalf("VaultIndexOpen: %v", err)
+		}
+		defer db.Close()
+
+		dbPath := filepath.Join(dir, "vault_index.db")
+		// Any stat failure means the file cannot be confirmed, not only
+		// "does not exist" (for example a permission error).
+		if _, statErr := os.Stat(dbPath); statErr != nil {
+			if os.IsNotExist(statErr) {
+				t.Fatalf("vault_index.db no fue creado en %s", dir)
+			}
+			t.Fatalf("stat %s: %v", dbPath, statErr)
+		}
+	})
+}
+
+// TestVaultIndexOpen_Idempotent opens the same vault directory twice in a row,
+// closing the handle in between, and requires both opens to succeed.
+func TestVaultIndexOpen_Idempotent(t *testing.T) {
+	t.Run("segunda apertura no falla (idempotente)", func(t *testing.T) {
+		vaultDir := t.TempDir()
+
+		first, firstErr := VaultIndexOpen(vaultDir)
+		if firstErr != nil {
+			t.Fatalf("primera apertura: %v", firstErr)
+		}
+		first.Close()
+
+		second, secondErr := VaultIndexOpen(vaultDir)
+		if secondErr != nil {
+			t.Fatalf("segunda apertura: %v", secondErr)
+		}
+		second.Close()
+	})
+}
+
+// TestVaultIndexOpen_AppliesAllMigrations checks that every table created by
+// the embedded migration, including the files_fts virtual table, is present in
+// sqlite_master after opening a fresh index.
+func TestVaultIndexOpen_AppliesAllMigrations(t *testing.T) {
+	t.Run("todas las tablas esperadas existen en sqlite_master", func(t *testing.T) {
+		dir := t.TempDir()
+		db, err := VaultIndexOpen(dir)
+		if err != nil {
+			t.Fatalf("VaultIndexOpen: %v", err)
+		}
+		defer db.Close()
+
+		expectedTables := []string{
+			"files",
+			"files_fts", // virtual table; also listed in sqlite_master
+			"csv_profiles",
+			"pdf_extracts",
+			"knowledge_docs",
+		}
+		for _, tbl := range expectedTables {
+			assertTableExists(t, db, tbl)
+		}
+	})
+}
+
+// TestVaultIndexOpen_FTS5Works inserts one row directly into files_fts and
+// checks that a MATCH query for a word of its content_text finds exactly that
+// row.
+func TestVaultIndexOpen_FTS5Works(t *testing.T) {
+	t.Run("fts5 INSERT y MATCH funcionan", func(t *testing.T) {
+		vaultDir := t.TempDir()
+		db, err := VaultIndexOpen(vaultDir)
+		if err != nil {
+			t.Fatalf("VaultIndexOpen: %v", err)
+		}
+		defer db.Close()
+
+		// files_fts is populated by hand here; nothing fills it automatically.
+		const insertFTS = `INSERT INTO files_fts(rel_path, content_text) VALUES (?, ?)`
+		if _, err = db.Exec(insertFTS,
+			"data/raw/informe_ventas.csv", "ventas trimestrales empresa"); err != nil {
+			t.Fatalf("INSERT files_fts: %v", err)
+		}
+
+		var hits int
+		row := db.QueryRow(
+			`SELECT count(*) FROM files_fts WHERE files_fts MATCH 'ventas'`,
+		)
+		if err = row.Scan(&hits); err != nil {
+			t.Fatalf("FTS MATCH query: %v", err)
+		}
+		if hits != 1 {
+			t.Errorf("FTS MATCH: got %d rows, want 1", hits)
+		}
+	})
+}
+
+// assertTableExists fails the test (without stopping it) when sqlite_master
+// has no entry of type 'table' called name. Virtual tables are recorded with
+// type 'table' too, so they are accepted; indexes, triggers and views that
+// merely share the name are not.
+// A failure of the lookup query itself aborts the test.
+func assertTableExists(t *testing.T, db *sql.DB, name string) {
+	t.Helper()
+	var exists int
+	err := db.QueryRow(
+		`SELECT count(*) FROM sqlite_master WHERE type = 'table' AND name = ?`, name,
+	).Scan(&exists)
+	if err != nil {
+		t.Fatalf("sqlite_master query for %q: %v", name, err)
+	}
+	if exists == 0 {
+		t.Errorf("table/vtable %q not found in sqlite_master", name)
+	}
+}
@@ -0,0 +1,154 @@
+package infra
+
+import (
+	"database/sql"
+	"fmt"
+	"strings"
+	"time"
+)
+
+// WriteReport carries the row counters produced by one VaultIndexWrite call.
+type WriteReport struct {
+	// Inserted is the number of rows newly added to files.
+	Inserted int
+	// Updated is the number of rows in files that were upserted over an
+	// existing row.
+	Updated int
+	// Pruned is the number of rows deleted from files; only set when the
+	// call was made with prune=true.
+	Pruned int
+	// FTS is the number of rows inserted into files_fts.
+	FTS int
+}
+
+// VaultIndexWrite upserts files into the files table of the vault index opened
+// as db, rewrites the matching files_fts rows and, when prune is true, deletes
+// every previously indexed row whose rel_path is not present in files.
+//
+// Parameters:
+//   - db:    open handle on vault_index.db (typically from VaultIndexOpen).
+//   - files: rows to insert or update; may be empty.
+//   - prune: when true, rows absent from files are removed. An empty files
+//     slice with prune=true therefore removes every row.
+//
+// It returns a WriteReport with the Inserted/Updated/Pruned/FTS counters and a
+// wrapped error if any statement fails. With an empty slice and prune=false
+// the call returns immediately without opening a transaction.
+//
+// All changes run inside one transaction, which is rolled back on every error
+// path.
+//
+// Counting: the rel_paths already stored are loaded once before the loop. A
+// row counts as Updated when its rel_path was already stored or appeared
+// earlier in files, and as Inserted otherwise.
+//
+// FTS: for each file the files_fts row is deleted by rel_path and re-inserted
+// with an empty content_text; filling content_text is left to later profilers.
+// NOTE(review): if files_fts is declared contentless (content=''), column
+// filters such as rel_path = ? match nothing, so these deletes are silent
+// no-ops and FTS rows accumulate across writes — confirm against the schema.
+//
+// Prune: the rows to remove are computed in Go (stored paths minus incoming
+// paths) and deleted with bound parameters in fixed-size batches, from
+// files_fts and from files. Deletes on files are expected to cascade to
+// csv_profiles, pdf_extracts and knowledge_docs through their foreign keys.
+func VaultIndexWrite(db *sql.DB, files []VaultFile, prune bool) (WriteReport, error) {
+	var report WriteReport
+	if len(files) == 0 && !prune {
+		return report, nil
+	}
+
+	tx, err := db.Begin()
+	if err != nil {
+		return report, fmt.Errorf("vault_index_write: begin tx: %w", err)
+	}
+	// Unconditional rollback: it is a no-op once Commit has succeeded, and it
+	// cannot be skipped by an error held in a shadowed variable.
+	defer tx.Rollback() //nolint:errcheck
+
+	existing, err := vaultIndexExistingPaths(tx)
+	if err != nil {
+		return report, err
+	}
+
+	now := time.Now().Unix()
+
+	upsertStmt, err := tx.Prepare(`
+		INSERT INTO files (rel_path, size, mtime, sha256, mime, ext, bucket, sub_bucket, indexed_at)
+		VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)
+		ON CONFLICT(rel_path) DO UPDATE SET
+			size       = excluded.size,
+			mtime      = excluded.mtime,
+			sha256     = excluded.sha256,
+			mime       = excluded.mime,
+			ext        = excluded.ext,
+			bucket     = excluded.bucket,
+			sub_bucket = excluded.sub_bucket,
+			indexed_at = excluded.indexed_at
+	`)
+	if err != nil {
+		return report, fmt.Errorf("vault_index_write: prepare upsert: %w", err)
+	}
+	defer upsertStmt.Close()
+
+	ftsDeleteStmt, err := tx.Prepare(`DELETE FROM files_fts WHERE rel_path = ?`)
+	if err != nil {
+		return report, fmt.Errorf("vault_index_write: prepare fts delete: %w", err)
+	}
+	defer ftsDeleteStmt.Close()
+
+	ftsInsertStmt, err := tx.Prepare(`INSERT INTO files_fts(rel_path, content_text) VALUES (?, '')`)
+	if err != nil {
+		return report, fmt.Errorf("vault_index_write: prepare fts insert: %w", err)
+	}
+	defer ftsInsertStmt.Close()
+
+	// incoming records every rel_path written by this call; it drives both the
+	// insert/update classification of duplicates and the prune set below.
+	incoming := make(map[string]struct{}, len(files))
+	for _, f := range files {
+		if _, err = upsertStmt.Exec(
+			f.RelPath, f.Size, f.Mtime, f.Sha256,
+			f.Mime, f.Ext, f.Bucket, f.SubBucket, now,
+		); err != nil {
+			return report, fmt.Errorf("vault_index_write: upsert %q: %w", f.RelPath, err)
+		}
+
+		_, stored := existing[f.RelPath]
+		_, seen := incoming[f.RelPath]
+		if stored || seen {
+			report.Updated++
+		} else {
+			report.Inserted++
+		}
+		incoming[f.RelPath] = struct{}{}
+
+		// Refresh the FTS row.
+		if _, err = ftsDeleteStmt.Exec(f.RelPath); err != nil {
+			return report, fmt.Errorf("vault_index_write: fts delete %q: %w", f.RelPath, err)
+		}
+		if _, err = ftsInsertStmt.Exec(f.RelPath); err != nil {
+			return report, fmt.Errorf("vault_index_write: fts insert %q: %w", f.RelPath, err)
+		}
+		report.FTS++
+	}
+
+	if prune {
+		// Inside the transaction the table holds existing ∪ incoming, so the
+		// rows "not in the incoming slice" are exactly existing − incoming.
+		stale := make([]string, 0, len(existing))
+		for rp := range existing {
+			if _, keep := incoming[rp]; !keep {
+				stale = append(stale, rp)
+			}
+		}
+
+		label := "prune"
+		if len(files) == 0 {
+			label = "prune all"
+		}
+		report.Pruned, err = vaultIndexDeletePaths(tx, stale)
+		if err != nil {
+			return report, fmt.Errorf("vault_index_write: %s: %w", label, err)
+		}
+	}
+
+	if err = tx.Commit(); err != nil {
+		return report, fmt.Errorf("vault_index_write: commit: %w", err)
+	}
+	return report, nil
+}
+
+// vaultIndexExistingPaths returns the set of rel_path values currently stored
+// in files, read through tx.
+func vaultIndexExistingPaths(tx *sql.Tx) (map[string]struct{}, error) {
+	rows, err := tx.Query(`SELECT rel_path FROM files`)
+	if err != nil {
+		return nil, fmt.Errorf("vault_index_write: query existing: %w", err)
+	}
+	defer rows.Close()
+
+	paths := make(map[string]struct{})
+	for rows.Next() {
+		var rp string
+		if err := rows.Scan(&rp); err != nil {
+			return nil, fmt.Errorf("vault_index_write: scan existing: %w", err)
+		}
+		paths[rp] = struct{}{}
+	}
+	if err := rows.Err(); err != nil {
+		return nil, fmt.Errorf("vault_index_write: rows err: %w", err)
+	}
+	return paths, nil
+}
+
+// vaultIndexDeletePaths removes the given rel_paths from files_fts and files
+// using bound parameters, in batches small enough to stay under SQLite's
+// per-statement variable limit. It returns the number of rows deleted from
+// files.
+func vaultIndexDeletePaths(tx *sql.Tx, paths []string) (int, error) {
+	const batchSize = 500
+
+	deleted := 0
+	for start := 0; start < len(paths); start += batchSize {
+		end := start + batchSize
+		if end > len(paths) {
+			end = len(paths)
+		}
+		batch := paths[start:end]
+
+		placeholders := strings.TrimSuffix(strings.Repeat("?,", len(batch)), ",")
+		args := make([]interface{}, len(batch))
+		for i, p := range batch {
+			args[i] = p
+		}
+
+		// Drop the search rows first so pruned files stop being searchable.
+		if _, err := tx.Exec(
+			`DELETE FROM files_fts WHERE rel_path IN (`+placeholders+`)`, args...,
+		); err != nil {
+			return deleted, fmt.Errorf("fts: %w", err)
+		}
+
+		res, err := tx.Exec(
+			`DELETE FROM files WHERE rel_path IN (`+placeholders+`)`, args...,
+		)
+		if err != nil {
+			return deleted, err
+		}
+		// A driver that cannot report the count leaves the counter unchanged.
+		if n, raErr := res.RowsAffected(); raErr == nil {
+			deleted += int(n)
+		}
+	}
+	return deleted, nil
+}
@@ -0,0 +1,84 @@
+---
+name: vault_index_write
+kind: function
+lang: go
+domain: infra
+version: "1.0.0"
+purity: impure
+signature: "func VaultIndexWrite(db *sql.DB, files []VaultFile, prune bool) (WriteReport, error)"
+description: "Upserta un slice de VaultFile en vault_index.db (tabla files + FTS5 files_fts) dentro de una sola transaccion. Cuenta Inserted/Updated/FTS. Con prune=true elimina filas no presentes en el slice."
+tags: [vault, sqlite, index, write, upsert, fts, infra]
+uses_functions: []
+uses_types: ["vault_file_go_infra"]
+returns: []
+returns_optional: false
+error_type: "error_go_core"
+imports: [database/sql, fmt, strings, time]
+params:
+  - name: db
+    desc: "*sql.DB abierto sobre vault_index.db (tipicamente retornado por VaultIndexOpen)"
+  - name: files
+    desc: "slice de VaultFile a insertar/actualizar; puede ser vacio"
+  - name: prune
+    desc: "si true, elimina de 'files' todas las filas cuyo rel_path no este en el slice (sincronizacion destructiva)"
+output: "WriteReport con conteos Inserted/Updated/Pruned/FTS; error si falla la transaccion"
+tested: true
+tests:
+  - "N archivos nuevos — Inserted=N"
+  - "re-escritura con mtime distinto — Updated=N"
+  - "prune elimina filas ausentes"
+  - "sin prune, filas previas persisten"
+  - "FTS5 MATCH funciona tras escritura"
+test_file_path: "functions/infra/vault_index_write_test.go"
+file_path: "functions/infra/vault_index_write.go"
+---
+
+## Ejemplo
+
+```go
+db, _ := VaultIndexOpen("/data/vaults/turismo")
+defer db.Close()
+
+files, _ := VaultInventoryScan("/data/vaults/turismo", "turismo_v1", "turismo")
+report, err := VaultIndexWrite(db, files, true)
+if err != nil {
+    log.Fatal(err)
+}
+fmt.Printf("inserted=%d updated=%d pruned=%d fts=%d\n",
+    report.Inserted, report.Updated, report.Pruned, report.FTS)
+```
+
+## Notas
+
+### WriteReport
+Struct local al paquete infra:
+```go
+type WriteReport struct {
+    Inserted int
+    Updated  int
+    Pruned   int
+    FTS      int
+}
+```
+
+### Estrategia de conteo Inserted vs Updated
+Se carga el conjunto de rel_paths existentes en un map antes del loop. Un upsert
+se clasifica como Inserted si el rel_path no estaba en el map, Updated si estaba.
+Esto evita N+1 SELECTs y es correcto porque la transaccion serializa los cambios.
+
+### FTS5
+`files_fts` usa `content=''` (tabla FTS5 *contentless*: solo guarda el indice, no el
+texto de las columnas). Para cada archivo se borra la fila FTS existente y se
+reinserta con `content_text=''`. Los profilers posteriores (csv_profiles,
+pdf_extracts, knowledge_docs) son responsables de actualizar `content_text` con
+texto indexable real.
+
+### Prune
+Con `prune=true` se construye un IN clause con los rel_paths del slice. La FK con
+`ON DELETE CASCADE` propaga el DELETE a csv_profiles, pdf_extracts y knowledge_docs
+automaticamente. Con slice vacio + prune=true se borra todo (DELETE FROM files).
+
+### Escapado SQL
+El IN clause se construye escapando las comillas simples en rel_path (duplicandolas),
+lo que evita inyeccion SQL en rutas con apostrofos. Para entornos con rutas controladas
+(interior de vaults sin apostrofos) esto es suficiente; para entornos adversariales,
+usar parametros enlazados (placeholders `?`) en un prepared statement.
@@ -0,0 +1,210 @@
+package infra
+
+import (
+	"path"
+	"testing"
+	"time"
+)
+
+// makeTestVaultFile returns a minimal VaultFile fixture for relPath with the
+// given MIME type, bucket and sub-bucket. Vault identity, size and sha256 are
+// fixed dummy values and Mtime is the current time.
+// Ext is derived from relPath so that fixtures such as
+// "knowledge/decisions/x.md" carry ".md" rather than a hard-coded ".csv".
+func makeTestVaultFile(relPath, mime, bucket, subBucket string) VaultFile {
+	return VaultFile{
+		VaultID:   "test_vault",
+		VaultName: "test",
+		RelPath:   relPath,
+		Size:      100,
+		Mtime:     time.Now().Unix(),
+		Sha256:    "abc123def456abc123def456abc123def456abc123def456abc123def456abc1",
+		Mime:      mime,
+		Ext:       path.Ext(relPath), // rel paths use forward slashes
+		Bucket:    bucket,
+		SubBucket: subBucket,
+	}
+}
+
+// openInMemoryVaultIndex opens a vault index inside a fresh t.TempDir() and
+// returns it as a closer; the test is aborted if the index cannot be opened.
+// Despite the name, the database is created on disk in the temp directory,
+// and closing the returned handle is left to the caller.
+func openInMemoryVaultIndex(t *testing.T) interface{ Close() error } {
+	t.Helper()
+	handle, openErr := VaultIndexOpen(t.TempDir())
+	if openErr != nil {
+		t.Fatalf("VaultIndexOpen: %v", openErr)
+	}
+	return handle
+}
+
+// TestVaultIndexWrite_FreshInsert writes three files into an empty index and
+// expects Inserted=3, Updated=0, Pruned=0 and FTS=3.
+func TestVaultIndexWrite_FreshInsert(t *testing.T) {
+	t.Run("N archivos nuevos — Inserted=N", func(t *testing.T) {
+		db, openErr := VaultIndexOpen(t.TempDir())
+		if openErr != nil {
+			t.Fatal(openErr)
+		}
+		defer db.Close()
+
+		batch := []VaultFile{
+			makeTestVaultFile("data/raw/a.csv", "text/csv", "data", "raw"),
+			makeTestVaultFile("data/raw/b.csv", "text/csv", "data", "raw"),
+			makeTestVaultFile("knowledge/decisions/x.md", "text/markdown", "knowledge", "decisions"),
+		}
+
+		got, writeErr := VaultIndexWrite(db, batch, false)
+		if writeErr != nil {
+			t.Fatalf("VaultIndexWrite: %v", writeErr)
+		}
+
+		if got.Inserted != 3 {
+			t.Errorf("Inserted = %d, want 3", got.Inserted)
+		}
+		if got.Updated != 0 {
+			t.Errorf("Updated = %d, want 0", got.Updated)
+		}
+		if got.Pruned != 0 {
+			t.Errorf("Pruned = %d, want 0", got.Pruned)
+		}
+		if got.FTS != 3 {
+			t.Errorf("FTS = %d, want 3", got.FTS)
+		}
+	})
+}
+
+// TestVaultIndexWrite_Upsert writes the same two files twice, changing their
+// mtime in between, and expects the second write to report Inserted=0 and
+// Updated=2.
+func TestVaultIndexWrite_Upsert(t *testing.T) {
+	t.Run("re-escritura con mtime distinto — Updated=N", func(t *testing.T) {
+		db, openErr := VaultIndexOpen(t.TempDir())
+		if openErr != nil {
+			t.Fatal(openErr)
+		}
+		defer db.Close()
+
+		batch := []VaultFile{
+			makeTestVaultFile("data/raw/a.csv", "text/csv", "data", "raw"),
+			makeTestVaultFile("data/raw/b.csv", "text/csv", "data", "raw"),
+		}
+
+		_, firstErr := VaultIndexWrite(db, batch, false)
+		if firstErr != nil {
+			t.Fatalf("first write: %v", firstErr)
+		}
+
+		// Pretend both files changed on disk.
+		batch[0].Mtime = time.Now().Unix() + 100
+		batch[1].Mtime = time.Now().Unix() + 200
+
+		got, secondErr := VaultIndexWrite(db, batch, false)
+		if secondErr != nil {
+			t.Fatalf("second write: %v", secondErr)
+		}
+		if got.Inserted != 0 {
+			t.Errorf("Inserted = %d, want 0", got.Inserted)
+		}
+		if got.Updated != 2 {
+			t.Errorf("Updated = %d, want 2", got.Updated)
+		}
+	})
+}
+
+// TestVaultIndexWrite_Prune indexes files A and B, then writes only A with
+// prune=true. It expects Pruned=1 and no row left for B in files.
+func TestVaultIndexWrite_Prune(t *testing.T) {
+	t.Run("prune elimina filas ausentes", func(t *testing.T) {
+		db, openErr := VaultIndexOpen(t.TempDir())
+		if openErr != nil {
+			t.Fatal(openErr)
+		}
+		defer db.Close()
+
+		// Seed the index with A and B.
+		both := []VaultFile{
+			makeTestVaultFile("data/raw/a.csv", "text/csv", "data", "raw"),
+			makeTestVaultFile("data/raw/b.csv", "text/csv", "data", "raw"),
+		}
+		_, seedErr := VaultIndexWrite(db, both, false)
+		if seedErr != nil {
+			t.Fatalf("first write: %v", seedErr)
+		}
+
+		// Second pass carries only A and asks for pruning, so B must go.
+		got, pruneErr := VaultIndexWrite(db, []VaultFile{both[0]}, true)
+		if pruneErr != nil {
+			t.Fatalf("prune write: %v", pruneErr)
+		}
+		if got.Pruned != 1 {
+			t.Errorf("Pruned = %d, want 1", got.Pruned)
+		}
+
+		// B must no longer be present in files.
+		var remaining int
+		row := db.QueryRow(`SELECT count(*) FROM files WHERE rel_path = 'data/raw/b.csv'`)
+		if scanErr := row.Scan(&remaining); scanErr != nil {
+			t.Fatalf("query: %v", scanErr)
+		}
+		if remaining != 0 {
+			t.Errorf("b.csv still present after prune")
+		}
+	})
+}
+
+// TestVaultIndexWrite_NoPrune indexes files A and B, then writes only A with
+// prune=false. It expects Pruned=0 and both rows still present in files.
+func TestVaultIndexWrite_NoPrune(t *testing.T) {
+	t.Run("sin prune, filas previas persisten", func(t *testing.T) {
+		db, openErr := VaultIndexOpen(t.TempDir())
+		if openErr != nil {
+			t.Fatal(openErr)
+		}
+		defer db.Close()
+
+		both := []VaultFile{
+			makeTestVaultFile("data/raw/a.csv", "text/csv", "data", "raw"),
+			makeTestVaultFile("data/raw/b.csv", "text/csv", "data", "raw"),
+		}
+		_, seedErr := VaultIndexWrite(db, both, false)
+		if seedErr != nil {
+			t.Fatalf("first write: %v", seedErr)
+		}
+
+		// Second pass carries only A and does not prune, so B must survive.
+		got, writeErr := VaultIndexWrite(db, []VaultFile{both[0]}, false)
+		if writeErr != nil {
+			t.Fatalf("second write: %v", writeErr)
+		}
+		if got.Pruned != 0 {
+			t.Errorf("Pruned = %d, want 0", got.Pruned)
+		}
+
+		var total int
+		row := db.QueryRow(`SELECT count(*) FROM files`)
+		if scanErr := row.Scan(&total); scanErr != nil {
+			t.Fatalf("query: %v", scanErr)
+		}
+		if total != 2 {
+			t.Errorf("files count = %d, want 2", total)
+		}
+	})
+}
+
+// TestVaultIndexWrite_FTSMatch writes two files and checks that a prefix query
+// on the rel_path column of files_fts ("rel_path:foo*") matches exactly one of
+// them.
+func TestVaultIndexWrite_FTSMatch(t *testing.T) {
+	t.Run("FTS5 MATCH funciona tras escritura", func(t *testing.T) {
+		db, openErr := VaultIndexOpen(t.TempDir())
+		if openErr != nil {
+			t.Fatal(openErr)
+		}
+		defer db.Close()
+
+		batch := []VaultFile{
+			makeTestVaultFile("data/raw/foo_report.csv", "text/csv", "data", "raw"),
+			makeTestVaultFile("data/raw/bar_data.csv", "text/csv", "data", "raw"),
+		}
+		_, writeErr := VaultIndexWrite(db, batch, false)
+		if writeErr != nil {
+			t.Fatalf("write: %v", writeErr)
+		}
+
+		// Prefix search restricted to the rel_path column.
+		var hits int
+		row := db.QueryRow(
+			`SELECT count(*) FROM files_fts WHERE files_fts MATCH 'rel_path:foo*'`,
+		)
+		if scanErr := row.Scan(&hits); scanErr != nil {
+			t.Fatalf("FTS MATCH query: %v", scanErr)
+		}
+		if hits != 1 {
+			t.Errorf("FTS MATCH rel_path:foo* = %d rows, want 1", hits)
+		}
+	})
+}
@@ -0,0 +1,174 @@
+package infra
+
+import (
+	"crypto/sha256"
+	"encoding/hex"
+	"fmt"
+	"io"
+	"net/http"
+	"os"
+	"path/filepath"
+	"sort"
+	"strings"
+)
+
+// VaultInventoryScan walks vaultPath and returns a VaultFile slice (sorted by RelPath)
+// for every regular file found, skipping:
+//   - vault_index.db, vault_index.db-shm, vault_index.db-wal
+//   - .git/ directories at any depth
+//   - hidden files/dirs (names starting with ".") at the vault root level only
+//
+// For each file it computes: relative path (forward slashes), size, mtime (unix UTC),
+// sha256 (streaming, hex lowercase), MIME type, extension, bucket and sub-bucket.
+//
+// MIME detection priority:
+//  1. Extension override: .csv → text/csv, .md → text/markdown, .parquet → application/parquet
+//  2. http.DetectContentType on first 512 bytes (magic bytes, stdlib)
+//
+// NOTE: file_validate_type_go_infra (FileValidateType) was not used here because its
+// signature requires an allowedTypes allowlist and returns (mime, bool) — it is designed
+// for upload validation, not for open-ended inventory scanning where any MIME is valid.
+// http.DetectContentType provides the same magic-byte detection without the allowlist
+// coupling and handles a broader set of formats including text/plain for CSV fallback.
+func VaultInventoryScan(vaultPath, vaultID, vaultName string) ([]VaultFile, error) {
+	var files []VaultFile
+
+	err := filepath.WalkDir(vaultPath, func(path string, d os.DirEntry, err error) error {
+		if err != nil {
+			return err
+		}
+
+		name := d.Name()
+
+		// Skip .git directories at any depth.
+		if d.IsDir() && name == ".git" {
+			return filepath.SkipDir
+		}
+
+		// Skip hidden entries (names starting with ".") at vault root only.
+		if strings.HasPrefix(name, ".") {
+			rel, relErr := filepath.Rel(vaultPath, path)
+			if relErr == nil {
+				// At root level the relative path has no separator.
+				if !strings.Contains(filepath.ToSlash(rel), "/") {
+					if d.IsDir() {
+						return filepath.SkipDir
+					}
+					return nil
+				}
+			}
+		}
+
+		if d.IsDir() {
+			return nil
+		}
+
+		// Skip vault_index.db and its WAL/SHM sidecar files.
+		if name == "vault_index.db" || name == "vault_index.db-shm" || name == "vault_index.db-wal" {
+			return nil
+		}
+
+		rel, err := filepath.Rel(vaultPath, path)
+		if err != nil {
+			return fmt.Errorf("vault_inventory_scan: rel path for %q: %w", path, err)
+		}
+		rel = filepath.ToSlash(rel)
+
+		info, err := d.Info()
+		if err != nil {
+			return fmt.Errorf("vault_inventory_scan: stat %q: %w", path, err)
+		}
+
+		// Compute sha256 by streaming — avoids loading large files into memory.
+		sha, err := fileSha256(path)
+		if err != nil {
+			return fmt.Errorf("vault_inventory_scan: sha256 %q: %w", path, err)
+		}
+
+		mime, err := detectVaultFileMime(path, name)
+		if err != nil {
+			return fmt.Errorf("vault_inventory_scan: mime %q: %w", path, err)
+		}
+
+		ext := strings.ToLower(filepath.Ext(name))
+		bucket, subBucket := vaultBucketParts(rel)
+
+		files = append(files, VaultFile{
+			VaultID:   vaultID,
+			VaultName: vaultName,
+			RelPath:   rel,
+			Size:      info.Size(),
+			Mtime:     info.ModTime().UTC().Unix(),
+			Sha256:    sha,
+			Mime:      mime,
+			Ext:       ext,
+			Bucket:    bucket,
+			SubBucket: subBucket,
+		})
+		return nil
+	})
+	if err != nil {
+		return nil, fmt.Errorf("vault_inventory_scan: walk %q: %w", vaultPath, err)
+	}
+
+	sort.Slice(files, func(i, j int) bool {
+		return files[i].RelPath < files[j].RelPath
+	})
+	return files, nil
+}
+
+// fileSha256 returns the SHA-256 digest of the file at path as lowercase hex.
+// The file is read in fixed-size chunks, so its content is never held in
+// memory as a whole. Open and read errors are returned unchanged.
+func fileSha256(path string) (string, error) {
+	src, openErr := os.Open(path)
+	if openErr != nil {
+		return "", openErr
+	}
+	defer src.Close()
+
+	hasher := sha256.New()
+	chunk := make([]byte, 32*1024)
+	for {
+		n, readErr := src.Read(chunk)
+		if n > 0 {
+			hasher.Write(chunk[:n]) // hash.Hash.Write never returns an error
+		}
+		if readErr == io.EOF {
+			break
+		}
+		if readErr != nil {
+			return "", readErr
+		}
+	}
+
+	digest := hasher.Sum(nil)
+	return hex.EncodeToString(digest), nil
+}
+
+// detectVaultFileMime decides the MIME type recorded for a vault file.
+// name supplies the extension and path is the file to sniff.
+//
+// The lowercase extension is checked first: .csv, .md and .parquet map to
+// fixed types without touching the file. Anything else is opened and up to 512
+// bytes from a single read are passed to http.DetectContentType.
+// Open and read errors (other than io.EOF) are returned unchanged.
+func detectVaultFileMime(path, name string) (string, error) {
+	lowerExt := strings.ToLower(filepath.Ext(name))
+	if lowerExt == ".csv" {
+		return "text/csv", nil
+	}
+	if lowerExt == ".md" {
+		return "text/markdown", nil
+	}
+	if lowerExt == ".parquet" {
+		return "application/parquet", nil
+	}
+
+	fh, openErr := os.Open(path)
+	if openErr != nil {
+		return "", openErr
+	}
+	defer fh.Close()
+
+	head := make([]byte, 512)
+	got, readErr := fh.Read(head)
+	// io.EOF only signals an empty file; sniffing zero bytes is still valid.
+	if readErr == nil || readErr == io.EOF {
+		return http.DetectContentType(head[:got]), nil
+	}
+	return "", readErr
+}
+
+// vaultBucketParts derives the bucket (top-level directory) and sub-bucket
+// (second-level directory) from a forward-slash path relative to the vault
+// root.
+//
+// Only directory components are reported, never the file name:
+//   - "data/raw/foo.csv" → ("data", "raw")
+//   - "data/foo.csv"     → ("data", "")   file at the bucket root
+//   - "foo.csv"          → ("", "")       file at the vault root
+//
+// The top-level directory is returned as-is; it is not checked against the
+// "data"/"knowledge" names.
+func vaultBucketParts(relPath string) (bucket, subBucket string) {
+	// Split into at most three pieces: [top dir, second dir, remainder].
+	// The last piece is always the file name (or the rest of the path), so a
+	// directory exists at index i only when there are more than i+1 pieces.
+	parts := strings.SplitN(relPath, "/", 3)
+	if len(parts) >= 2 {
+		bucket = parts[0]
+	}
+	if len(parts) == 3 {
+		subBucket = parts[1]
+	}
+	return bucket, subBucket
+}
@@ -0,0 +1,74 @@
+---
+name: vault_inventory_scan
+kind: function
+lang: go
+domain: infra
+version: "1.0.0"
+purity: impure
+signature: "func VaultInventoryScan(vaultPath, vaultID, vaultName string) ([]VaultFile, error)"
+description: "Recorre vaultPath con filepath.WalkDir y retorna un slice de VaultFile ordenado por RelPath para cada archivo regular, computando sha256 por streaming, MIME por extension/magic y bucket/sub-bucket por posicion en el arbol."
+tags: [vault, inventory, scan, filesystem, sha256, mime, infra]
+uses_functions: []
+uses_types: ["vault_file_go_infra"]
+returns: []
+returns_optional: false
+error_type: "error_go_core"
+imports: [crypto/sha256, encoding/hex, fmt, io, net/http, os, path/filepath, sort, strings]
+params:
+  - name: vaultPath
+    desc: "ruta absoluta o relativa al directorio raiz del vault"
+  - name: vaultID
+    desc: "identificador del vault (ej: turismo_spain_app_turismo) — se copia a cada VaultFile"
+  - name: vaultName
+    desc: "nombre legible del vault (ej: turismo_spain) — se copia a cada VaultFile"
+output: "slice de VaultFile ordenado lexicograficamente por RelPath; slice vacio (no nil) si el vault esta vacio"
+tested: true
+tests:
+  - "tmpdir vacio retorna slice vacio"
+  - "data layout — bucket y sub_bucket correctos"
+  - "knowledge layout — bucket y sub_bucket correctos"
+  - "omite vault_index.db y .git"
+  - "sha256 determinista para mismo contenido"
+  - "orden lexicografico del resultado"
+test_file_path: "functions/infra/vault_inventory_scan_test.go"
+file_path: "functions/infra/vault_inventory_scan.go"
+---
+
+## Ejemplo
+
+```go
+files, err := VaultInventoryScan("/data/vaults/turismo_spain", "turismo_spain_v1", "turismo_spain")
+if err != nil {
+    log.Fatal(err)
+}
+for _, f := range files {
+    fmt.Printf("%s  %s  %s/%s\n", f.RelPath, f.Mime, f.Bucket, f.SubBucket)
+}
+```
+
+## Notas
+
+### Archivos omitidos
+- `vault_index.db`, `vault_index.db-shm`, `vault_index.db-wal` (siempre)
+- `.git/` en cualquier profundidad (SkipDir)
+- Entradas cuyo nombre empieza por `.` solo en la raiz del vault (nivel 0)
+
+### Deteccion de MIME
+`file_validate_type_go_infra` (FileValidateType) no se usa porque su firma
+requiere una lista blanca de tipos permitidos y retorna (mime, bool) — esta
+disenada para validacion de uploads, no para escaneo inventarial donde
+cualquier MIME es valido. Se usan en su lugar:
+
+1. Override por extension (prioridad alta): `.csv` → `text/csv`, `.md` → `text/markdown`,
+   `.parquet` → `application/parquet`. Necesario porque `http.DetectContentType`
+   clasifica CSV como `text/plain` y no conoce Parquet.
+2. `http.DetectContentType` sobre primeros 512 bytes (magic bytes, stdlib) para el resto.
+
+### SHA-256
+Calculado por streaming con `io.Copy` a `sha256.New()` — no carga el archivo completo
+a memoria. Valido para archivos de cualquier tamano.
+
+### Bucket / SubBucket
+Derivados de la posicion en el arbol:
+- `bucket` = primer segmento del RelPath (tipicamente "data" o "knowledge")
+- `subBucket` = segundo segmento si existe; vacio si el archivo esta en la raiz del bucket
@@ -0,0 +1,182 @@
+package infra
+
+import (
+	"os"
+	"path/filepath"
+	"testing"
+)
+
+// writeTestFile writes content to the file rel inside dir, creating any
+// missing parent directories. rel uses forward slashes and is converted to the
+// host separator. Any filesystem error fails the calling test immediately.
+func writeTestFile(t *testing.T, dir, rel, content string) {
+	t.Helper()
+
+	target := filepath.Join(dir, filepath.FromSlash(rel))
+	parent := filepath.Dir(target)
+
+	if mkErr := os.MkdirAll(parent, 0o755); mkErr != nil {
+		t.Fatalf("mkdir %s: %v", parent, mkErr)
+	}
+	if wrErr := os.WriteFile(target, []byte(content), 0o644); wrErr != nil {
+		t.Fatalf("write %s: %v", target, wrErr)
+	}
+}
+
+// TestVaultInventoryScan_Empty verifies that scanning an empty directory
+// succeeds and reports no files.
+func TestVaultInventoryScan_Empty(t *testing.T) {
+	t.Run("tmpdir vacio retorna slice vacio", func(t *testing.T) {
+		got, err := VaultInventoryScan(t.TempDir(), "v1", "test")
+		if err != nil {
+			t.Fatalf("unexpected error: %v", err)
+		}
+		if count := len(got); count != 0 {
+			t.Errorf("expected 0 files, got %d", count)
+		}
+	})
+}
+
+// TestVaultInventoryScan_DataLayout scans a vault with two files under data/
+// and checks the path, bucket, sub-bucket, MIME type, extension and vault ID
+// recorded for them.
+func TestVaultInventoryScan_DataLayout(t *testing.T) {
+	t.Run("data layout — bucket y sub_bucket correctos", func(t *testing.T) {
+		root := t.TempDir()
+		writeTestFile(t, root, "data/raw/a.csv", "col1,col2\n1,2\n")
+		writeTestFile(t, root, "data/processed/b.parquet", "PAR1fakedata")
+
+		got, err := VaultInventoryScan(root, "vid", "vname")
+		if err != nil {
+			t.Fatalf("unexpected error: %v", err)
+		}
+		if len(got) != 2 {
+			t.Fatalf("expected 2 files, got %d", len(got))
+		}
+
+		// Results are sorted by RelPath, so data/processed/... precedes data/raw/...
+		parquet, csv := got[0], got[1]
+
+		if parquet.RelPath != "data/processed/b.parquet" {
+			t.Errorf("files[0].RelPath = %q, want data/processed/b.parquet", parquet.RelPath)
+		}
+		if parquet.Bucket != "data" {
+			t.Errorf("files[0].Bucket = %q, want data", parquet.Bucket)
+		}
+		if parquet.SubBucket != "processed" {
+			t.Errorf("files[0].SubBucket = %q, want processed", parquet.SubBucket)
+		}
+		if parquet.Mime != "application/parquet" {
+			t.Errorf("files[0].Mime = %q, want application/parquet", parquet.Mime)
+		}
+		if parquet.Ext != ".parquet" {
+			t.Errorf("files[0].Ext = %q, want .parquet", parquet.Ext)
+		}
+		if parquet.VaultID != "vid" {
+			t.Errorf("VaultID = %q, want vid", parquet.VaultID)
+		}
+
+		if csv.RelPath != "data/raw/a.csv" {
+			t.Errorf("files[1].RelPath = %q, want data/raw/a.csv", csv.RelPath)
+		}
+		if csv.Mime != "text/csv" {
+			t.Errorf("files[1].Mime = %q, want text/csv", csv.Mime)
+		}
+		if !(csv.Bucket == "data" && csv.SubBucket == "raw") {
+			t.Errorf("files[1]: bucket=%q subBucket=%q, want data/raw", csv.Bucket, csv.SubBucket)
+		}
+	})
+}
+
+// TestVaultInventoryScan_KnowledgeLayout scans a vault holding a single
+// markdown file under knowledge/decisions and checks the recorded path,
+// bucket, sub-bucket and MIME type.
+func TestVaultInventoryScan_KnowledgeLayout(t *testing.T) {
+	t.Run("knowledge layout — bucket y sub_bucket correctos", func(t *testing.T) {
+		root := t.TempDir()
+		writeTestFile(t, root, "knowledge/decisions/x.md", "# Decision\n\ncontent")
+
+		got, err := VaultInventoryScan(root, "vid", "vname")
+		if err != nil {
+			t.Fatalf("unexpected error: %v", err)
+		}
+		if len(got) != 1 {
+			t.Fatalf("expected 1 file, got %d", len(got))
+		}
+
+		note := got[0]
+		if note.RelPath != "knowledge/decisions/x.md" {
+			t.Errorf("RelPath = %q", note.RelPath)
+		}
+		if note.Bucket != "knowledge" {
+			t.Errorf("Bucket = %q, want knowledge", note.Bucket)
+		}
+		if note.SubBucket != "decisions" {
+			t.Errorf("SubBucket = %q, want decisions", note.SubBucket)
+		}
+		if note.Mime != "text/markdown" {
+			t.Errorf("Mime = %q, want text/markdown", note.Mime)
+		}
+	})
+}
+
+// TestVaultInventoryScan_SkipsIndexAndGit checks that the index database, its
+// WAL file and the contents of .git are left out of the scan result, while a
+// regular data file is still reported.
+func TestVaultInventoryScan_SkipsIndexAndGit(t *testing.T) {
+	t.Run("omite vault_index.db y .git", func(t *testing.T) {
+		root := t.TempDir()
+		writeTestFile(t, root, "vault_index.db", "sqlite data")
+		writeTestFile(t, root, "vault_index.db-wal", "wal data")
+		writeTestFile(t, root, ".git/HEAD", "ref: refs/heads/master")
+		writeTestFile(t, root, "data/raw/real.csv", "a,b\n1,2\n")
+
+		got, err := VaultInventoryScan(root, "vid", "vname")
+		if err != nil {
+			t.Fatalf("unexpected error: %v", err)
+		}
+		if len(got) != 1 {
+			t.Fatalf("expected 1 file (real.csv), got %d: %v", len(got), relPaths(got))
+		}
+		if only := got[0].RelPath; only != "data/raw/real.csv" {
+			t.Errorf("unexpected file: %q", only)
+		}
+	})
+}
+
+// TestVaultInventoryScan_Sha256Deterministic writes the same content into two
+// independent vaults and checks that both scans report the same 64-character
+// hex digest for it.
+func TestVaultInventoryScan_Sha256Deterministic(t *testing.T) {
+	t.Run("sha256 determinista para mismo contenido", func(t *testing.T) {
+		dir1 := t.TempDir()
+		dir2 := t.TempDir()
+		content := "deterministic content 123\n"
+		writeTestFile(t, dir1, "data/raw/f.csv", content)
+		writeTestFile(t, dir2, "data/raw/f.csv", content)
+
+		files1, err := VaultInventoryScan(dir1, "v1", "vault1")
+		if err != nil {
+			t.Fatal(err)
+		}
+		files2, err := VaultInventoryScan(dir2, "v2", "vault2")
+		if err != nil {
+			t.Fatal(err)
+		}
+		// Guard the [0] accesses below: an unexpected result size should be a
+		// readable test failure, not an index-out-of-range panic.
+		if len(files1) != 1 || len(files2) != 1 {
+			t.Fatalf("expected exactly 1 file per vault, got %d and %d", len(files1), len(files2))
+		}
+		if files1[0].Sha256 != files2[0].Sha256 {
+			t.Errorf("sha256 mismatch: %q vs %q", files1[0].Sha256, files2[0].Sha256)
+		}
+		if len(files1[0].Sha256) != 64 {
+			t.Errorf("sha256 length = %d, want 64", len(files1[0].Sha256))
+		}
+	})
+}
+
+// TestVaultInventoryScan_Sorted creates four files in non-sorted order and
+// checks that the scan returns all of them in ascending RelPath order.
+func TestVaultInventoryScan_Sorted(t *testing.T) {
+	t.Run("orden lexicografico del resultado", func(t *testing.T) {
+		dir := t.TempDir()
+		writeTestFile(t, dir, "knowledge/decisions/z.md", "z")
+		writeTestFile(t, dir, "data/raw/a.csv", "a")
+		writeTestFile(t, dir, "data/processed/m.parquet", "m")
+		writeTestFile(t, dir, "knowledge/domains/b.md", "b")
+
+		files, err := VaultInventoryScan(dir, "v", "v")
+		if err != nil {
+			t.Fatal(err)
+		}
+		// Without this check the ordering loop passes trivially when the scan
+		// returns zero or one file.
+		if len(files) != 4 {
+			t.Fatalf("expected 4 files, got %d: %v", len(files), relPaths(files))
+		}
+		for i := 1; i < len(files); i++ {
+			if files[i].RelPath < files[i-1].RelPath {
+				t.Errorf("not sorted at index %d: %q < %q", i, files[i].RelPath, files[i-1].RelPath)
+			}
+		}
+	})
+}
+
+// relPaths returns the RelPath of every file, in order. It exists to make
+// test failure messages readable.
+func relPaths(files []VaultFile) []string {
+	paths := make([]string, 0, len(files))
+	for idx := range files {
+		paths = append(paths, files[idx].RelPath)
+	}
+	return paths
+}
@@ -0,0 +1,252 @@
+package infra
+
+import (
+	"fmt"
+	"os"
+	"path/filepath"
+	"strings"
+)
+
+// LayoutReport describes what VaultLayoutEnsure did (or would do) to a vault directory.
+//
+// Every path stored in Created, Migrated, AlreadyOK and Skipped is relative to
+// the vault root. When DryRun is true the lists describe the planned actions;
+// nothing was changed on disk.
+type LayoutReport struct {
+	// VaultPath is the vault root after filepath.Abs and filepath.EvalSymlinks.
+	VaultPath string   `json:"vault_path"`
+	Created   []string `json:"created"`    // dirs created (relative paths)
+	Migrated  []string `json:"migrated"`   // renames executed, format "src -> dst" (relative)
+	AlreadyOK []string `json:"already_ok"` // dirs that already existed at the target location
+	Skipped   []string `json:"skipped"`    // unrecognized root-level entries, left untouched
+	// DryRun echoes the dryRun argument passed to VaultLayoutEnsure.
+	DryRun    bool     `json:"dry_run"`
+}
+
+// Root-level names that VaultLayoutEnsure recognises in a legacy vault.
+var (
+	// dataBuckets are root-level directories that belong under data/.
+	dataBuckets = []string{"raw", "processed", "exports"}
+
+	// knowledgeBuckets are root-level directories that belong under knowledge/.
+	knowledgeBuckets = []string{"decisions", "domains", "models", "benchmarks", "test_documents"}
+
+	// knownRootFiles are root-level files that should be moved to knowledge/.
+	knownRootFiles = []string{"README.md", "README.txt"}
+)
+
+// VaultLayoutEnsure ensures a vault directory uses the canonical hybrid layout:
+//
+//	data/{raw,processed,exports}
+//	knowledge/{decisions,domains,models,benchmarks,test_documents}
+//
+// Legacy vaults that have these directories at the root are migrated by renaming
+// (or merging when both src and dst already exist). A root-level README.md or
+// README.txt is moved to knowledge/README.md. Root entries that are not part of
+// the layout are left untouched and listed in Skipped. The operation is
+// idempotent: a second run reports the existing directories in AlreadyOK and
+// performs no migrations.
+//
+// vaultPath may be absolute or relative and may be a symlink; it is resolved
+// with filepath.Abs and filepath.EvalSymlinks and the resolved path is stored
+// in the report. When dryRun is true the function computes the report but does
+// not touch the disk.
+//
+// An error is returned when the path cannot be resolved or is not a directory,
+// when a root README would collide with an existing knowledge/README.md or with
+// another root README, when a directory merge hits a name that exists on both
+// sides, or when a filesystem operation fails. The report built up to that
+// point is returned together with the error.
+func VaultLayoutEnsure(vaultPath string, dryRun bool) (LayoutReport, error) {
+	report := LayoutReport{DryRun: dryRun}
+
+	// --- resolve path ---
+	// filepath.Abs cleans its result, which already drops trailing separators
+	// while keeping a root such as "/" intact. Trimming them by hand turned
+	// "/" into "", and Abs("") resolves to the current working directory.
+	absPath, err := filepath.Abs(vaultPath)
+	if err != nil {
+		return report, fmt.Errorf("vault_layout_ensure: abs(%q): %w", vaultPath, err)
+	}
+
+	// Follow symlinks for the vault root itself.
+	resolved, err := filepath.EvalSymlinks(absPath)
+	if err != nil {
+		return report, fmt.Errorf("vault_layout_ensure: eval symlinks %q: %w", absPath, err)
+	}
+	vaultPath = resolved
+	report.VaultPath = vaultPath
+
+	// --- check that vault exists and is a directory ---
+	info, err := os.Stat(vaultPath)
+	if err != nil {
+		return report, fmt.Errorf("vault_layout_ensure: stat %q: %w", vaultPath, err)
+	}
+	if !info.IsDir() {
+		return report, fmt.Errorf("vault_layout_ensure: %q is not a directory", vaultPath)
+	}
+
+	// --- ensure top-level containers ---
+	for _, container := range []string{"data", "knowledge"} {
+		dst := filepath.Join(vaultPath, container)
+		if err := ensureDir(dst, dryRun, container, &report); err != nil {
+			return report, err
+		}
+	}
+
+	// --- build migration table: root name -> relative destination ---
+	type migration struct {
+		rootName string // name in vault root (dir or file)
+		dstRel   string // relative destination path inside vault
+		isFile   bool
+	}
+
+	var migrations []migration
+	for _, b := range dataBuckets {
+		migrations = append(migrations, migration{rootName: b, dstRel: filepath.Join("data", b)})
+	}
+	for _, b := range knowledgeBuckets {
+		migrations = append(migrations, migration{rootName: b, dstRel: filepath.Join("knowledge", b)})
+	}
+	for _, rf := range knownRootFiles {
+		migrations = append(migrations, migration{rootName: rf, dstRel: filepath.Join("knowledge", "README.md"), isFile: true})
+	}
+
+	// --- pre-flight: file migrations must not collide ---
+	// Several root files share one destination (README.md and README.txt both
+	// go to knowledge/README.md). Checking before anything is renamed means a
+	// collision is reported identically in dry-run and real mode, and a real
+	// run does not stop halfway with one README already moved.
+	claimed := make(map[string]string) // dstRel -> root file that will be moved there
+	for _, m := range migrations {
+		if !m.isFile || !pathExists(filepath.Join(vaultPath, m.rootName)) {
+			continue
+		}
+		if pathExists(filepath.Join(vaultPath, m.dstRel)) {
+			return report, fmt.Errorf("vault_layout_ensure: conflict: both %q and %q exist", m.rootName, m.dstRel)
+		}
+		if prev, taken := claimed[m.dstRel]; taken {
+			return report, fmt.Errorf("vault_layout_ensure: conflict: both %q and %q map to %q", prev, m.rootName, m.dstRel)
+		}
+		claimed[m.dstRel] = m.rootName
+	}
+
+	// --- apply migrations ---
+	reportedOK := make(map[string]bool) // destinations already listed in AlreadyOK
+	for _, m := range migrations {
+		src := filepath.Join(vaultPath, m.rootName)
+		dst := filepath.Join(vaultPath, m.dstRel)
+		srcRel := m.rootName
+		dstRel := m.dstRel
+
+		srcExists := pathExists(src)
+		dstExists := pathExists(dst)
+
+		switch {
+		case srcExists && dstExists:
+			// Both exist: merge if directory, error on file collision.
+			// The file case is normally caught by the pre-flight above; it is
+			// kept as a safety net in case the disk changed in between.
+			if m.isFile {
+				return report, fmt.Errorf("vault_layout_ensure: conflict: both %q and %q exist", srcRel, dstRel)
+			}
+			if err := mergeDirs(src, dst, srcRel, dstRel, dryRun, &report); err != nil {
+				return report, err
+			}
+
+		case srcExists && !dstExists:
+			// Only source exists: rename.
+			report.Migrated = append(report.Migrated, fmt.Sprintf("%s -> %s", srcRel, dstRel))
+			if !dryRun {
+				if err := os.Rename(src, dst); err != nil {
+					return report, fmt.Errorf("vault_layout_ensure: rename %q -> %q: %w", src, dst, err)
+				}
+			}
+
+		case !srcExists && dstExists:
+			// Already migrated. Skip destinations that were filled by a file
+			// migration in this very run (they are in Migrated, not AlreadyOK)
+			// and list each destination only once even when several root
+			// names map to it.
+			_, migratedNow := claimed[dstRel]
+			if !migratedNow && !reportedOK[dstRel] {
+				reportedOK[dstRel] = true
+				report.AlreadyOK = append(report.AlreadyOK, dstRel)
+			}
+
+		default:
+			// Neither exists: create empty destination directory (skip for files).
+			if !m.isFile {
+				report.Created = append(report.Created, dstRel)
+				if !dryRun {
+					if err := os.MkdirAll(dst, 0o755); err != nil {
+						return report, fmt.Errorf("vault_layout_ensure: mkdir %q: %w", dst, err)
+					}
+				}
+			}
+		}
+	}
+
+	// --- collect skipped (unrecognized root entries) ---
+	// Names are compared in lower case, matching the keys of the known-name set.
+	knownNames := vaultLayoutKnownNames()
+	entries, err := os.ReadDir(vaultPath)
+	if err != nil {
+		return report, fmt.Errorf("vault_layout_ensure: readdir %q: %w", vaultPath, err)
+	}
+	for _, e := range entries {
+		if _, known := knownNames[strings.ToLower(e.Name())]; !known {
+			report.Skipped = append(report.Skipped, e.Name())
+		}
+	}
+
+	return report, nil
+}
+
+// ensureDir records the state of one directory in report and creates it when
+// needed. rel is the name stored in the report; path is where it lives on disk.
+//
+//   - path exists: rel is appended to AlreadyOK.
+//   - path is missing: rel is appended to Created and, unless dryRun is set,
+//     the directory is created with os.MkdirAll.
+//
+// Used for the top-level containers "data" and "knowledge".
+func ensureDir(path string, dryRun bool, rel string, report *LayoutReport) error {
+	switch {
+	case pathExists(path):
+		report.AlreadyOK = append(report.AlreadyOK, rel)
+	case dryRun:
+		report.Created = append(report.Created, rel)
+	default:
+		report.Created = append(report.Created, rel)
+		if mkErr := os.MkdirAll(path, 0o755); mkErr != nil {
+			return fmt.Errorf("vault_layout_ensure: mkdir %q: %w", path, mkErr)
+		}
+	}
+	return nil
+}
+
+// mergeDirs moves the direct children of src into dst, then removes src if it
+// ended up empty. srcRel and dstRel are the vault-relative forms of src and
+// dst, used for the report and for error messages.
+//
+// No overwrite policy: if any child of src already exists in dst the merge is
+// refused. All children are checked before the first one is moved, so a
+// conflict leaves both directories and the report untouched.
+//
+// Each moved child is appended to report.Migrated as "src -> dst". When dryRun
+// is true the entries are recorded but nothing is renamed or removed.
+func mergeDirs(src, dst, srcRel, dstRel string, dryRun bool, report *LayoutReport) error {
+	children, err := os.ReadDir(src)
+	if err != nil {
+		return fmt.Errorf("vault_layout_ensure: readdir %q: %w", src, err)
+	}
+
+	// Pass 1: detect every conflict up front so a refused merge never leaves
+	// src half-moved.
+	for _, child := range children {
+		if pathExists(filepath.Join(dst, child.Name())) {
+			return fmt.Errorf("vault_layout_ensure: merge conflict: %q already exists in %q (cannot overwrite %q)",
+				child.Name(), dstRel, filepath.Join(srcRel, child.Name()))
+		}
+	}
+
+	// Pass 2: record and perform the moves.
+	for _, child := range children {
+		childSrc := filepath.Join(src, child.Name())
+		childDst := filepath.Join(dst, child.Name())
+		childSrcRel := filepath.Join(srcRel, child.Name())
+		childDstRel := filepath.Join(dstRel, child.Name())
+		report.Migrated = append(report.Migrated, fmt.Sprintf("%s -> %s", childSrcRel, childDstRel))
+		if !dryRun {
+			if err := os.Rename(childSrc, childDst); err != nil {
+				return fmt.Errorf("vault_layout_ensure: rename %q -> %q: %w", childSrc, childDst, err)
+			}
+		}
+	}
+
+	if dryRun {
+		return nil
+	}
+
+	// Re-read src after the renames and remove it only if nothing is left,
+	// so entries that appeared in the meantime are never deleted.
+	remaining, err := os.ReadDir(src)
+	if err != nil {
+		return fmt.Errorf("vault_layout_ensure: readdir %q: %w", src, err)
+	}
+	if len(remaining) == 0 {
+		if err := os.Remove(src); err != nil {
+			return fmt.Errorf("vault_layout_ensure: remove empty src %q: %w", src, err)
+		}
+	}
+	return nil
+}
+
+// pathExists reports whether os.Lstat succeeds for path. Because Lstat does
+// not follow symlinks, a symlink counts as existing even if its target is gone.
+func pathExists(path string) bool {
+	if _, statErr := os.Lstat(path); statErr != nil {
+		return false
+	}
+	return true
+}
+
+// dirIsEmpty reports whether path is a readable directory with no entries.
+// It returns false when the directory cannot be read.
+func dirIsEmpty(path string) bool {
+	if listing, readErr := os.ReadDir(path); readErr == nil {
+		return len(listing) == 0
+	}
+	return false
+}
+
+// Reference dirIsEmpty so it counts as used even if only tests call it.
+var _ = dirIsEmpty
+
+// vaultLayoutKnownNames returns the set of root-level names managed by
+// VaultLayoutEnsure: the data and knowledge buckets, the known root files
+// (lower-cased) and the two containers "data" and "knowledge".
+// It is a package-private helper, available to tests in this package.
+func vaultLayoutKnownNames() map[string]struct{} {
+	names := map[string]struct{}{
+		"data":      {},
+		"knowledge": {},
+	}
+	for _, bucket := range dataBuckets {
+		names[bucket] = struct{}{}
+	}
+	for _, bucket := range knowledgeBuckets {
+		names[bucket] = struct{}{}
+	}
+	for _, file := range knownRootFiles {
+		names[strings.ToLower(file)] = struct{}{}
+	}
+	return names
+}
+
@@ -0,0 +1,95 @@
+---
+name: vault_layout_ensure
+kind: function
+lang: go
+domain: infra
+version: "1.0.0"
+purity: impure
+signature: "func VaultLayoutEnsure(vaultPath string, dryRun bool) (LayoutReport, error)"
+description: "Normaliza el layout de un vault al esquema hibrido canónico data/{raw,processed,exports} + knowledge/{decisions,domains,models,benchmarks,test_documents}. Migra directorios legacy en la raíz del vault a su ubicación correcta; idempotente."
+tags: [vault, layout, migration, infra, filesystem, idempotent]
+uses_functions: []
+uses_types: []
+returns: []
+returns_optional: false
+error_type: "error_go_core"
+imports:
+  - "fmt"
+  - "os"
+  - "path/filepath"
+  - "strings"
+params:
+  - name: vault_path
+    desc: "Ruta al directorio raíz del vault. Puede ser absoluta, relativa o un symlink — se resuelve con filepath.Abs + filepath.EvalSymlinks. Trailing slashes se ignoran."
+  - name: dry_run
+    desc: "Si true, calcula el reporte completo (qué se crearía, migraría, etc.) pero no modifica el disco. Util para previsualizar antes de ejecutar."
+output: "LayoutReport con: VaultPath (ruta resuelta), Created (dirs creados), Migrated (renombres ejecutados, formato 'src -> dst'), AlreadyOK (destinos que ya existían), Skipped (entradas en raíz no reconocidas, no tocadas), DryRun (flag). Error si el path no existe, no es directorio, o hay conflicto de merge (mismo nombre de archivo en src y dst)."
+tested: true
+tests:
+  - "TestVaultLayoutEnsure_DryRun_NoChange"
+  - "TestVaultLayoutEnsure_FreshDir_CreatesLayout"
+  - "TestVaultLayoutEnsure_LegacyDataLayout_Migrates"
+  - "TestVaultLayoutEnsure_LegacyKnowledgeLayout_Migrates"
+  - "TestVaultLayoutEnsure_AlreadyMigrated_Idempotent"
+  - "TestVaultLayoutEnsure_Mixed_PartialMigration"
+  - "TestVaultLayoutEnsure_MergeConflict_Errors"
+  - "TestVaultLayoutEnsure_UnknownFiles_Skipped"
+  - "TestVaultLayoutEnsure_NotADir_Errors"
+test_file_path: "functions/infra/vault_layout_ensure_test.go"
+file_path: "functions/infra/vault_layout_ensure.go"
+---
+
+## Ejemplo
+
+```go
+// Previsualizar sin tocar disco:
+report, err := VaultLayoutEnsure("/home/lucas/vaults/turismo_spain", true)
+if err != nil {
+    log.Fatal(err)
+}
+fmt.Printf("Would migrate: %v\n", report.Migrated)
+fmt.Printf("Would create:  %v\n", report.Created)
+
+// Ejecutar la migración:
+report, err = VaultLayoutEnsure("/home/lucas/vaults/turismo_spain", false)
+if err != nil {
+    log.Fatalf("migration failed: %v", err)
+}
+fmt.Printf("Migrated: %v\n", report.Migrated)
+fmt.Printf("Created:  %v\n", report.Created)
+fmt.Printf("Skipped:  %v\n", report.Skipped)
+```
+
+## Comportamiento detallado
+
+**Directorios gestionados:**
+
+| Raíz (legacy) | Destino canónico |
+|---|---|
+| `raw/` | `data/raw/` |
+| `processed/` | `data/processed/` |
+| `exports/` | `data/exports/` |
+| `decisions/` | `knowledge/decisions/` |
+| `domains/` | `knowledge/domains/` |
+| `models/` | `knowledge/models/` |
+| `benchmarks/` | `knowledge/benchmarks/` |
+| `test_documents/` | `knowledge/test_documents/` |
+| `README.md` / `README.txt` | `knowledge/README.md` |
+
+**Lógica de migración (por cada entrada conocida):**
+
+- Solo `src` existe → rename atómico `src` → `dst`, registrado en `Migrated`.
+- Solo `dst` existe → ya migrado, registrado en `AlreadyOK`.
+- Ambos existen (dir) → merge: mueve cada hijo de `src/` a `dst/`; error si mismo nombre. Registrado en `Migrated` por hijo.
+- Ambos existen (archivo README) → error inmediato con paths concretos.
+- Ninguno existe → crea `dst` vacío, registrado en `Created` (solo para directorios; si no hay README en ningún lado no se crea nada).
+
+**Archivos/dirs no reconocidos** en la raíz (`.git`, `vault_index.db`, archivos custom) se registran en `Skipped` y no se tocan.
+
+**Idempotencia:** segunda ejecución sobre un vault ya migrado reporta todo en `AlreadyOK` y no toca disco.
+
+## Notas
+
+`LayoutReport` es un tipo local de esta función (no un tipo del registry). El struct exportado vive en `functions/infra/vault_layout_ensure.go` junto con la función.
+
+Para aplicar la migración a múltiples vaults en batch, invocar desde un pipeline que lea los paths de `vault.yaml` (ver `vault_manifest_read_go_infra`) y llame a `VaultLayoutEnsure` en cada uno.
@@ -0,0 +1,394 @@
+package infra
+
+import (
+	"os"
+	"path/filepath"
+	"testing"
+)
+
+// mkVaultDir creates a temporary directory tree for tests and returns its root.
+//
+// entries is a list of forward-slash paths relative to the root. Paths ending
+// in "/" are created as directories; all others are created as files with
+// placeholder content, together with any missing parent directories.
+// An empty entry or any filesystem error fails the calling test.
+func mkVaultDir(t *testing.T, entries []string) string {
+	t.Helper()
+	root := t.TempDir()
+	for _, e := range entries {
+		// Inspecting the last byte of an empty entry would panic with an
+		// index-out-of-range; report the bad fixture instead.
+		if e == "" {
+			t.Fatalf("mkVaultDir: empty entry in %q", entries)
+		}
+		full := filepath.Join(root, filepath.FromSlash(e))
+		if e[len(e)-1] == '/' {
+			if err := os.MkdirAll(full, 0o755); err != nil {
+				t.Fatalf("mkVaultDir: mkdir %q: %v", full, err)
+			}
+		} else {
+			if err := os.MkdirAll(filepath.Dir(full), 0o755); err != nil {
+				t.Fatalf("mkVaultDir: mkdir parent %q: %v", full, err)
+			}
+			if err := os.WriteFile(full, []byte("test\n"), 0o644); err != nil {
+				t.Fatalf("mkVaultDir: write %q: %v", full, err)
+			}
+		}
+	}
+	return root
+}
+
+// TestVaultLayoutEnsure_DryRun_NoChange runs a dry run on a legacy vault and
+// checks that the report is flagged as a dry run, that it plans migrations,
+// and that the directory tree is the same before and after.
+func TestVaultLayoutEnsure_DryRun_NoChange(t *testing.T) {
+	vault := mkVaultDir(t, []string{"raw/", "raw/file1.csv", "processed/"})
+
+	snapBefore := snapshotDir(t, vault)
+	plan, err := VaultLayoutEnsure(vault, true)
+	if err != nil {
+		t.Fatalf("unexpected error: %v", err)
+	}
+	snapAfter := snapshotDir(t, vault)
+
+	if !plan.DryRun {
+		t.Error("DryRun flag not set in report")
+	}
+	if !mapEqual(snapBefore, snapAfter) {
+		t.Errorf("dry-run modified disk: before=%v after=%v", snapBefore, snapAfter)
+	}
+	// raw and processed sit at the root, so the plan must contain migrations.
+	if len(plan.Migrated) == 0 {
+		t.Error("expected Migrated to be non-empty in dry-run plan")
+	}
+}
+
+// TestVaultLayoutEnsure_FreshDir_CreatesLayout runs the function on an empty
+// vault and checks that every canonical directory is listed in Created and
+// exists on disk as a directory.
+func TestVaultLayoutEnsure_FreshDir_CreatesLayout(t *testing.T) {
+	vault := mkVaultDir(t, nil) // empty vault
+
+	report, err := VaultLayoutEnsure(vault, false)
+	if err != nil {
+		t.Fatalf("unexpected error: %v", err)
+	}
+
+	expected := []string{"data", "knowledge"}
+	for _, b := range []string{"raw", "processed", "exports"} {
+		expected = append(expected, filepath.Join("data", b))
+	}
+	for _, b := range []string{"decisions", "domains", "models", "benchmarks", "test_documents"} {
+		expected = append(expected, filepath.Join("knowledge", b))
+	}
+
+	// Every standard directory must be reported as created.
+	for _, rel := range expected {
+		if !sliceContains(report.Created, rel) {
+			t.Errorf("expected Created to contain %q, got %v", rel, report.Created)
+		}
+	}
+
+	// Every standard directory must really exist on disk.
+	for _, rel := range expected {
+		abs := filepath.Join(vault, rel)
+		st, statErr := os.Stat(abs)
+		switch {
+		case statErr != nil:
+			t.Errorf("expected %q to exist: %v", abs, statErr)
+		case !st.IsDir():
+			t.Errorf("%q should be a directory", abs)
+		}
+	}
+}
+
+// TestVaultLayoutEnsure_LegacyDataLayout_Migrates starts from a vault with
+// raw/, processed/ and exports/ at the root and checks that raw and processed
+// are reported as migrated, that their files are found under data/, and that
+// the legacy directories are gone.
+func TestVaultLayoutEnsure_LegacyDataLayout_Migrates(t *testing.T) {
+	vault := mkVaultDir(t, []string{
+		"raw/",
+		"raw/file1.parquet",
+		"raw/file2.parquet",
+		"processed/",
+		"processed/clean.csv",
+		"exports/",
+	})
+
+	report, err := VaultLayoutEnsure(vault, false)
+	if err != nil {
+		t.Fatalf("unexpected error: %v", err)
+	}
+
+	// Whole-directory renames are reported as "<root name> -> data/<root name>".
+	for _, legacy := range []string{"raw", "processed"} {
+		want := legacy + " -> " + filepath.Join("data", legacy)
+		if !sliceContains(report.Migrated, want) {
+			t.Errorf("expected Migrated to contain %q, got %v", want, report.Migrated)
+		}
+	}
+
+	// The files travel with their directories.
+	movedFiles := []string{
+		filepath.Join("data", "raw", "file1.parquet"),
+		filepath.Join("data", "raw", "file2.parquet"),
+		filepath.Join("data", "processed", "clean.csv"),
+	}
+	for _, rel := range movedFiles {
+		if _, statErr := os.Stat(filepath.Join(vault, rel)); statErr != nil {
+			t.Errorf("expected %q to exist after migration: %v", rel, statErr)
+		}
+	}
+
+	// Nothing is left behind at the old location.
+	for _, legacy := range []string{"raw", "processed"} {
+		if pathExists(filepath.Join(vault, legacy)) {
+			t.Errorf("expected legacy dir %q to be removed", legacy)
+		}
+	}
+}
+
+// TestVaultLayoutEnsure_LegacyKnowledgeLayout_Migrates starts from a vault
+// with decisions/, models/ and README.md at the root and checks that all three
+// are reported as migrated and found under knowledge/.
+func TestVaultLayoutEnsure_LegacyKnowledgeLayout_Migrates(t *testing.T) {
+	vault := mkVaultDir(t, []string{
+		"decisions/",
+		"decisions/2024-01.md",
+		"models/",
+		"models/ner_v1.pkl",
+		"README.md",
+	})
+
+	report, err := VaultLayoutEnsure(vault, false)
+	if err != nil {
+		t.Fatalf("unexpected error: %v", err)
+	}
+
+	// Each legacy root entry is reported as "<name> -> knowledge/<name>".
+	for _, legacy := range []string{"decisions", "models", "README.md"} {
+		want := legacy + " -> " + filepath.Join("knowledge", legacy)
+		if !sliceContains(report.Migrated, want) {
+			t.Errorf("expected Migrated to contain %q, got %v", want, report.Migrated)
+		}
+	}
+
+	// The content is reachable at the new location.
+	relocated := []string{
+		filepath.Join("knowledge", "decisions", "2024-01.md"),
+		filepath.Join("knowledge", "models", "ner_v1.pkl"),
+		filepath.Join("knowledge", "README.md"),
+	}
+	for _, rel := range relocated {
+		if _, statErr := os.Stat(filepath.Join(vault, rel)); statErr != nil {
+			t.Errorf("expected %q to exist after migration: %v", rel, statErr)
+		}
+	}
+}
+
+// TestVaultLayoutEnsure_AlreadyMigrated_Idempotent runs the function twice on
+// a vault that already has the canonical layout. Neither run may report a
+// migration, the second run must leave the tree unchanged, and it must report
+// the existing directories in AlreadyOK.
+func TestVaultLayoutEnsure_AlreadyMigrated_Idempotent(t *testing.T) {
+	vault := mkVaultDir(t, []string{
+		"data/",
+		"data/raw/",
+		"data/raw/file.csv",
+		"data/processed/",
+		"data/exports/",
+		"knowledge/",
+		"knowledge/decisions/",
+		"knowledge/domains/",
+		"knowledge/models/",
+		"knowledge/benchmarks/",
+		"knowledge/test_documents/",
+	})
+
+	first, err := VaultLayoutEnsure(vault, false)
+	if err != nil {
+		t.Fatalf("first run error: %v", err)
+	}
+	if n := len(first.Migrated); n != 0 {
+		t.Errorf("first run on fully-migrated vault should have no migrations, got %v", first.Migrated)
+	}
+
+	snapBefore := snapshotDir(t, vault)
+	second, err := VaultLayoutEnsure(vault, false)
+	if err != nil {
+		t.Fatalf("second run error: %v", err)
+	}
+	snapAfter := snapshotDir(t, vault)
+
+	if !mapEqual(snapBefore, snapAfter) {
+		t.Error("second run modified disk (not idempotent)")
+	}
+	if n := len(second.Migrated); n != 0 {
+		t.Errorf("second run should produce no migrations, got %v", second.Migrated)
+	}
+	if len(second.AlreadyOK) == 0 {
+		t.Error("second run should report existing dirs as AlreadyOK")
+	}
+}
+
+// TestVaultLayoutEnsure_Mixed_PartialMigration covers a half-migrated vault:
+// data/raw is already in place while exports/ and decisions/ are still at the
+// root. data/raw must be reported as AlreadyOK and the other two as migrated.
+func TestVaultLayoutEnsure_Mixed_PartialMigration(t *testing.T) {
+	vault := mkVaultDir(t, []string{
+		"data/",
+		"data/raw/",
+		"data/raw/already_here.csv",
+		"exports/",
+		"exports/report.pdf",
+		"decisions/",
+		"decisions/2023-note.md",
+	})
+
+	report, err := VaultLayoutEnsure(vault, false)
+	if err != nil {
+		t.Fatalf("unexpected error: %v", err)
+	}
+
+	// Already at its canonical location.
+	if !sliceContains(report.AlreadyOK, filepath.Join("data", "raw")) {
+		t.Errorf("data/raw should be AlreadyOK, got AlreadyOK=%v", report.AlreadyOK)
+	}
+	// Still at the root: must move under data/.
+	if !sliceContains(report.Migrated, "exports -> "+filepath.Join("data", "exports")) {
+		t.Errorf("exports should be migrated, Migrated=%v", report.Migrated)
+	}
+	// Still at the root: must move under knowledge/.
+	if !sliceContains(report.Migrated, "decisions -> "+filepath.Join("knowledge", "decisions")) {
+		t.Errorf("decisions should be migrated, Migrated=%v", report.Migrated)
+	}
+}
+
+// TestVaultLayoutEnsure_MergeConflict_Errors sets up raw/ and data/raw/ with a
+// file of the same name in each and checks that the merge is rejected with an
+// error that mentions the conflict or the colliding file.
+func TestVaultLayoutEnsure_MergeConflict_Errors(t *testing.T) {
+	vault := mkVaultDir(t, []string{
+		"raw/",
+		"raw/collision.csv",
+		"data/",
+		"data/raw/",
+		"data/raw/collision.csv", // same name on both sides -> conflict
+	})
+
+	_, err := VaultLayoutEnsure(vault, false)
+	if err == nil {
+		t.Fatal("expected error for merge conflict, got nil")
+	}
+
+	msg := err.Error()
+	if !(contains(msg, "conflict") || contains(msg, "collision.csv")) {
+		t.Errorf("error should mention conflict or the file name, got: %v", err)
+	}
+}
+
+// TestVaultLayoutEnsure_UnknownFiles_Skipped checks that root entries outside
+// the managed layout are listed in Skipped, and that a known bucket is not.
+func TestVaultLayoutEnsure_UnknownFiles_Skipped(t *testing.T) {
+	vault := mkVaultDir(t, []string{
+		".git/",
+		"vault_index.db",
+		"my_custom_notes.txt",
+		"raw/",
+	})
+
+	report, err := VaultLayoutEnsure(vault, false)
+	if err != nil {
+		t.Fatalf("unexpected error: %v", err)
+	}
+
+	for _, unmanaged := range []string{".git", "vault_index.db", "my_custom_notes.txt"} {
+		if !sliceContains(report.Skipped, unmanaged) {
+			t.Errorf("expected %q in Skipped, got %v", unmanaged, report.Skipped)
+		}
+	}
+	// raw is a managed bucket, so it must never be reported as skipped.
+	if sliceContains(report.Skipped, "raw") {
+		t.Error("raw should not appear in Skipped — it is a known bucket")
+	}
+}
+
+// TestVaultLayoutEnsure_NotADir_Errors checks the two invalid-root cases: a
+// path that does not exist and a path that points to a regular file.
+//
+// Both fixtures live inside t.TempDir(), so the test does not depend on a
+// fixed /tmp path being absent and leaves nothing behind to clean up.
+func TestVaultLayoutEnsure_NotADir_Errors(t *testing.T) {
+	t.Run("non-existent path", func(t *testing.T) {
+		missing := filepath.Join(t.TempDir(), "does_not_exist")
+		_, err := VaultLayoutEnsure(missing, false)
+		if err == nil {
+			t.Fatal("expected error for non-existent path")
+		}
+	})
+
+	t.Run("path is a file", func(t *testing.T) {
+		file := filepath.Join(t.TempDir(), "vault_layout.txt")
+		if err := os.WriteFile(file, nil, 0o644); err != nil {
+			t.Fatal(err)
+		}
+
+		_, err := VaultLayoutEnsure(file, false)
+		if err == nil {
+			t.Fatal("expected error when vaultPath is a file, not a dir")
+		}
+		if !contains(err.Error(), "not a directory") {
+			t.Errorf("error should mention 'not a directory', got: %v", err)
+		}
+	})
+}
+
+// --- helpers ---
+
+// snapshotDir walks root and returns a set of the paths found, relative to
+// root (the root itself is included). Only names are recorded, not file
+// contents. A walk error fails the calling test.
+func snapshotDir(t *testing.T, root string) map[string]bool {
+	t.Helper()
+
+	seen := map[string]bool{}
+	visit := func(p string, _ os.DirEntry, walkErr error) error {
+		if walkErr != nil {
+			return walkErr
+		}
+		relPath, _ := filepath.Rel(root, p)
+		seen[relPath] = true
+		return nil
+	}
+
+	if walkErr := filepath.WalkDir(root, visit); walkErr != nil {
+		t.Fatalf("snapshotDir: %v", walkErr)
+	}
+	return seen
+}
+
+// mapEqual reports whether a and b have the same number of keys and every key
+// of a maps to true in b.
+func mapEqual(a, b map[string]bool) bool {
+	if len(a) != len(b) {
+		return false
+	}
+	for key := range a {
+		if present := b[key]; !present {
+			return false
+		}
+	}
+	return true
+}
+
+// toSet returns the distinct strings of ss as the keys of a map.
+func toSet(ss []string) map[string]struct{} {
+	set := make(map[string]struct{}, len(ss))
+	for idx := range ss {
+		set[ss[idx]] = struct{}{}
+	}
+	return set
+}
+
+// sliceContains reports whether target is one of the elements of ss.
+func sliceContains(ss []string, target string) bool {
+	for idx := 0; idx < len(ss); idx++ {
+		if ss[idx] == target {
+			return true
+		}
+	}
+	return false
+}
+
+// contains reports whether sub occurs anywhere in s. An empty sub is found in
+// every string, including the empty one.
+func contains(s, sub string) bool {
+	// last is the highest start offset at which sub still fits into s; it is
+	// negative when sub is longer than s, so the loop body never runs.
+	last := len(s) - len(sub)
+	for start := 0; start <= last; start++ {
+		if s[start:start+len(sub)] == sub {
+			return true
+		}
+	}
+	return false
+}
@@ -0,0 +1,96 @@
+package infra
+
+import (
+	"fmt"
+	"os"
+	"path/filepath"
+	"strings"
+
+	"gopkg.in/yaml.v3"
+)
+
+// VaultManifestEntry is a single vault entry parsed from a projects/<proj>/vaults/vault.yaml.
+// Name, Description, Path and Tags are copied from the manifest without validation.
+type VaultManifestEntry struct {
+	ProjectID    string   // basename of projects/<proj>/, inferred from manifest path
+	Name         string   // vault name as declared in vault.yaml
+	Description  string   // human description
+	Path         string   // vault directory path exactly as written in vault.yaml (not resolved or checked here)
+	Tags         []string // tags declared in vault.yaml
+	ManifestFile string   // path to the vault.yaml this entry came from, as produced by the glob over repoRoot
+}
+
+// vaultYAML mirrors the vault.yaml schema (only the fields we care about).
+// The document is expected to have a top-level "vaults" list; each item may
+// declare name, description, path and tags. Keys not listed here are not
+// captured by this struct.
+type vaultYAML struct {
+	Vaults []struct {
+		Name        string   `yaml:"name"`
+		Description string   `yaml:"description"`
+		Path        string   `yaml:"path"`
+		Tags        []string `yaml:"tags"`
+	} `yaml:"vaults"`
+}
+
+// VaultManifestRead globs all projects/*/vaults/vault.yaml under repoRoot, parses each
+// manifest and returns a flat slice of VaultManifestEntry.
+//
+// Rules:
+//   - If a manifest fails to parse, an error is returned immediately with the file path.
+//   - If no manifests are found, an empty slice is returned (not an error).
+//   - ProjectID is inferred from the directory component between "projects/" and "/vaults/".
+func VaultManifestRead(repoRoot string) ([]VaultManifestEntry, error) {
+	pattern := filepath.Join(repoRoot, "projects", "*", "vaults", "vault.yaml")
+	matches, err := filepath.Glob(pattern)
+	if err != nil {
+		return nil, fmt.Errorf("vault_manifest_read: glob %q: %w", pattern, err)
+	}
+
+	var out []VaultManifestEntry
+	for _, manifestPath := range matches {
+		entries, err := parseVaultManifest(manifestPath)
+		if err != nil {
+			return nil, err
+		}
+		out = append(out, entries...)
+	}
+	return out, nil
+}
+
+// parseVaultManifest reads and decodes a single vault.yaml and returns one
+// VaultManifestEntry per item of its "vaults" list. Read and YAML errors are
+// wrapped with the manifest path. Every entry shares the ProjectID inferred
+// from manifestPath.
+func parseVaultManifest(manifestPath string) ([]VaultManifestEntry, error) {
+	content, readErr := os.ReadFile(manifestPath)
+	if readErr != nil {
+		return nil, fmt.Errorf("vault_manifest_read: read %q: %w", manifestPath, readErr)
+	}
+
+	var doc vaultYAML
+	if parseErr := yaml.Unmarshal(content, &doc); parseErr != nil {
+		return nil, fmt.Errorf("vault_manifest_read: parse %q: %w", manifestPath, parseErr)
+	}
+
+	project := inferProjectID(manifestPath)
+
+	result := make([]VaultManifestEntry, len(doc.Vaults))
+	for i, decl := range doc.Vaults {
+		result[i] = VaultManifestEntry{
+			ProjectID:    project,
+			Name:         decl.Name,
+			Description:  decl.Description,
+			Path:         decl.Path,
+			Tags:         decl.Tags,
+			ManifestFile: manifestPath,
+		}
+	}
+	return result, nil
+}
+
+// inferProjectID extracts the project basename from a path of the form
+// .../projects/<proj>/vaults/vault.yaml.
+//
+// The canonical tail is matched from the END of the path, so a "projects"
+// directory higher up (for example a repo cloned under ~/projects/) is not
+// mistaken for the registry's own projects/ folder. Paths without that tail
+// fall back to the component following the first "projects" segment. It
+// returns "" when no such segment is followed by another component.
+func inferProjectID(manifestPath string) string {
+	// Normalize separators and split.
+	parts := strings.Split(filepath.ToSlash(manifestPath), "/")
+	n := len(parts)
+
+	// Canonical layout: [..., "projects", <proj>, "vaults", "vault.yaml"].
+	if n >= 4 && parts[n-4] == "projects" {
+		return parts[n-3]
+	}
+
+	// Non-canonical path: keep the historical first-match behaviour.
+	for i, p := range parts {
+		if p == "projects" && i+1 < n {
+			return parts[i+1]
+		}
+	}
+	return ""
+}
@@ -0,0 +1,59 @@
+---
+name: vault_manifest_read
+kind: function
+lang: go
+domain: infra
+version: "1.0.0"
+purity: impure
+signature: "func VaultManifestRead(repoRoot string) ([]VaultManifestEntry, error)"
+description: "Lee todos los manifests vault.yaml bajo projects/*/vaults/ del repo y devuelve una lista plana de entradas de vault con su ProjectID inferido del path."
+tags: [vault, manifest, yaml, infra, projects, storage]
+uses_functions: []
+uses_types: []
+returns: []
+returns_optional: false
+error_type: "error_go_core"
+imports:
+  - "fmt"
+  - "os"
+  - "path/filepath"
+  - "strings"
+  - "gopkg.in/yaml.v3"
+params:
+  - name: repoRoot
+    desc: "Ruta absoluta a la raiz del repositorio fn_registry. Se usa como base para el glob projects/*/vaults/vault.yaml."
+output: "Slice plano de VaultManifestEntry (ProjectID, Name, Description, Path, Tags, ManifestFile). Vacio si no hay manifests. Error si un yaml no parsea, con el path concreto en el mensaje."
+tested: true
+tests:
+  - "TestVaultManifestRead_HappyPath"
+  - "TestVaultManifestRead_MalformedYAML"
+  - "TestVaultManifestRead_EmptyDir"
+test_file_path: "functions/infra/vault_manifest_read_test.go"
+file_path: "functions/infra/vault_manifest_read.go"
+---
+
+## Ejemplo
+
+```go
+entries, err := VaultManifestRead("/home/lucas/fn_registry")
+if err != nil {
+    log.Fatal(err)
+}
+for _, e := range entries {
+    fmt.Printf("%s/%s -> %s\n", e.ProjectID, e.Name, e.Path)
+}
+// app_turismo/turismo_spain -> /home/lucas/vaults/turismo_spain
+// app_finance/finance_data  -> /home/lucas/vaults/finance_data
+```
+
+## Notas
+
+`VaultManifestEntry` es un tipo local de esta funcion (no un tipo del registry). Contiene:
+- `ProjectID` — basename del directorio `projects/<proj>/`, inferido del path del manifest.
+- `Name`, `Description`, `Path`, `Tags` — copiados del yaml tal cual.
+- `ManifestFile` — path absoluto al vault.yaml de origen, util para mensajes de error y trazabilidad.
+
+El parseo usa `gopkg.in/yaml.v3` (ya en go.mod). Si un manifest falla, la funcion devuelve
+error inmediatamente con el path del fichero problemático. Los manifests sin entradas
+`vaults:` contribuyen cero entries (no es error). Si no existe ningun `projects/*/vaults/vault.yaml`
+el resultado es slice vacio sin error.
@@ -0,0 +1,113 @@
+package infra
+
+import (
+	"os"
+	"path/filepath"
+	"testing"
+)
+
+// TestVaultManifestRead_HappyPath writes two project manifests (three vaults in
+// total) and checks that VaultManifestRead flattens them and attributes each
+// entry to the right project.
+func TestVaultManifestRead_HappyPath(t *testing.T) {
+	root := t.TempDir()
+
+	writeManifest(t, root, "app_turismo", `
+vaults:
+  - name: turismo_spain
+    description: "Datos de turismo en Espana"
+    path: "/home/lucas/vaults/turismo_spain"
+    tags: [turismo, espana]
+  - name: turismo_raw
+    description: "Datos brutos sin procesar"
+    path: "/home/lucas/vaults/turismo_raw"
+    tags: [raw]
+`)
+
+	writeManifest(t, root, "app_finance", `
+vaults:
+  - name: finance_data
+    description: "Datos financieros"
+    path: "/home/lucas/vaults/finance_data"
+    tags: [finance]
+`)
+
+	entries, err := VaultManifestRead(root)
+	if err != nil {
+		t.Fatalf("unexpected error: %v", err)
+	}
+	if len(entries) != 3 {
+		t.Fatalf("got %d entries, want 3", len(entries))
+	}
+
+	// Build index by name for order-independent assertions.
+	byName := make(map[string]VaultManifestEntry, len(entries))
+	for _, e := range entries {
+		byName[e.Name] = e
+	}
+
+	// Check turismo_spain entry.
+	e, ok := byName["turismo_spain"]
+	if !ok {
+		t.Fatal("missing entry 'turismo_spain'")
+	}
+	if e.ProjectID != "app_turismo" {
+		t.Errorf("turismo_spain.ProjectID = %q, want %q", e.ProjectID, "app_turismo")
+	}
+	if e.Path != "/home/lucas/vaults/turismo_spain" {
+		t.Errorf("turismo_spain.Path = %q, want %q", e.Path, "/home/lucas/vaults/turismo_spain")
+	}
+	// Verify BOTH tags; the failure message promises the full list.
+	if len(e.Tags) != 2 || e.Tags[0] != "turismo" || e.Tags[1] != "espana" {
+		t.Errorf("turismo_spain.Tags = %v, want [turismo espana]", e.Tags)
+	}
+	if e.ManifestFile == "" {
+		t.Error("turismo_spain.ManifestFile is empty")
+	}
+
+	// Check finance_data entry belongs to app_finance.
+	ef, ok := byName["finance_data"]
+	if !ok {
+		t.Fatal("missing entry 'finance_data'")
+	}
+	if ef.ProjectID != "app_finance" {
+		t.Errorf("finance_data.ProjectID = %q, want %q", ef.ProjectID, "app_finance")
+	}
+}
+
+// TestVaultManifestRead_MalformedYAML checks that a manifest with broken YAML
+// makes VaultManifestRead return an error.
+func TestVaultManifestRead_MalformedYAML(t *testing.T) {
+	repo := t.TempDir()
+
+	writeManifest(t, repo, "bad_project", `
+vaults:
+  - name: [invalid yaml
+    path: missing_bracket
+`)
+
+	if _, readErr := VaultManifestRead(repo); readErr == nil {
+		t.Fatal("expected error for malformed YAML, got nil")
+	}
+}
+
+// TestVaultManifestRead_EmptyDir checks that a repo root without any
+// projects/ directory yields zero entries and no error.
+func TestVaultManifestRead_EmptyDir(t *testing.T) {
+	repo := t.TempDir()
+
+	// Nothing is created under repo, so the glob has nothing to match.
+	got, readErr := VaultManifestRead(repo)
+	if readErr != nil {
+		t.Fatalf("unexpected error for empty dir: %v", readErr)
+	}
+	if n := len(got); n != 0 {
+		t.Fatalf("got %d entries, want 0", n)
+	}
+}
+
+// writeManifest writes content to <root>/projects/<proj>/vaults/vault.yaml,
+// creating the intermediate directories first. Any filesystem error fails the
+// calling test.
+func writeManifest(t *testing.T, root, proj, content string) {
+	t.Helper()
+
+	vaultsDir := filepath.Join(root, "projects", proj, "vaults")
+	if mkErr := os.MkdirAll(vaultsDir, 0o755); mkErr != nil {
+		t.Fatalf("mkdir %s: %v", vaultsDir, mkErr)
+	}
+
+	manifest := filepath.Join(vaultsDir, "vault.yaml")
+	if wrErr := os.WriteFile(manifest, []byte(content), 0o644); wrErr != nil {
+		t.Fatalf("write %s: %v", manifest, wrErr)
+	}
+}
@@ -0,0 +1,265 @@
+package infra
+
+import (
+	"database/sql"
+	"fmt"
+	"path/filepath"
+	"strings"
+)
+
+// VaultSearchHit is a single result returned by VaultSearch.
+// RelPath, Size, Mtime, Mime, Bucket and SubBucket are read from the matching
+// row of the files table; the remaining fields describe the search context.
+type VaultSearchHit struct {
+	VaultPath string `json:"vault_path"`
+	VaultName string `json:"vault_name"` // basename of VaultPath (after resolving symlinks)
+	RelPath   string `json:"rel_path"`
+	Size      int64  `json:"size"`
+	Mtime     int64  `json:"mtime"`
+	Mime      string `json:"mime"`
+	Bucket    string `json:"bucket"`
+	SubBucket string `json:"sub_bucket"`
+	Snippet   string `json:"snippet"` // always "" for hits built in this file (both the FTS5 and the LIKE path set it empty)
+}
+
+// VaultSearch looks up files matching query in the index of the vault rooted
+// at vaultPath.
+//
+// Steps:
+//  1. A limit <= 0 is replaced by the default of 50.
+//  2. The index is opened through VaultIndexOpen and closed before returning.
+//  3. VaultName for every hit is the basename of vaultPath after resolving
+//     symlinks.
+//  4. The actual search is delegated to vaultSearchCombined, which merges an
+//     FTS5 MATCH over files_fts with a LIKE search over files.rel_path and
+//     deduplicates by rel_path.
+//
+// Errors from opening the index or from the combined search are wrapped with
+// the "vault_search" prefix.
+func VaultSearch(vaultPath, query string, limit int) ([]VaultSearchHit, error) {
+	effectiveLimit := limit
+	if effectiveLimit <= 0 {
+		effectiveLimit = 50
+	}
+
+	db, openErr := VaultIndexOpen(vaultPath)
+	if openErr != nil {
+		return nil, fmt.Errorf("vault_search: open index: %w", openErr)
+	}
+	defer db.Close()
+
+	results, searchErr := vaultSearchCombined(db, vaultPath, resolveVaultName(vaultPath), query, effectiveLimit)
+	if searchErr != nil {
+		return nil, fmt.Errorf("vault_search: %w", searchErr)
+	}
+	return results, nil
+}
+
+// vaultSearchCombined merges two search strategies into a single list of at
+// most limit hits, deduplicated by rel_path:
+//  1. FTS5 MATCH on files_fts (via vaultSearchFTSContent), using the query as
+//     prepared by safeFTSQuery. A failure here is tolerated.
+//  2. LIKE on files.rel_path (via vaultSearchLike), using the first word-like
+//     token of the query. It only runs while there is still room under limit
+//     and the token is non-empty.
+//
+// FTS hits come first, LIKE hits fill the remaining slots. An error is
+// returned only when both strategies fail. The result is never nil.
+func vaultSearchCombined(db *sql.DB, vaultPath, vaultName, query string, limit int) ([]VaultSearchHit, error) {
+	seen := make(map[string]struct{})
+	var hits []VaultSearchHit
+
+	// merge appends unseen hits from batch until limit is reached.
+	merge := func(batch []VaultSearchHit) {
+		for _, candidate := range batch {
+			if len(hits) >= limit {
+				return
+			}
+			if _, dup := seen[candidate.RelPath]; dup {
+				continue
+			}
+			seen[candidate.RelPath] = struct{}{}
+			hits = append(hits, candidate)
+		}
+	}
+
+	// Strategy 1: content matches through FTS5. Any error (syntax or not) is
+	// remembered but does not abort; the LIKE strategy still gets its chance.
+	ftsHits, ftsErr := vaultSearchFTSContent(db, vaultPath, vaultName, safeFTSQuery(query), limit)
+	if ftsErr == nil {
+		merge(ftsHits)
+	}
+
+	// Strategy 2: path matches through LIKE on the simplified token.
+	if likeQuery := simplifyForLike(query); likeQuery != "" && len(hits) < limit {
+		// Ask for enough rows to fill the remaining slots even if every
+		// already-seen path shows up again.
+		fetch := (limit - len(hits)) + len(seen)
+		likeHits, likeErr := vaultSearchLike(db, vaultPath, vaultName, likeQuery, fetch)
+		if ftsErr != nil && likeErr != nil {
+			// Both failed — return a combined error.
+			return nil, fmt.Errorf("fts: %v; like: %v", ftsErr, likeErr)
+		}
+		merge(likeHits)
+	}
+
+	if hits == nil {
+		return []VaultSearchHit{}, nil
+	}
+	return hits, nil
+}
+
+// vaultSearchFTSContent runs safeQuery as an FTS5 MATCH against files_fts and
+// maps every matching rowid to the row of files that has the same rowid.
+//
+// It works in two phases: first it collects up to limit rowids ordered by
+// rank, then it loads the metadata for each one individually. A rowid with no
+// loadable row in files is skipped silently. Snippet is left empty on every
+// hit. It returns (nil, nil) when the MATCH yields nothing.
+//
+// NOTE(review): the correlation relies on files_fts and files sharing rowids;
+// nothing in this function enforces that, so confirm against the index writer.
+func vaultSearchFTSContent(db *sql.DB, vaultPath, vaultName, safeQuery string, limit int) ([]VaultSearchHit, error) {
+	// Phase 1: matching rowids from FTS5.
+	const qRowids = `
+		SELECT rowid
+		FROM files_fts
+		WHERE files_fts MATCH ?
+		ORDER BY rank
+		LIMIT ?`
+
+	matched, queryErr := db.Query(qRowids, safeQuery, limit)
+	if queryErr != nil {
+		return nil, queryErr
+	}
+	defer matched.Close()
+
+	var ids []int64
+	for matched.Next() {
+		var id int64
+		if scanErr := matched.Scan(&id); scanErr != nil {
+			return nil, scanErr
+		}
+		ids = append(ids, id)
+	}
+	if iterErr := matched.Err(); iterErr != nil {
+		return nil, iterErr
+	}
+	if len(ids) == 0 {
+		return nil, nil
+	}
+
+	// Phase 2: one lookup per rowid in the files table.
+	const qFile = `
+			SELECT rel_path, size, mtime, mime, bucket, sub_bucket
+			FROM files WHERE rowid = ?`
+
+	var found []VaultSearchHit
+	for _, id := range ids {
+		hit := VaultSearchHit{VaultPath: vaultPath, VaultName: vaultName, Snippet: ""}
+		lookupErr := db.QueryRow(qFile, id).
+			Scan(&hit.RelPath, &hit.Size, &hit.Mtime, &hit.Mime, &hit.Bucket, &hit.SubBucket)
+		if lookupErr != nil {
+			// No usable row for this rowid — skip it and keep going.
+			continue
+		}
+		found = append(found, hit)
+	}
+	return found, nil
+}
+
+// vaultSearchLike returns up to limit rows of files whose rel_path contains
+// query as a literal substring, newest mtime first.
+//
+// LIKE treats '%' and '_' as wildcards, and '_' is very common in file names,
+// so the search term is escaped with a backslash and the statement declares
+// ESCAPE '\'. Without this, "my_file" would also match "my-file" or "myXfile".
+// Every hit carries vaultPath, vaultName and an empty Snippet.
+func vaultSearchLike(db *sql.DB, vaultPath, vaultName, query string, limit int) ([]VaultSearchHit, error) {
+	const qLike = `
+		SELECT rel_path, size, mtime, mime, bucket, sub_bucket
+		FROM files
+		WHERE rel_path LIKE '%' || ? || '%' ESCAPE '\'
+		ORDER BY mtime DESC
+		LIMIT ?`
+
+	// Escape the escape character itself first, then both LIKE wildcards.
+	literal := strings.NewReplacer(`\`, `\\`, `%`, `\%`, `_`, `\_`).Replace(query)
+
+	rows, err := db.Query(qLike, literal, limit)
+	if err != nil {
+		return nil, err
+	}
+	defer rows.Close()
+
+	var hits []VaultSearchHit
+	for rows.Next() {
+		var h VaultSearchHit
+		if err := rows.Scan(&h.RelPath, &h.Size, &h.Mtime, &h.Mime, &h.Bucket, &h.SubBucket); err != nil {
+			return nil, err
+		}
+		h.VaultPath = vaultPath
+		h.VaultName = vaultName
+		h.Snippet = ""
+		hits = append(hits, h)
+	}
+	return hits, rows.Err()
+}
+
+// resolveVaultName yields the last path element of vaultPath once symlinks
+// have been resolved. When resolution fails, the basename of vaultPath itself
+// is used instead.
+func resolveVaultName(vaultPath string) string {
+	if target, err := filepath.EvalSymlinks(vaultPath); err == nil {
+		return filepath.Base(target)
+	}
+	return filepath.Base(vaultPath)
+}
+
+// safeFTSQuery prepares a user query for an FTS5 MATCH.
+//
+// The query is trimmed first. It is then returned as-is when it is empty,
+// contains a ':' (column-prefix syntax), or contains " AND ", " OR " or
+// " NOT " in any letter case. Otherwise it is turned into a quoted FTS5
+// string: embedded double quotes are doubled and the whole text is wrapped in
+// double quotes, so input such as "hello-world" is matched literally instead
+// of being parsed as FTS5 syntax.
+func safeFTSQuery(query string) string {
+	trimmed := strings.TrimSpace(query)
+	if trimmed == "" {
+		return trimmed
+	}
+	// Column-prefix syntax: leave the query for FTS5 to interpret.
+	if strings.Contains(trimmed, ":") {
+		return trimmed
+	}
+	// Explicit boolean operators: same treatment.
+	folded := strings.ToUpper(trimmed)
+	for _, operator := range []string{" AND ", " OR ", " NOT "} {
+		if strings.Contains(folded, operator) {
+			return trimmed
+		}
+	}
+	return `"` + strings.ReplaceAll(trimmed, `"`, `""`) + `"`
+}
+
+// isFTSSyntaxError reports whether err's message, compared case-insensitively,
+// contains one of the markers associated with an FTS5 query parser failure.
+// A nil error is never a syntax error.
+func isFTSSyntaxError(err error) bool {
+	if err == nil {
+		return false
+	}
+	lowered := strings.ToLower(err.Error())
+	for _, marker := range []string{"syntax error", "no such column", "fts5: syntax error"} {
+		if strings.Contains(lowered, marker) {
+			return true
+		}
+	}
+	return false
+}
+
+// simplifyForLike reduces query to its first word-like token so it can be used
+// as a substring search term.
+//
+// After trimming, leading characters that are not ASCII letters, digits, '_'
+// or '-' are skipped; the token then runs up to (not including) the next such
+// character. For example "foo:bar:" yields "foo". The result is "" when the
+// query has no word-like character at all.
+func simplifyForLike(query string) string {
+	isWordRune := func(r rune) bool {
+		switch {
+		case 'a' <= r && r <= 'z', 'A' <= r && r <= 'Z', '0' <= r && r <= '9':
+			return true
+		case r == '_', r == '-':
+			return true
+		}
+		return false
+	}
+
+	var out strings.Builder
+	for _, r := range strings.TrimSpace(query) {
+		if isWordRune(r) {
+			out.WriteRune(r)
+			continue
+		}
+		if out.Len() > 0 {
+			// First separator after the token: stop.
+			break
+		}
+	}
+	return out.String()
+}
@@ -0,0 +1,61 @@
+---
+name: vault_search
+kind: function
+lang: go
+domain: infra
+version: "1.0.0"
+purity: impure
+signature: "func VaultSearch(vaultPath, query string, limit int) ([]VaultSearchHit, error)"
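+description: "Busca en vault_index.db de un vault combinando FTS5 MATCH sobre files_fts con LIKE sobre rel_path. El LIKE se ejecuta siempre que queden huecos hasta limit (no solo cuando FTS5 falla) y los resultados se deduplican por rel_path. Snippet queda vacio en todos los hits porque el indice FTS5 es contentless."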
+tags: [vault, search, fts5, sqlite, infra]
+uses_functions: ["vault_index_open_go_infra"]
+uses_types: ["vault_file_go_infra"]
+returns: []
+returns_optional: false
+error_type: "error_go_core"
+imports: [database/sql, fmt, path/filepath, strings]
+params:
+  - name: vaultPath
+    desc: "ruta absoluta al directorio raiz del vault (puede ser symlink)"
+  - name: query
+    desc: "termino o frase de busqueda; se escapa automaticamente para FTS5 salvo que ya incluya operadores booleanos o prefijos de columna"
+  - name: limit
+    desc: "maximo de resultados; si es <= 0 se usa 50"
+output: "slice de VaultSearchHit ordenado por rank FTS5 (o mtime DESC en fallback LIKE); slice vacio si no hay resultados"
+tested: true
+tests:
+  - "FTS match devuelve hit con snippet"
+  - "query sin resultados retorna slice vacio"
+  - "limit se respeta"
+  - "query FTS invalida activa fallback LIKE"
+  - "limit cero usa 50 por defecto"
+test_file_path: "functions/infra/vault_search_test.go"
+file_path: "functions/infra/vault_search.go"
+---
+
+## Ejemplo
+
+```go
+hits, err := infra.VaultSearch("/home/lucas/vaults/turismo_spain", "hoteles", 20)
+if err != nil {
+    log.Fatal(err)
+}
+for _, h := range hits {
+    fmt.Printf("[%s] %s  %s\n", h.VaultName, h.RelPath, h.Snippet)
+}
+```
+
+## Notas
+
+`VaultSearchHit` es un struct local definido en este archivo (no en `vault_file.go`)
+porque combina campos de `files` + metadatos de contexto de busqueda (Snippet, VaultPath, VaultName).
+
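+**FTS5 safety:** el helper `safeFTSQuery` envuelve la query en comillas dobles
+cuando no contiene operadores booleanos (` AND `, ` OR `, ` NOT `) ni el caracter `:`.
+Esto evita errores del parser en tokens como `hello-world`. Una query con `:`
+(por ejemplo `foo:bar:`) se pasa sin modificar; si FTS5 la rechaza, el error se
+ignora y solo cuentan los resultados del LIKE.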
+
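+**Busqueda LIKE:** ademas del MATCH (falle o no), mientras queden huecos hasta `limit` se ejecuta
+`WHERE rel_path LIKE '%' || token || '%'`, donde `token` es el primer token alfanumerico
+(letras, digitos, `_`, `-`) de la query, extraido por `simplifyForLike`. Solo se devuelve error
+si fallan las dos busquedas. Los hits de LIKE tienen `Snippet=""`.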
+
+**VaultName:** se deriva del `filepath.Base(filepath.EvalSymlinks(vaultPath))`.
+Si `EvalSymlinks` falla (e.g. symlink roto), usa `filepath.Base(vaultPath)`.
@@ -0,0 +1,147 @@
+package infra
+
+import (
+	"testing"
+	"time"
+)
+
+// openTestVaultDir opens and immediately closes a vault index inside a fresh
+// temp dir via VaultIndexOpen (presumably creating vault_index.db there) and
+// returns the directory path.
+func openTestVaultDir(t *testing.T) string {
+	t.Helper()
+	vaultDir := t.TempDir()
+	handle, openErr := VaultIndexOpen(vaultDir)
+	if openErr != nil {
+		t.Fatalf("VaultIndexOpen: %v", openErr)
+	}
+	handle.Close()
+	return vaultDir
+}
+
+// seedVaultFile inserts a row into files + files_fts.
+func seedVaultFile(t *testing.T, dir, relPath, mime, bucket, subBucket, contentText string, size int64) {
+	t.Helper()
+	db, err := VaultIndexOpen(dir)
+	if err != nil {
+		t.Fatalf("VaultIndexOpen seed: %v", err)
+	}
+	defer db.Close()
+
+	now := time.Now().Unix()
+	_, err = db.Exec(`
+		INSERT INTO files (rel_path, size, mtime, sha256, mime, ext, bucket, sub_bucket, indexed_at)
+		VALUES (?, ?, ?, 'aabbccdd', ?, '', ?, ?, ?)`,
+		relPath, size, now, mime, bucket, subBucket, now,
+	)
+	if err != nil {
+		t.Fatalf("seed files: %v", err)
+	}
+	_, err = db.Exec(`INSERT INTO files_fts(rel_path, content_text) VALUES (?, ?)`, relPath, contentText)
+	if err != nil {
+		t.Fatalf("seed files_fts: %v", err)
+	}
+}
+
+// --- Tests ---
+
+func TestVaultSearch_FTSMatch(t *testing.T) {
+	t.Run("FTS match devuelve hit con snippet", func(t *testing.T) {
+		dir := openTestVaultDir(t)
+		seedVaultFile(t, dir, "data/raw/informe.csv", "text/csv", "data", "raw",
+			"ventas trimestrales empresa iberica", 1024)
+		seedVaultFile(t, dir, "data/raw/other.csv", "text/csv", "data", "raw",
+			"productos inventario almacen", 512)
+
+		hits, err := VaultSearch(dir, "ventas", 10)
+		if err != nil {
+			t.Fatalf("VaultSearch: %v", err)
+		}
+		if len(hits) != 1 {
+			t.Fatalf("got %d hits, want 1", len(hits))
+		}
+		if hits[0].RelPath != "data/raw/informe.csv" {
+			t.Errorf("RelPath = %q, want data/raw/informe.csv", hits[0].RelPath)
+		}
+		if hits[0].VaultName == "" {
+			t.Errorf("VaultName should not be empty")
+		}
+	})
+}
+
+// TestVaultSearch_NoMatch checks that a query matching neither content nor
+// path returns zero hits and no error.
+func TestVaultSearch_NoMatch(t *testing.T) {
+	t.Run("query sin resultados retorna slice vacio", func(t *testing.T) {
+		vault := openTestVaultDir(t)
+		seedVaultFile(t, vault, "data/raw/file.csv", "text/csv", "data", "raw", "some content", 100)
+
+		results, searchErr := VaultSearch(vault, "zzznomatch", 10)
+		if searchErr != nil {
+			t.Fatalf("VaultSearch: %v", searchErr)
+		}
+		if n := len(results); n != 0 {
+			t.Errorf("got %d hits, want 0", n)
+		}
+	})
+}
+
+// TestVaultSearch_LimitRespected seeds ten files sharing a keyword and checks
+// that a limit of 3 returns exactly three hits.
+func TestVaultSearch_LimitRespected(t *testing.T) {
+	t.Run("limit se respeta", func(t *testing.T) {
+		vault := openTestVaultDir(t)
+		// filea.csv .. filej.csv
+		for letter := 'a'; letter < 'a'+10; letter++ {
+			relPath := "data/raw/file" + string(letter) + ".csv"
+			seedVaultFile(t, vault, relPath, "text/csv", "data", "raw", "common keyword everywhere", 100)
+		}
+
+		results, searchErr := VaultSearch(vault, "common", 3)
+		if searchErr != nil {
+			t.Fatalf("VaultSearch: %v", searchErr)
+		}
+		if n := len(results); n != 3 {
+			t.Errorf("got %d hits, want 3", n)
+		}
+	})
+}
+
+// TestVaultSearch_BadFTSQuery_FallbackLike checks that a query FTS5 is expected
+// to reject still produces hits through the LIKE search on rel_path, and that
+// those hits carry no snippet.
+func TestVaultSearch_BadFTSQuery_FallbackLike(t *testing.T) {
+	t.Run("query FTS invalida activa fallback LIKE", func(t *testing.T) {
+		vault := openTestVaultDir(t)
+		// The rel_path contains "foo", the first token of the query below,
+		// so the LIKE search can find it; content_text is left empty.
+		seedVaultFile(t, vault, "data/raw/foobar_report.csv", "text/csv", "data", "raw", "", 200)
+
+		// safeFTSQuery leaves "foo:bar:" untouched because it contains ':'.
+		// FTS5 is expected to reject it, leaving only the LIKE results.
+		results, searchErr := VaultSearch(vault, "foo:bar:", 10)
+		if searchErr != nil {
+			t.Fatalf("VaultSearch: %v", searchErr)
+		}
+		if len(results) == 0 {
+			t.Errorf("expected fallback LIKE to find foobar_report.csv, got 0 hits")
+		}
+		for i := range results {
+			if snippet := results[i].Snippet; snippet != "" {
+				t.Errorf("fallback hits should have empty Snippet, got %q", snippet)
+			}
+		}
+	})
+}
+
+// TestVaultSearch_LimitZeroDefaults seeds 55 files sharing a keyword and checks
+// that a limit of 0 falls back to the default of 50 hits.
+func TestVaultSearch_LimitZeroDefaults(t *testing.T) {
+	t.Run("limit cero usa 50 por defecto", func(t *testing.T) {
+		vault := openTestVaultDir(t)
+		// 55 unique names: a letter for the tens (a..f) followed by a digit
+		// for the units, i.e. doca0 .. docf4.
+		for i := 0; i < 55; i++ {
+			tens := string(rune('a' + i/10))
+			units := string(rune('0' + i%10))
+			relPath := "data/raw/doc" + tens + units + ".csv"
+			seedVaultFile(t, vault, relPath, "text/csv", "data", "raw", "keyword alpha beta", 100)
+		}
+
+		results, searchErr := VaultSearch(vault, "keyword", 0)
+		if searchErr != nil {
+			t.Fatalf("VaultSearch: %v", searchErr)
+		}
+		if n := len(results); n != 50 {
+			t.Errorf("got %d hits, want 50 (default limit)", n)
+		}
+	})
+}
@@ -0,0 +1,20 @@
+package ml
+
+import "encoding/json"
+
+// GenconfigMarshal serializa un GenerationConfig a JSON canonico con indent de 2 espacios.
+// El formato es identico al de Python json.dumps(indent=2, sort_keys=False):
+// keys en el orden de declaracion del struct, snake_case, campos omitempty ausentes si zero.
+func GenconfigMarshal(cfg GenerationConfig) ([]byte, error) {
+	return json.MarshalIndent(cfg, "", "  ")
+}
+
+// GenconfigUnmarshal deserializa JSON (compacto o con indent) a GenerationConfig.
+// Los campos JSON deben usar snake_case: negative_prompt, cfg_scale, model_type, etc.
+func GenconfigUnmarshal(data []byte) (GenerationConfig, error) {
+	var cfg GenerationConfig
+	if err := json.Unmarshal(data, &cfg); err != nil {
+		return GenerationConfig{}, err
+	}
+	return cfg, nil
+}
@@ -0,0 +1,84 @@
+---
+name: genconfig_json_marshal
+kind: function
+lang: go
+domain: ml
+version: "1.0.0"
+purity: impure
+signature: "func GenconfigMarshal(cfg GenerationConfig) ([]byte, error)\nfunc GenconfigUnmarshal(data []byte) (GenerationConfig, error)"
+description: "Wrappers json.Marshal/Unmarshal para GenerationConfig con formato canonico (MarshalIndent 2 espacios). Garantiza roundtrip identico al Python: json.dumps(indent=2, sort_keys=False). Campos JSON en snake_case."
+tags: [ml, json, marshal, unmarshal, serialization, generation, canonical]
+uses_functions: []
+uses_types: [generation_config_go_ml]
+returns: []
+returns_optional: false
+error_type: "error_go_core"
+imports: ["encoding/json"]
+params:
+  - name: cfg
+    desc: "GenerationConfig a serializar. Campos omitempty (negative_prompt, loras, clip_skip) se omiten si son zero/nil/empty."
+  - name: data
+    desc: "JSON bytes a deserializar. Acepta formato compacto o con indent. Keys deben ser snake_case (negative_prompt, cfg_scale, model_type, etc.)."
+output: "GenconfigMarshal: bytes JSON con indent 2 espacios, orden de campos segun declaracion del struct (prompt, negative_prompt, seed, steps, cfg_scale, sampler, width, height, model, loras, clip_skip). GenconfigUnmarshal: GenerationConfig poblado o error de parsing."
+tested: true
+tests:
+  - "roundtrip marshal unmarshal produce config igual"
+  - "json cross-language snake_case keys se deserializan correctamente"
+test_file_path: "functions/ml/genconfig_test.go"
+file_path: "functions/ml/genconfig_json_marshal.go"
+---
+
+## Ejemplo
+
+```go
+cfg := ml.GenerationConfig{
+    Prompt:   "a mountain at sunset",
+    Seed:     1234,
+    Steps:    30,
+    CfgScale: 7.0,
+    Sampler:  "euler",
+    Width:    768,
+    Height:   512,
+    Model:    ml.ModelRef{Name: "sdxl-base", ModelType: "sdxl", Quantization: "fp16"},
+}
+
+b, err := ml.GenconfigMarshal(cfg)
+// b == {
+//   "prompt": "a mountain at sunset",
+//   "seed": 1234,
+//   ...
+// }
+
+cfg2, err := ml.GenconfigUnmarshal(b)
+// cfg2 == cfg  (DeepEqual)
+```
+
+## Notas
+
+### Formato canonico y compatibilidad con Python
+
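+`GenconfigMarshal` usa `json.MarshalIndent(cfg, "", "  ")`. El formato resultante coincide en estructura, orden de keys e indentacion con el que produce Python con `model.model_dump_json(indent=2)` o `json.dumps(data, indent=2)` cuando `sort_keys=False`. Ojo: `encoding/json` escapa `<`, `>` y `&` dentro de strings como `\u003c`, `\u003e` y `\u0026`, y `json.dumps` escapa los caracteres no ASCII por defecto (`ensure_ascii=True`), asi que la igualdad byte a byte solo se cumple si los strings no contienen esos caracteres. Propiedades del formato: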
+
+- Keys en orden de declaracion del struct (no alfabetico).
+- Indent de 2 espacios, sin trailing whitespace.
+- Campos omitempty ausentes si zero: `negative_prompt` ausente si `""`, `loras` ausente si `[]`, `clip_skip` ausente si `nil`.
+
+### Keys JSON (snake_case obligatorio)
+
+| Campo Go | Key JSON |
+|---|---|
+| `Prompt` | `"prompt"` |
+| `NegativePrompt` | `"negative_prompt"` |
+| `Seed` | `"seed"` |
+| `Steps` | `"steps"` |
+| `CfgScale` | `"cfg_scale"` |
+| `Sampler` | `"sampler"` |
+| `Width` | `"width"` |
+| `Height` | `"height"` |
+| `Model.ModelType` | `"model_type"` |
+| `Model.Quantization` | `"quantization"` |
+| `ClipSkip` | `"clip_skip"` |
+
+### Por que impure
+
+Los errores de `json.Unmarshal` son errores de parsing del input externo, no de I/O, pero se modelan como `(T, error)` para forzar manejo explicito en el caller. Marcado `impure` con `error_type: error_go_core` por convencion del registry.
@@ -0,0 +1,260 @@
+package ml
+
+import (
+	"reflect"
+	"strings"
+	"testing"
+)
+
+// ---------------------------------------------------------------------------
+// TestGenconfigToSdcliArgs
+// ---------------------------------------------------------------------------
+
+// TestGenconfigToSdcliArgs checks the argument list built by
+// GenconfigToSdcliArgs: the exact flag order of a basic config, the repeated
+// --lora pairs, the sampler name translation, and the omission of
+// --negative-prompt when NegativePrompt is empty.
+func TestGenconfigToSdcliArgs(t *testing.T) {
+	clipSkip := 2
+
+	t.Run("config basico sin loras ni clip_skip", func(t *testing.T) {
+		cfg := GenerationConfig{
+			Prompt:   "a cat",
+			Seed:     42,
+			Steps:    20,
+			CfgScale: 7.5,
+			Sampler:  "euler",
+			Width:    512,
+			Height:   512,
+			Model:    ModelRef{Name: "v1-5", ModelType: "sd15", Quantization: "fp16"},
+		}
+		args := GenconfigToSdcliArgs(cfg)
+
+		// Model.Path is empty here, so no --model flag is expected.
+		want := []string{
+			"--prompt", "a cat",
+			"--seed", "42",
+			"--steps", "20",
+			"--cfg-scale", "7.5",
+			"--width", "512",
+			"--height", "512",
+			"--sampling-method", "euler",
+		}
+		if !reflect.DeepEqual(args, want) {
+			t.Errorf("got  %v\nwant %v", args, want)
+		}
+	})
+
+	t.Run("loras se emiten como pares path:weight", func(t *testing.T) {
+		cfg := GenerationConfig{
+			Prompt:   "portrait",
+			Seed:     1,
+			Steps:    10,
+			CfgScale: 7.0,
+			Sampler:  "euler",
+			Width:    512,
+			Height:   512,
+			Model:    ModelRef{Name: "v1-5", ModelType: "sd15", Quantization: "fp16", Path: "/models/v1.safetensors"},
+			Loras: []LoraRef{
+				{Path: "/loras/detail.safetensors", Weight: 0.8},
+				{Path: "/loras/style.safetensors", Weight: 0.5},
+			},
+			ClipSkip: &clipSkip,
+		}
+		args := GenconfigToSdcliArgs(cfg)
+
+		// Check that a --lora pair exists for each of the two loras.
+		loraIdx := indexAll(args, "--lora")
+		if len(loraIdx) != 2 {
+			t.Fatalf("esperaba 2 flags --lora, got %d en %v", len(loraIdx), args)
+		}
+		wantLoras := []string{
+			"/loras/detail.safetensors:0.8",
+			"/loras/style.safetensors:0.5",
+		}
+		for i, idx := range loraIdx {
+			if idx+1 >= len(args) {
+				t.Fatalf("--lora[%d] sin valor siguiente", i)
+			}
+			if args[idx+1] != wantLoras[i] {
+				t.Errorf("lora[%d]: got %q, want %q", i, args[idx+1], wantLoras[i])
+			}
+		}
+
+		// Check that --model and --clip-skip are present.
+		if !containsPair(args, "--model", "/models/v1.safetensors") {
+			t.Errorf("--model no encontrado en %v", args)
+		}
+		if !containsPair(args, "--clip-skip", "2") {
+			t.Errorf("--clip-skip no encontrado en %v", args)
+		}
+	})
+
+	t.Run("sampler dpm++2m se traduce a dpmpp2m", func(t *testing.T) {
+		cfg := GenerationConfig{
+			Prompt:   "x",
+			Seed:     0,
+			Steps:    1,
+			CfgScale: 1.0,
+			Sampler:  "dpm++2m",
+			Width:    64,
+			Height:   64,
+			Model:    ModelRef{Name: "m", ModelType: "sd15", Quantization: "fp16"},
+		}
+		args := GenconfigToSdcliArgs(cfg)
+		if !containsPair(args, "--sampling-method", "dpmpp2m") {
+			t.Errorf("sampler no traducido; args=%v", args)
+		}
+	})
+
+	t.Run("negative_prompt vacio no genera flag", func(t *testing.T) {
+		cfg := GenerationConfig{
+			Prompt:         "x",
+			NegativePrompt: "",
+			Seed:           0,
+			Steps:          1,
+			CfgScale:       1.0,
+			Sampler:        "euler",
+			Width:          64,
+			Height:         64,
+			Model:          ModelRef{Name: "m", ModelType: "sd15", Quantization: "fp16"},
+		}
+		args := GenconfigToSdcliArgs(cfg)
+		for _, a := range args {
+			if a == "--negative-prompt" {
+				t.Errorf("flag --negative-prompt presente aunque NegativePrompt es vacio")
+			}
+		}
+	})
+}
+
+// ---------------------------------------------------------------------------
+// TestGenconfigMarshalRoundtrip
+// ---------------------------------------------------------------------------
+
+// TestGenconfigMarshalRoundtrip serializes a fully populated GenerationConfig
+// with GenconfigMarshal, parses it back with GenconfigUnmarshal and requires
+// the result to be reflect.DeepEqual to the input.
+func TestGenconfigMarshalRoundtrip(t *testing.T) {
+	t.Run("roundtrip marshal unmarshal produce config igual", func(t *testing.T) {
+		clip := 2
+		// Every optional field (NegativePrompt, Model.Path, Loras, ClipSkip) is
+		// set so that none of them is dropped from the serialized form.
+		cfg := GenerationConfig{
+			Prompt:         "sunset over the mountains",
+			NegativePrompt: "blurry, low quality",
+			Seed:           99,
+			Steps:          30,
+			CfgScale:       7.5,
+			Sampler:        "dpm++2m",
+			Width:          768,
+			Height:         512,
+			Model: ModelRef{
+				Name:         "sdxl-base",
+				ModelType:    "sdxl",
+				Quantization: "fp16",
+				Path:         "/models/sdxl.safetensors",
+			},
+			Loras: []LoraRef{
+				{Path: "/loras/detail.safetensors", Weight: 0.8},
+			},
+			ClipSkip: &clip,
+		}
+
+		b, err := GenconfigMarshal(cfg)
+		if err != nil {
+			t.Fatalf("GenconfigMarshal: %v", err)
+		}
+
+		got, err := GenconfigUnmarshal(b)
+		if err != nil {
+			t.Fatalf("GenconfigUnmarshal: %v", err)
+		}
+
+		if !reflect.DeepEqual(cfg, got) {
+			t.Errorf("roundtrip diverge\norig: %+v\ngot:  %+v", cfg, got)
+		}
+	})
+}
+
+// ---------------------------------------------------------------------------
+// TestGenconfigCrossLanguageJSON
+// ---------------------------------------------------------------------------
+
+// TestGenconfigCrossLanguageJSON parses a hand-written, Python-style JSON
+// document with GenconfigUnmarshal, checks a handful of decoded fields, then
+// re-marshals the value and checks that the snake_case keys are still emitted.
+func TestGenconfigCrossLanguageJSON(t *testing.T) {
+	// Hand-written fixture replicating what Python would generate with:
+	//   json.dumps(config.model_dump(), indent=2)
+	// Keys are snake_case, in the declaration order of the Python dataclass.
+	fixture := `{
+  "prompt": "a dragon",
+  "negative_prompt": "ugly",
+  "seed": 1234,
+  "steps": 25,
+  "cfg_scale": 7.0,
+  "sampler": "euler_a",
+  "width": 512,
+  "height": 512,
+  "model": {
+    "name": "v1-5",
+    "model_type": "sd15",
+    "quantization": "fp16"
+  },
+  "loras": [
+    {
+      "path": "/loras/dragon.safetensors",
+      "weight": 0.9
+    }
+  ]
+}`
+
+	t.Run("json cross-language snake_case keys se deserializan correctamente", func(t *testing.T) {
+		cfg, err := GenconfigUnmarshal([]byte(fixture))
+		if err != nil {
+			t.Fatalf("GenconfigUnmarshal fixture: %v", err)
+		}
+
+		// Check key fields.
+		if cfg.Prompt != "a dragon" {
+			t.Errorf("Prompt: got %q", cfg.Prompt)
+		}
+		if cfg.NegativePrompt != "ugly" {
+			t.Errorf("NegativePrompt: got %q", cfg.NegativePrompt)
+		}
+		if cfg.CfgScale != 7.0 {
+			t.Errorf("CfgScale: got %v", cfg.CfgScale)
+		}
+		if cfg.Model.ModelType != "sd15" {
+			t.Errorf("Model.ModelType: got %q", cfg.Model.ModelType)
+		}
+		if len(cfg.Loras) != 1 || cfg.Loras[0].Weight != 0.9 {
+			t.Errorf("Loras: got %+v", cfg.Loras)
+		}
+
+		// Re-marshal and check that the snake_case keys are still present.
+		b, err := GenconfigMarshal(cfg)
+		if err != nil {
+			t.Fatalf("GenconfigMarshal: %v", err)
+		}
+		s := string(b)
+		for _, key := range []string{"negative_prompt", "cfg_scale", "model_type", "quantization"} {
+			if !strings.Contains(s, `"`+key+`"`) {
+				t.Errorf("key %q ausente en JSON re-serializado:\n%s", key, s)
+			}
+		}
+	})
+}
+
+// ---------------------------------------------------------------------------
+// helpers
+// ---------------------------------------------------------------------------
+
+// indexAll returns, in ascending order, every position at which val occurs in
+// slice. The result is nil when val does not occur at all.
+func indexAll(slice []string, val string) []int {
+	var positions []int
+	for pos := 0; pos < len(slice); pos++ {
+		if slice[pos] != val {
+			continue
+		}
+		positions = append(positions, pos)
+	}
+	return positions
+}
+
+// containsPair reports whether flag appears in slice immediately followed by
+// value.
+func containsPair(slice []string, flag, value string) bool {
+	// Walk the slice by the second element of each adjacent pair.
+	for next := 1; next < len(slice); next++ {
+		if slice[next-1] == flag && slice[next] == value {
+			return true
+		}
+	}
+	return false
+}
@@ -0,0 +1,59 @@
+package ml
+
+import (
+	"fmt"
+	"strconv"
+)
+
+// samplerMap translates the canonical sampler names of the ml domain into the
+// values passed to stable-diffusion.cpp via --sampling-method.
+var samplerMap = map[string]string{
+	"euler":      "euler",
+	"euler_a":    "euler_a",
+	"dpm++2m":    "dpmpp2m",
+	"dpm++2m_v2": "dpmpp2mv2",
+	"heun":       "heun",
+	"dpm2":       "dpm2",
+	"lcm":        "lcm",
+}
+
+// GenconfigToSdcliArgs convierte un GenerationConfig en una lista de argumentos
+// CLI para stable-diffusion.cpp (sd.exe / sd binario).
+// Espejo Go de genconfig_to_sdcpp_args_py_ml.
+//
+// Loras se emiten como pares repetidos "--lora" "path:weight".
+// Si el sampler no existe en samplerMap se usa el valor literal sin traducir.
+// La funcion es pura: sin I/O, sin estado, determinista.
+func GenconfigToSdcliArgs(cfg GenerationConfig) []string {
+	args := []string{
+		"--prompt", cfg.Prompt,
+		"--seed", strconv.FormatInt(cfg.Seed, 10),
+		"--steps", strconv.Itoa(cfg.Steps),
+		"--cfg-scale", strconv.FormatFloat(cfg.CfgScale, 'f', -1, 64),
+		"--width", strconv.Itoa(cfg.Width),
+		"--height", strconv.Itoa(cfg.Height),
+	}
+
+	if cfg.NegativePrompt != "" {
+		args = append(args, "--negative-prompt", cfg.NegativePrompt)
+	}
+
+	sampler := cfg.Sampler
+	if mapped, ok := samplerMap[sampler]; ok {
+		sampler = mapped
+	}
+	args = append(args, "--sampling-method", sampler)
+
+	if cfg.Model.Path != "" {
+		args = append(args, "--model", cfg.Model.Path)
+	}
+
+	if cfg.ClipSkip != nil {
+		args = append(args, "--clip-skip", strconv.Itoa(*cfg.ClipSkip))
+	}
+
+	for _, lora := range cfg.Loras {
+		args = append(args, "--lora", fmt.Sprintf("%s:%g", lora.Path, lora.Weight))
+	}
+
+	return args
+}
@@ -0,0 +1,59 @@
+---
+name: genconfig_to_sdcli_args
+kind: function
+lang: go
+domain: ml
+version: "1.0.0"
+purity: pure
+signature: "func GenconfigToSdcliArgs(cfg GenerationConfig) []string"
+description: "Convierte un GenerationConfig en argumentos CLI para stable-diffusion.cpp. Espejo Go de genconfig_to_sdcpp_args_py_ml. Loras se emiten como pares repetidos --lora path:weight. Sampler traducido via samplerMap canonico."
+tags: [ml, stable-diffusion, cli, args, generation, pure]
+uses_functions: []
+uses_types: [generation_config_go_ml]
+returns: []
+returns_optional: false
+error_type: ""
+imports: ["fmt", "strconv"]
+params:
+  - name: cfg
+    desc: "Parametros completos de generacion de imagen. Sampler debe ser uno de los valores de SamplerName. Model.Path se emite como --model si no esta vacio."
+output: "Slice de strings listos para pasar a exec.Command o similar. Incluye --prompt, --seed, --steps, --cfg-scale, --width, --height, --sampling-method, opcionales --negative-prompt / --model / --clip-skip, y pares --lora path:weight por cada LoraRef."
+tested: true
+tests:
+  - "config basico sin loras ni clip_skip"
+  - "loras se emiten como pares path:weight"
+  - "sampler dpm++2m se traduce a dpmpp2m"
+  - "negative_prompt vacio no genera flag"
+test_file_path: "functions/ml/genconfig_test.go"
+file_path: "functions/ml/genconfig_to_sdcli_args.go"
+---
+
+## Ejemplo
+
+```go
+clip := 2
+cfg := ml.GenerationConfig{
+    Prompt:   "a cat",
+    Seed:     42,
+    Steps:    20,
+    CfgScale: 7.5,
+    Sampler:  "dpm++2m",
+    Width:    512,
+    Height:   512,
+    Model:    ml.ModelRef{Name: "v1-5", ModelType: "sd15", Quantization: "fp16", Path: "/models/v1-5.safetensors"},
+    Loras:    []ml.LoraRef{{Path: "/loras/detail.safetensors", Weight: 0.8}},
+    ClipSkip: &clip,
+}
+args := ml.GenconfigToSdcliArgs(cfg)
+// args == ["--prompt","a cat","--seed","42","--steps","20",
+//          "--cfg-scale","7.5","--width","512","--height","512",
+//          "--sampling-method","dpmpp2m","--model","/models/v1-5.safetensors",
+//          "--clip-skip","2","--lora","/loras/detail.safetensors:0.8"]
+```
+
+## Notas
+
+- `samplerMap` traduce nombres canonicos del dominio ml a los identificadores que acepta stable-diffusion.cpp. Si el sampler no esta en el mapa se usa el valor literal.
+- El flag de modelo (`--model`) solo se emite si `cfg.Model.Path != ""`.
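+- `%g` en `fmt.Sprintf` para el peso de la lora elimina ceros insignificantes: `0.800000` → `0.8`. Para magnitudes muy pequenas o muy grandes `%g` cambia a notacion exponencial (p. ej. `0.00001` → `1e-05`).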
+- Funcion pura: misma entrada, misma salida. Sin I/O ni estado global.
@@ -0,0 +1,18 @@
+package ml
+
+// GenerationConfig parameterizes an image generation request.
+// JSON-compatible mirror of GenerationConfig_py_ml: the json tags match the
+// snake_case fields of the Python dataclass so values round-trip without loss.
+//
+// NegativePrompt, Loras and ClipSkip carry omitempty and are therefore left
+// out of the JSON when empty / nil; every other field is always emitted.
+type GenerationConfig struct {
+	Prompt         string    `json:"prompt"`
+	NegativePrompt string    `json:"negative_prompt,omitempty"`
+	Seed           int64     `json:"seed"`
+	Steps          int       `json:"steps"`
+	CfgScale       float64   `json:"cfg_scale"`
+	Sampler        string    `json:"sampler"`
+	Width          int       `json:"width"`
+	Height         int       `json:"height"`
+	Model          ModelRef  `json:"model"`
+	Loras          []LoraRef `json:"loras,omitempty"`
+	ClipSkip       *int      `json:"clip_skip,omitempty"`
+}
@@ -0,0 +1,12 @@
+package ml
+
+// ImageGenResult holds the generated image and its execution metadata.
+// ImageBytes carries the raw PNG bytes and is excluded from the JSON
+// (json:"-" tag) because it travels over a separate binary channel.
+// VramPeakMb is optional: it is omitted from the JSON when nil.
+type ImageGenResult struct {
+	ImageBytes  []byte         `json:"-"`
+	Format      string         `json:"format"`
+	Meta        map[string]any `json:"meta"`
+	DurationMs  int64          `json:"duration_ms"`
+	VramPeakMb  *int           `json:"vram_peak_mb,omitempty"`
+}
@@ -0,0 +1,9 @@
+package ml
+
+import "context"
+
+// ImageGenerator defines the contract for any image generation backend.
+// Implementations may be local (ComfyUI, diffusers) or remote (API).
+// Generate receives the request context and the generation parameters and
+// returns the produced image with its metadata, or an error.
+type ImageGenerator interface {
+	Generate(ctx context.Context, cfg GenerationConfig) (ImageGenResult, error)
+}
@@ -0,0 +1,8 @@
+package ml
+
+// LoraRef references a LoRA adapter with its merge weight and an optional
+// scale. Scale is omitted from the JSON when nil.
+type LoraRef struct {
+	Path   string   `json:"path"`
+	Weight float64  `json:"weight"`
+	Scale  *float64 `json:"scale,omitempty"`
+}
@@ -0,0 +1,10 @@
+package ml
+
+// ModelRef identifies an image generation model by name, type, quantization
+// and an optional on-disk path. Path is omitted from the JSON when empty.
+type ModelRef struct {
+	Name         string `json:"name"`
+	ModelType    string `json:"model_type"`   // sd15|sdxl|flux_dev|...
+	Quantization string `json:"quantization"` // fp16|q8_0|...
+	Path         string `json:"path,omitempty"`
+}
@@ -0,0 +1,78 @@
+package ml
+
+import (
+	"regexp"
+	"strconv"
+)
+
+// SdcliProgress holds the progress state parsed from one stderr line of sd-cli.
+type SdcliProgress struct {
+	Step       int     `json:"step"`
+	TotalSteps int     `json:"total_steps"`
+	ItPerSec   float64 `json:"it_per_sec"`
+	Percent    float64 `json:"percent"`
+}
+
+// reProgress1 matches the compact format: "  3/30 |  0.84it/s |  10%"
+var reProgress1 = regexp.MustCompile(`\s*(\d+)\s*/\s*(\d+)\s*\|[^|]*?([\d.]+)\s*it/s[^|]*?\|\s*([\d.]+)\s*%`)
+
+// reProgress2 matches the verbose format: "sampling: step 3 of 30 (0.84 it/s)"
+var reProgress2 = regexp.MustCompile(`step\s+(\d+)\s+of\s+(\d+)\s*\(\s*([\d.]+)\s*it/s\)`)
+
+// reProgress3 matches the minimal format: "step 3/30" or "progress: 3/30".
+// The "N/M" pair must not be preceded by a word character, '.', '/' or '\',
+// nor followed by a word character, '/' or '\'. Without these guards any
+// "digits/digits" fragment matched, so file paths ("/models/sd15/2024/x") and
+// dates ("05/13/2026") were reported as progress.
+var reProgress3 = regexp.MustCompile(`(?:^|[^\w./\\])(\d+)\s*/\s*(\d+)(?:$|[^\w/\\])`)
+
+// SdcliParseProgress parses one stderr line of stable-diffusion.cpp / sd-cli
+// and extracts the progress state.
+//
+// Three formats are tried in order; the first whose numbers all parse wins:
+//  1. compact "3/30 | 0.84it/s | 10%": Percent is read from the line;
+//  2. verbose "step 3 of 30 (0.84 it/s)": Percent is 100*step/total;
+//  3. minimal "3/30": ItPerSec is 0 and Percent is 100*step/total.
+//
+// Formats 2 and 3 require total > 0 because they divide by it. When a format
+// matches but its numbers are unusable, the next format is tried.
+//
+// Returns (SdcliProgress, true) when the line holds recognizable progress and
+// (zero value, false) otherwise. Apart from reading the package-level compiled
+// regexps the function has no state and performs no I/O.
+func SdcliParseProgress(line string) (SdcliProgress, bool) {
+	// Format 1: "  3/30 |  0.84it/s |  10%"
+	if m := reProgress1.FindStringSubmatch(line); m != nil {
+		step, err1 := strconv.Atoi(m[1])
+		total, err2 := strconv.Atoi(m[2])
+		itPerSec, err3 := strconv.ParseFloat(m[3], 64)
+		pct, err4 := strconv.ParseFloat(m[4], 64)
+		if err1 == nil && err2 == nil && err3 == nil && err4 == nil {
+			return SdcliProgress{
+				Step:       step,
+				TotalSteps: total,
+				ItPerSec:   itPerSec,
+				Percent:    pct,
+			}, true
+		}
+	}
+
+	// Format 2: "sampling: step 3 of 30 (0.84 it/s)"
+	if m := reProgress2.FindStringSubmatch(line); m != nil {
+		step, err1 := strconv.Atoi(m[1])
+		total, err2 := strconv.Atoi(m[2])
+		itPerSec, err3 := strconv.ParseFloat(m[3], 64)
+		if err1 == nil && err2 == nil && err3 == nil && total > 0 {
+			pct := 100.0 * float64(step) / float64(total)
+			return SdcliProgress{
+				Step:       step,
+				TotalSteps: total,
+				ItPerSec:   itPerSec,
+				Percent:    pct,
+			}, true
+		}
+	}
+
+	// Format 3: "step 3/30" or "progress: 3/30", without speed
+	if m := reProgress3.FindStringSubmatch(line); m != nil {
+		step, err1 := strconv.Atoi(m[1])
+		total, err2 := strconv.Atoi(m[2])
+		if err1 == nil && err2 == nil && total > 0 {
+			pct := 100.0 * float64(step) / float64(total)
+			return SdcliProgress{
+				Step:       step,
+				TotalSteps: total,
+				ItPerSec:   0,
+				Percent:    pct,
+			}, true
+		}
+	}
+
+	return SdcliProgress{}, false
+}
@@ -0,0 +1,50 @@
+---
+name: sdcli_parse_progress
+kind: function
+lang: go
+domain: ml
+version: "1.0.0"
+purity: pure
+signature: "func SdcliParseProgress(line string) (SdcliProgress, bool)"
+description: "Parsea una linea de stderr de stable-diffusion.cpp / sd-cli y extrae el estado de progreso. Soporta el formato compacto '3/30 | 0.84it/s | 10%', el formato verbose 'sampling: step 3 of 30 (0.84 it/s)', y el formato minimal 'progress: 3/30'. Retorna (zero, false) si la linea no contiene informacion de progreso reconocible."
+tags: [ml, stable-diffusion, sdcli, progress, parser, stderr, pure]
+uses_functions: []
+uses_types: []
+returns: []
+returns_optional: false
+error_type: ""
+imports: ["regexp", "strconv"]
+params:
+  - name: line
+    desc: "Una linea de stderr emitida por sd-cli / stable-diffusion.cpp durante la fase de sampling. Puede contener espacios al inicio o final."
+output: "Par (SdcliProgress, bool). bool=true si se reconocio un patron de progreso; SdcliProgress contiene Step (paso actual), TotalSteps (pasos totales), ItPerSec (iteraciones por segundo, 0 si no disponible) y Percent (porcentaje 0-100 calculado o leido de la linea). bool=false y struct zero si la linea no contiene progreso."
+tested: true
+tests:
+  - "formato estandar compacto step/total/itpersec/percent"
+  - "linea sin patron retorna false"
+  - "formato sampling verbose con velocidad"
+file_path: "functions/ml/sdcli_parse_progress.go"
+test_file_path: "functions/ml/sdcli_parse_progress_test.go"
+---
+
+## Ejemplo
+
+```go
+p, ok := ml.SdcliParseProgress("  3/30 |  0.84it/s |  10%")
+// ok = true
+// p = SdcliProgress{Step:3, TotalSteps:30, ItPerSec:0.84, Percent:10.0}
+
+p2, ok2 := ml.SdcliParseProgress("sampling: step 15 of 30 (1.2 it/s)")
+// ok2 = true
+// p2 = SdcliProgress{Step:15, TotalSteps:30, ItPerSec:1.2, Percent:50.0}
+
+_, ok3 := ml.SdcliParseProgress("loading model...")
+// ok3 = false
+```
+
+## Notas
+
+- Regexps precompiladas como vars de paquete (se compilan una sola vez al init del paquete).
+- Tolerante a variaciones de espaciado gracias a `\s*` en los patrones.
+- El campo `Percent` en el formato verbose se calcula como `100 * step / total` (no se lee de la linea porque ese formato no lo emite).
+- Funcion pura: sin I/O, sin estado mutable, determinista.
@@ -0,0 +1,103 @@
+package ml
+
+import (
+	"math"
+	"testing"
+)
+
+// TestSdcliParseProgress_StandardFormat checks that the compact line
+// "step/total | it/s | percent" yields all four fields, with Percent taken
+// from the line. Floats are compared with a 1e-9 tolerance.
+func TestSdcliParseProgress_StandardFormat(t *testing.T) {
+	line := "  3/30 |  0.84it/s |  10%"
+	got, ok := SdcliParseProgress(line)
+	if !ok {
+		t.Fatalf("expected match, got false")
+	}
+	if got.Step != 3 {
+		t.Errorf("Step: got %d, want 3", got.Step)
+	}
+	if got.TotalSteps != 30 {
+		t.Errorf("TotalSteps: got %d, want 30", got.TotalSteps)
+	}
+	if math.Abs(got.ItPerSec-0.84) > 1e-9 {
+		t.Errorf("ItPerSec: got %v, want 0.84", got.ItPerSec)
+	}
+	if math.Abs(got.Percent-10.0) > 1e-9 {
+		t.Errorf("Percent: got %v, want 10.0", got.Percent)
+	}
+}
+
+// TestSdcliParseProgress_NoMatch checks that lines without any progress
+// pattern (including the empty line) are reported as not matching.
+func TestSdcliParseProgress_NoMatch(t *testing.T) {
+	cases := []string{
+		"loading model...",
+		"",
+		"error: out of memory",
+		"clip model loaded",
+		"generating image...",
+	}
+	for _, line := range cases {
+		_, ok := SdcliParseProgress(line)
+		if ok {
+			t.Errorf("expected no match for %q, but got match", line)
+		}
+	}
+}
+
+// TestSdcliParseProgress_AltFormat covers the remaining accepted line shapes:
+// the verbose "step N of M (x it/s)" form, the minimal "progress: N/M" form
+// without speed, and the compact form with different spacing and a two-digit
+// speed.
+func TestSdcliParseProgress_AltFormat(t *testing.T) {
+	t.Run("formato sampling verbose", func(t *testing.T) {
+		line := "sampling: step 3 of 30 (0.84 it/s)"
+		got, ok := SdcliParseProgress(line)
+		if !ok {
+			t.Fatalf("expected match, got false")
+		}
+		if got.Step != 3 {
+			t.Errorf("Step: got %d, want 3", got.Step)
+		}
+		if got.TotalSteps != 30 {
+			t.Errorf("TotalSteps: got %d, want 30", got.TotalSteps)
+		}
+		if math.Abs(got.ItPerSec-0.84) > 1e-9 {
+			t.Errorf("ItPerSec: got %v, want 0.84", got.ItPerSec)
+		}
+		// This format carries no percentage; it is derived from step/total.
+		expectedPct := 100.0 * 3.0 / 30.0
+		if math.Abs(got.Percent-expectedPct) > 1e-6 {
+			t.Errorf("Percent: got %v, want %v", got.Percent, expectedPct)
+		}
+	})
+
+	t.Run("formato step/total sin velocidad", func(t *testing.T) {
+		line := "progress: 15/20"
+		got, ok := SdcliParseProgress(line)
+		if !ok {
+			t.Fatalf("expected match, got false")
+		}
+		if got.Step != 15 {
+			t.Errorf("Step: got %d, want 15", got.Step)
+		}
+		if got.TotalSteps != 20 {
+			t.Errorf("TotalSteps: got %d, want 20", got.TotalSteps)
+		}
+		// No speed in the line, so ItPerSec must stay at zero.
+		if got.ItPerSec != 0 {
+			t.Errorf("ItPerSec: got %v, want 0", got.ItPerSec)
+		}
+		expectedPct := 75.0
+		if math.Abs(got.Percent-expectedPct) > 1e-6 {
+			t.Errorf("Percent: got %v, want %v", got.Percent, expectedPct)
+		}
+	})
+
+	t.Run("formato con espacios variables y mayor velocidad", func(t *testing.T) {
+		line := "  20/30 | 12.50it/s |  66%"
+		got, ok := SdcliParseProgress(line)
+		if !ok {
+			t.Fatalf("expected match, got false")
+		}
+		if got.Step != 20 {
+			t.Errorf("Step: got %d, want 20", got.Step)
+		}
+		if got.TotalSteps != 30 {
+			t.Errorf("TotalSteps: got %d, want 30", got.TotalSteps)
+		}
+		if math.Abs(got.ItPerSec-12.5) > 1e-9 {
+			t.Errorf("ItPerSec: got %v, want 12.5", got.ItPerSec)
+		}
+	})
+}
@@ -0,0 +1,161 @@
+"""Tests para vault_csv_profile."""
+
+from __future__ import annotations
+
+import os
+import sqlite3
+import sys
+import tempfile
+from pathlib import Path
+
+import pytest
+
+sys.path.insert(0, os.path.join(os.path.dirname(__file__), ".."))
+from vault_csv_profile import vault_csv_profile
+
+
+def _make_vault(tmp: Path) -> tuple[Path, Path]:
+    """Crea un vault mínimo con vault_index.db y tabla files + files_fts + csv_profiles."""
+    db = tmp / "vault_index.db"
+    conn = sqlite3.connect(str(db))
+    conn.executescript(
+        """
+        CREATE TABLE IF NOT EXISTS files (
+            rowid INTEGER PRIMARY KEY AUTOINCREMENT,
+            rel_path TEXT UNIQUE NOT NULL,
+            size_bytes INTEGER,
+            ext TEXT
+        );
+        CREATE VIRTUAL TABLE IF NOT EXISTS files_fts
+            USING fts5(rel_path, content_text, content='', contentless_delete=1);
+        CREATE TABLE IF NOT EXISTS csv_profiles (
+            rel_path TEXT PRIMARY KEY,
+            cols_json TEXT,
+            n_rows INTEGER,
+            encoding TEXT,
+            date_min TEXT,
+            date_max TEXT,
+            profiled_at INTEGER
+        );
+        """
+    )
+    conn.commit()
+    conn.close()
+    return tmp, db
+
+
+def _insert_file_entry(db: Path, rel_path: str):
+    """Inserta entrada en files para que files_fts tenga rowid válido."""
+    conn = sqlite3.connect(str(db))
+    conn.execute(
+        "INSERT OR IGNORE INTO files(rel_path, size_bytes, ext) VALUES (?, 0, '.csv')",
+        (rel_path,),
+    )
+    conn.commit()
+    conn.close()
+
+
+def test_csv_basic(tmp_path):
+    """Profile a 3-column, 3-row UTF-8 CSV and check the result and its persistence."""
+    vault, db = _make_vault(tmp_path)
+    rel = "data/basic.csv"
+    csv_file = vault / rel
+    csv_file.parent.mkdir(parents=True, exist_ok=True)
+    csv_file.write_text("nombre,edad,score\nAna,30,9.5\nBob,25,8.0\nCarla,35,7.5\n", encoding="utf-8")
+    _insert_file_entry(db, rel)
+
+    result = vault_csv_profile(str(vault), rel, db_path=str(db))
+
+    assert result["rel_path"] == rel
+    assert result["n_rows"] == 3
+    assert len(result["cols"]) == 3
+    col_names = [c["name"] for c in result["cols"]]
+    assert "nombre" in col_names
+    assert "edad" in col_names
+    assert "score" in col_names
+    assert result["persisted"] is True
+
+    # Check that the profile was persisted in csv_profiles.
+    conn = sqlite3.connect(str(db))
+    row = conn.execute("SELECT n_rows FROM csv_profiles WHERE rel_path = ?", (rel,)).fetchone()
+    conn.close()
+    assert row is not None
+    assert row[0] == 3
+
+
+def test_csv_date_detection(tmp_path):
+    """Profile a CSV with an ISO date column and check that a date range is reported."""
+    vault, db = _make_vault(tmp_path)
+    rel = "data/fechas.csv"
+    csv_file = vault / rel
+    csv_file.parent.mkdir(parents=True, exist_ok=True)
+    csv_file.write_text(
+        "fecha,valor\n2023-01-01,100\n2023-06-15,200\n2023-12-31,300\n",
+        encoding="utf-8",
+    )
+    _insert_file_entry(db, rel)
+
+    result = vault_csv_profile(str(vault), rel, db_path=str(db))
+
+    assert result["date_min"] is not None
+    assert result["date_max"] is not None
+    # NOTE(review): these string comparisons bound the range from one side only
+    # (any earlier min / later max also passes). If vault_csv_profile returns
+    # plain YYYY-MM-DD strings, equality would be stricter; confirm the format.
+    assert result["date_min"] <= "2023-01-01"
+    assert result["date_max"] >= "2023-12-31"
+
+
+def test_csv_encoding_latin1(tmp_path):
+    """Profile a latin-1 encoded CSV with accented characters."""
+    vault, db = _make_vault(tmp_path)
+    rel = "data/tildes.csv"
+    csv_file = vault / rel
+    csv_file.parent.mkdir(parents=True, exist_ok=True)
+    # Written as raw latin-1 bytes, so the file is not valid UTF-8.
+    csv_file.write_bytes(
+        "ciudad,poblacion\nMálaga,500000\nCórdoba,320000\n".encode("latin-1")
+    )
+    _insert_file_entry(db, rel)
+
+    result = vault_csv_profile(str(vault), rel, db_path=str(db))
+
+    assert result["n_rows"] == 2
+    # NOTE(review): "utf-8?" looks like a fallback marker of vault_csv_profile
+    # for an undetected encoding; confirm against its implementation.
+    assert result["encoding"] != "utf-8?"
+    # An encoding was detected (any non-empty value).
+    assert result["encoding"]
+    assert result["persisted"] is True
+
+
+def test_csv_empty(tmp_path):
+    """Profile a zero-byte CSV: no rows, no columns and no date range are expected."""
+    vault, db = _make_vault(tmp_path)
+    rel = "data/empty.csv"
+    csv_file = vault / rel
+    csv_file.parent.mkdir(parents=True, exist_ok=True)
+    csv_file.write_text("", encoding="utf-8")
+    _insert_file_entry(db, rel)
+
+    result = vault_csv_profile(str(vault), rel, db_path=str(db))
+
+    assert result["n_rows"] == 0
+    assert result["cols"] == []
+    assert result["date_min"] is None
+    assert result["date_max"] is None
+
+
+def test_csv_persists_fts(tmp_path):
+    """Contentless FTS5: check that the CSV column names are searchable with MATCH."""
+    vault, db = _make_vault(tmp_path)
+    rel = "data/fts_test.csv"
+    csv_file = vault / rel
+    csv_file.parent.mkdir(parents=True, exist_ok=True)
+    csv_file.write_text("producto,precio\nManzana,1.5\nPera,2.0\n", encoding="utf-8")
+    _insert_file_entry(db, rel)
+
+    vault_csv_profile(str(vault), rel, db_path=str(db))
+
+    conn = sqlite3.connect(str(db))
+    # A contentless FTS5 table cannot be read back with a plain SELECT, so
+    # MATCH is used to verify that the terms were indexed.
+    row_prod = conn.execute(
+        "SELECT rowid FROM files_fts WHERE files_fts MATCH 'producto'",
+    ).fetchone()
+    row_prec = conn.execute(
+        "SELECT rowid FROM files_fts WHERE files_fts MATCH 'precio'",
+    ).fetchone()
+    conn.close()
+
+    assert row_prod is not None, "FTS no encontró 'producto'"
+    assert row_prec is not None, "FTS no encontró 'precio'"
--- a/Show More
+++ b/Show More