Files
egutierrez e3c8979e8d chore: auto-commit (95 archivos)
- cmd/fn/doctor.go
- cmd/fn/main.go
- cpp/apps/primitives_gallery/playground/tables/CMakeLists.txt
- cpp/apps/primitives_gallery/playground/tables/data_table.cpp
- cpp/apps/primitives_gallery/playground/tables/data_table_logic.cpp
- cpp/apps/primitives_gallery/playground/tables/data_table_logic.h
- cpp/apps/primitives_gallery/playground/tables/self_test.cpp
- cpp/apps/primitives_gallery/playground/tables/tql.cpp
- cpp/apps/primitives_gallery/playground/tables/viz.cpp
- cpp/apps/primitives_gallery/playground/tables/viz.h
- ...

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-13 00:50:34 +02:00

1060 lines
25 KiB
Go

package main
import (
"bytes"
"database/sql"
"encoding/json"
"fmt"
"os"
"os/exec"
"path/filepath"
"strconv"
"strings"
"text/tabwriter"
"time"
"fn-registry/functions/infra"
"fn-registry/registry"
)
// cmdVault is the entry point for `fn vault`. It routes args[0] to the
// matching subcommand handler and hands the remaining arguments to handlers
// that accept them. With no subcommand, or an unrecognized one, it prints
// the usage text and exits with status 1.
func cmdVault(args []string) {
	if len(args) == 0 {
		vaultUsage()
		os.Exit(1)
	}

	sub, rest := args[0], args[1:]
	switch sub {
	case "help", "-h", "--help":
		vaultUsage()
	case "list":
		vaultList()
	case "search":
		vaultSearch(rest)
	case "index":
		vaultIndex(rest)
	case "info":
		vaultInfo(rest)
	case "layout-ensure":
		vaultLayoutEnsure(rest)
	case "profile":
		vaultProfile(rest)
	case "dedupe":
		vaultDedupe(rest)
	case "aggregate":
		vaultAggregate()
	case "doctor":
		vaultDoctorCmd()
	case "audit":
		vaultAudit(rest)
	default:
		fmt.Fprintf(os.Stderr, "unknown vault subcommand: %s\n", sub)
		vaultUsage()
		os.Exit(1)
	}
}
// vaultUsage prints the help text for `fn vault` and its subcommands to
// standard output.
func vaultUsage() {
fmt.Println(`fn vault — manage data vaults
Usage:
fn vault list List declared vaults
fn vault search <query> [--limit N] [--vault <name>] [--json]
Search files in vault(s)
fn vault index <name> Index a vault (scan + write)
fn vault index --all Index all declared vaults
fn vault info <name> Show vault summary and stats
fn vault layout-ensure <name> [--dry-run]
Ensure canonical data/knowledge layout
fn vault profile <name> Profile CSV/PDF/MD files in a vault
fn vault dedupe <name> Report duplicate files in a vault
fn vault aggregate Aggregate all vault indexes into registry.db
fn vault doctor Audit vault health (alias for fn doctor vaults)
fn vault audit <name> Run full audit pipeline on a vault
fn vault audit --all Run audit on all declared vaults`)
}
// --- list ---

// vaultList prints every vault known to the registry database as an aligned
// table with NAME, PROJECT, PATH and TAGS columns. An empty project or path
// is shown as "-". When no vaults are registered it prints a hint instead;
// a lookup failure is reported on stderr and exits with status 1.
func vaultList() {
	db := openDB()
	defer db.Close()

	registered, err := db.AllVaults()
	if err != nil {
		fmt.Fprintf(os.Stderr, "error: %v\n", err)
		os.Exit(1)
	}
	if len(registered) == 0 {
		fmt.Println("No vaults registered. Declare vaults in projects/*/vaults/vault.yaml and run 'fn index'.")
		return
	}

	tw := tabwriter.NewWriter(os.Stdout, 0, 0, 2, ' ', 0)
	fmt.Fprintln(tw, "NAME\tPROJECT\tPATH\tTAGS")
	for _, vault := range registered {
		project := vault.ProjectID
		if project == "" {
			project = "-"
		}
		location := vault.Path
		if location == "" {
			location = "-"
		}
		fmt.Fprintf(tw, "%s\t%s\t%s\t%s\n",
			vault.Name, project, location, strings.Join(vault.Tags, ","))
	}
	tw.Flush()
}
// --- search ---

// vaultSearch implements
// `fn vault search <query> [--limit N] [--vault <name>] [--json]`.
//
// The first argument that does not start with "--" becomes the query; later
// positional arguments and unrecognized flags are ignored. A missing or
// non-positive --limit falls back to 50, and that limit is handed to each
// vault's search individually. Vaults without a path are skipped, and a
// vault whose search fails only produces a warning on stderr. Hits are
// printed either as indented JSON (an empty array when nothing matched) or
// as an aligned table.
func vaultSearch(args []string) {
	var (
		query     string
		vaultName string
		limit     int
		asJSON    bool
	)

	for pos := 0; pos < len(args); pos++ {
		switch arg := args[pos]; arg {
		case "--limit":
			pos++ // the value follows the flag
			if pos >= len(args) {
				fmt.Fprintln(os.Stderr, "--limit requires a value")
				os.Exit(1)
			}
			parsed, convErr := strconv.Atoi(args[pos])
			if convErr != nil {
				fmt.Fprintf(os.Stderr, "--limit: invalid number %q\n", args[pos])
				os.Exit(1)
			}
			limit = parsed
		case "--vault":
			pos++ // the value follows the flag
			if pos >= len(args) {
				fmt.Fprintln(os.Stderr, "--vault requires a value")
				os.Exit(1)
			}
			vaultName = args[pos]
		case "--json":
			asJSON = true
		default:
			if query == "" && !strings.HasPrefix(arg, "--") {
				query = arg
			}
		}
	}

	if query == "" {
		fmt.Fprintln(os.Stderr, "usage: fn vault search <query> [--limit N] [--vault <name>] [--json]")
		os.Exit(1)
	}
	if limit <= 0 {
		limit = 50
	}

	db := openDB()
	defer db.Close()

	// Either the single named vault or every registered vault.
	targets, err := resolveSearchVaults(db, vaultName)
	if err != nil {
		fmt.Fprintf(os.Stderr, "error: %v\n", err)
		os.Exit(1)
	}
	if len(targets) == 0 {
		fmt.Fprintln(os.Stderr, "no vaults found")
		os.Exit(1)
	}

	var hits []infra.VaultSearchHit
	for _, vault := range targets {
		if vault.Path == "" {
			continue
		}
		found, searchErr := infra.VaultSearch(vault.Path, query, limit)
		if searchErr != nil {
			fmt.Fprintf(os.Stderr, "warn: vault %s: %v\n", vault.Name, searchErr)
			continue
		}
		hits = append(hits, found...)
	}

	if asJSON {
		// A nil slice would encode as null; callers get [] instead.
		if hits == nil {
			hits = []infra.VaultSearchHit{}
		}
		encoded, encErr := json.MarshalIndent(hits, "", " ")
		if encErr != nil {
			fmt.Fprintf(os.Stderr, "json error: %v\n", encErr)
			os.Exit(1)
		}
		fmt.Println(string(encoded))
		return
	}

	if len(hits) == 0 {
		fmt.Println("No results.")
		return
	}

	tw := tabwriter.NewWriter(os.Stdout, 0, 0, 2, ' ', 0)
	for _, hit := range hits {
		modified := time.Unix(hit.Mtime, 0).UTC().Format("2006-01-02")
		fmt.Fprintf(tw, "[%s]\t%s\t%s\t%s\t%s\t%s\n",
			hit.VaultName, hit.RelPath, formatBytes(hit.Size), modified, hit.Mime, truncate(hit.Snippet, 50))
	}
	tw.Flush()
}
// --- index ---

// vaultIndex implements `fn vault index <name> | --all`.
//
// With --all every registered vault is indexed; otherwise the first
// non-flag argument names the single vault to index. Vaults without a path
// are reported and skipped. A failure in one vault does not stop the others
// from being indexed, but the command exits with status 1 once all of them
// have been attempted, so callers and scripts can detect that something
// went wrong.
func vaultIndex(args []string) {
	indexAll := false
	var name string
	for _, a := range args {
		switch {
		case a == "--all":
			indexAll = true
		case !strings.HasPrefix(a, "--") && name == "":
			name = a
		}
	}
	if !indexAll && name == "" {
		fmt.Fprintln(os.Stderr, "usage: fn vault index <name> | --all")
		os.Exit(1)
	}

	db := openDB()
	defer db.Close()

	var vaults []registry.Vault
	if indexAll {
		all, err := db.AllVaults()
		if err != nil {
			fmt.Fprintf(os.Stderr, "error: %v\n", err)
			os.Exit(1)
		}
		vaults = all
	} else {
		v, err := resolveVaultByName(db, name)
		if err != nil {
			fmt.Fprintf(os.Stderr, "error: %v\n", err)
			os.Exit(1)
		}
		vaults = []registry.Vault{*v}
	}

	failed := 0
	for _, v := range vaults {
		if v.Path == "" {
			fmt.Printf("vault %s: path not set, skipping\n", v.Name)
			continue
		}
		if err := runVaultIndex(v); err != nil {
			fmt.Fprintf(os.Stderr, "vault %s: %v\n", v.Name, err)
			failed++
		}
	}

	if failed > 0 {
		// os.Exit does not run deferred calls, so release the handle first.
		db.Close()
		os.Exit(1)
	}
}
// runVaultIndex performs one scan-and-write cycle for a single vault: it
// scans the vault directory, opens the vault's index, writes the scanned
// files into it and prints the inserted/updated/pruned counts.
// Each failing stage is returned wrapped with its stage name ("scan",
// "open index" or "write").
func runVaultIndex(v registry.Vault) error {
	fmt.Printf("indexing %s (%s)...\n", v.Name, v.Path)

	scanned, scanErr := infra.VaultInventoryScan(v.Path, v.ID, v.Name)
	if scanErr != nil {
		return fmt.Errorf("scan: %w", scanErr)
	}

	index, openErr := infra.VaultIndexOpen(v.Path)
	if openErr != nil {
		return fmt.Errorf("open index: %w", openErr)
	}
	defer index.Close()

	// NOTE(review): the trailing `true` presumably enables pruning of
	// entries that are no longer on disk — confirm against VaultIndexWrite.
	summary, writeErr := infra.VaultIndexWrite(index, scanned, true)
	if writeErr != nil {
		return fmt.Errorf("write: %w", writeErr)
	}

	fmt.Printf(" indexed %d files, %d inserted, %d updated, %d pruned\n",
		len(scanned), summary.Inserted, summary.Updated, summary.Pruned)
	return nil
}
// --- info ---

// vaultInfo implements `fn vault info <name>`.
//
// It resolves the vault, opens its index and prints the total file count,
// total size and last-indexed time, followed by per-bucket and per-MIME
// breakdowns. Resolution and open failures are fatal (exit status 1).
// Failures while reading statistics are reported as warnings on stderr and
// the affected figure or row is omitted, so a damaged index never shows up
// as silently zeroed numbers.
func vaultInfo(args []string) {
	if len(args) < 1 {
		fmt.Fprintln(os.Stderr, "usage: fn vault info <name>")
		os.Exit(1)
	}
	name := args[0]

	db := openDB()
	defer db.Close()

	v, err := resolveVaultByName(db, name)
	if err != nil {
		fmt.Fprintf(os.Stderr, "error: %v\n", err)
		os.Exit(1)
	}
	if v.Path == "" {
		fmt.Fprintf(os.Stderr, "vault %s has no path set\n", name)
		os.Exit(1)
	}

	vaultDB, err := infra.VaultIndexOpen(v.Path)
	if err != nil {
		fmt.Fprintf(os.Stderr, "error opening vault index: %v\n", err)
		os.Exit(1)
	}
	defer vaultDB.Close()

	// Summary stats.
	var totalFiles int
	var totalSize int64
	if err := vaultDB.QueryRow(`SELECT count(*), coalesce(sum(size),0) FROM files`).Scan(&totalFiles, &totalSize); err != nil {
		fmt.Fprintf(os.Stderr, "warn: reading file totals: %v\n", err)
	}
	var lastIndexedAt int64
	if err := vaultDB.QueryRow(`SELECT coalesce(max(indexed_at), 0) FROM files`).Scan(&lastIndexedAt); err != nil {
		fmt.Fprintf(os.Stderr, "warn: reading last indexed time: %v\n", err)
	}
	lastIndexed := "-"
	if lastIndexedAt > 0 {
		lastIndexed = time.Unix(lastIndexedAt, 0).UTC().Format("2006-01-02 15:04:05")
	}
	fmt.Printf("Vault: %s (%s)\n", v.Name, v.Path)
	fmt.Printf("Files: %d Total: %s Last indexed: %s\n\n",
		totalFiles, formatBytes(totalSize), lastIndexed)

	// By bucket.
	bucketRows, err := vaultDB.Query(`
SELECT bucket, sub_bucket, count(*), coalesce(sum(size),0)
FROM files
GROUP BY bucket, sub_bucket
ORDER BY bucket, sub_bucket`)
	if err != nil {
		fmt.Fprintf(os.Stderr, "warn: bucket breakdown: %v\n", err)
	} else {
		fmt.Println("By bucket:")
		w := tabwriter.NewWriter(os.Stdout, 0, 0, 2, ' ', 0)
		for bucketRows.Next() {
			var bucket, sub string
			var cnt int
			var sz int64
			if scanErr := bucketRows.Scan(&bucket, &sub, &cnt, &sz); scanErr != nil {
				fmt.Fprintf(os.Stderr, "warn: bucket row: %v\n", scanErr)
				continue
			}
			key := bucket
			if sub != "" {
				key = bucket + "/" + sub
			}
			fmt.Fprintf(w, " %s\t%d files\t%s\n", key, cnt, formatBytes(sz))
		}
		if iterErr := bucketRows.Err(); iterErr != nil {
			fmt.Fprintf(os.Stderr, "warn: bucket breakdown incomplete: %v\n", iterErr)
		}
		// Release the cursor before the next query runs.
		bucketRows.Close()
		w.Flush()
		fmt.Println()
	}

	// By mime.
	mimeRows, err := vaultDB.Query(`
SELECT mime, count(*), coalesce(sum(size),0)
FROM files
GROUP BY mime
ORDER BY sum(size) DESC`)
	if err != nil {
		fmt.Fprintf(os.Stderr, "warn: mime breakdown: %v\n", err)
		return
	}
	defer mimeRows.Close()

	fmt.Println("By mime:")
	w := tabwriter.NewWriter(os.Stdout, 0, 0, 2, ' ', 0)
	for mimeRows.Next() {
		var mime string
		var cnt int
		var sz int64
		if scanErr := mimeRows.Scan(&mime, &cnt, &sz); scanErr != nil {
			fmt.Fprintf(os.Stderr, "warn: mime row: %v\n", scanErr)
			continue
		}
		if mime == "" {
			mime = "unknown"
		}
		fmt.Fprintf(w, " %s\t%d files\t%s\n", mime, cnt, formatBytes(sz))
	}
	if iterErr := mimeRows.Err(); iterErr != nil {
		fmt.Fprintf(os.Stderr, "warn: mime breakdown incomplete: %v\n", iterErr)
	}
	w.Flush()
}
// --- layout-ensure ---

// vaultLayoutEnsure implements `fn vault layout-ensure <name> [--dry-run]`.
//
// The first non-flag argument names the vault; other flags are ignored.
// It runs the layout check on the vault's path and prints what was created,
// migrated, already in place or skipped. Every line is prefixed with
// "[dry-run] " when the report says it was a dry run. If nothing was
// created or migrated it additionally reports that the layout is already
// canonical.
func vaultLayoutEnsure(args []string) {
	var (
		name   string
		dryRun bool
	)
	for _, arg := range args {
		if arg == "--dry-run" {
			dryRun = true
			continue
		}
		if name == "" && !strings.HasPrefix(arg, "--") {
			name = arg
		}
	}
	if name == "" {
		fmt.Fprintln(os.Stderr, "usage: fn vault layout-ensure <name> [--dry-run]")
		os.Exit(1)
	}

	db := openDB()
	defer db.Close()

	vault, err := resolveVaultByName(db, name)
	if err != nil {
		fmt.Fprintf(os.Stderr, "error: %v\n", err)
		os.Exit(1)
	}
	if vault.Path == "" {
		fmt.Fprintf(os.Stderr, "vault %s has no path set\n", name)
		os.Exit(1)
	}

	report, err := infra.VaultLayoutEnsure(vault.Path, dryRun)
	if err != nil {
		fmt.Fprintf(os.Stderr, "error: %v\n", err)
		os.Exit(1)
	}

	tag := ""
	if report.DryRun {
		tag = "[dry-run] "
	}
	fmt.Printf("%sVault: %s (%s)\n", tag, name, report.VaultPath)

	// One line per non-empty category, in a fixed order.
	categories := []struct {
		format string
		items  []string
		sep    string
	}{
		{"%s created: %s\n", report.Created, ", "},
		{"%s migrated: %s\n", report.Migrated, "; "},
		{"%s already ok: %s\n", report.AlreadyOK, ", "},
		{"%s skipped (unrecognized): %s\n", report.Skipped, ", "},
	}
	for _, c := range categories {
		if len(c.items) > 0 {
			fmt.Printf(c.format, tag, strings.Join(c.items, c.sep))
		}
	}

	if len(report.Created)+len(report.Migrated) == 0 {
		fmt.Printf("%s layout already canonical\n", tag)
	}
}
// --- profile ---

// profileKind classifies a file for profiling and returns "csv", "pdf" or
// "md", or "" when no profiler applies.
//
// The extension takes priority; it is compared case-insensitively and may be
// given with or without its leading dot. The MIME type is consulted only
// when the extension is not recognized. MIME matching is a case-insensitive
// substring test, so variants such as "text/csv; charset=utf-8",
// "Application/PDF" or "text/x-markdown" are recognized as well.
func profileKind(ext, mime string) string {
	switch strings.ToLower(strings.TrimPrefix(ext, ".")) {
	case "csv":
		return "csv"
	case "pdf":
		return "pdf"
	case "md", "markdown":
		return "md"
	}

	// Fall back to the MIME type. Media type names are case-insensitive
	// (RFC 2045), so normalize before matching.
	mime = strings.ToLower(mime)
	switch {
	case strings.Contains(mime, "csv"):
		return "csv"
	case strings.Contains(mime, "pdf"):
		return "pdf"
	case strings.Contains(mime, "markdown"):
		return "md"
	}
	return ""
}
// vaultProfile implements `fn vault profile <name>`.
//
// It reads the file list from the vault's index and runs the Python
// dispatch script once for every file that profileKind classifies as CSV,
// PDF or Markdown, then prints per-kind counts along with skipped and
// failed totals. The registry's virtualenv interpreter is used when it
// exists, otherwise "python3" from PATH.
//
// A failing file is reported on stderr and counted; it does not abort the
// run. If the child process wrote nothing to stderr (for example because
// the interpreter could not be started), the run error itself is shown so
// the warning is never blank.
func vaultProfile(args []string) {
	if len(args) < 1 || strings.HasPrefix(args[0], "--") {
		fmt.Fprintln(os.Stderr, "usage: fn vault profile <name>")
		os.Exit(1)
	}
	name := args[0]

	db := openDB()
	defer db.Close()

	v, err := resolveVaultByName(db, name)
	if err != nil {
		fmt.Fprintf(os.Stderr, "error: %v\n", err)
		os.Exit(1)
	}
	if v.Path == "" {
		fmt.Fprintf(os.Stderr, "vault %s has no path set\n", name)
		os.Exit(1)
	}

	vaultDB, err := infra.VaultIndexOpen(v.Path)
	if err != nil {
		fmt.Fprintf(os.Stderr, "error opening vault index: %v\n", err)
		os.Exit(1)
	}
	defer vaultDB.Close()

	// List files with their ext and mime from vault_index.db
	type fileRow struct {
		RelPath string
		Ext     string
		Mime    string
	}
	rows, err := vaultDB.Query(`SELECT rel_path, ext, mime FROM files ORDER BY rel_path`)
	if err != nil {
		fmt.Fprintf(os.Stderr, "error querying vault index: %v\n", err)
		os.Exit(1)
	}
	var files []fileRow
	for rows.Next() {
		var f fileRow
		if scanErr := rows.Scan(&f.RelPath, &f.Ext, &f.Mime); scanErr != nil {
			fmt.Fprintf(os.Stderr, " warn: skipping unreadable index row: %v\n", scanErr)
			continue
		}
		files = append(files, f)
	}
	iterErr := rows.Err()
	// Close before launching subprocesses so the cursor is not held open
	// for the whole profiling run.
	rows.Close()
	if iterErr != nil {
		fmt.Fprintf(os.Stderr, " warn: file list may be incomplete: %v\n", iterErr)
	}
	if len(files) == 0 {
		fmt.Printf("vault %s: no files in index (run 'fn vault index %s' first)\n", name, name)
		return
	}

	// Locate the Python dispatcher
	registryRoot := root()
	dispatchScript := filepath.Join(registryRoot, "python", "functions", "infra", "vault_profile_dispatch.py")
	if _, err := os.Stat(dispatchScript); os.IsNotExist(err) {
		fmt.Fprintf(os.Stderr, "error: dispatch script not found: %s\n", dispatchScript)
		os.Exit(1)
	}
	pythonBin := filepath.Join(registryRoot, "python", ".venv", "bin", "python3")
	if _, err := os.Stat(pythonBin); os.IsNotExist(err) {
		pythonBin = "python3"
	}
	pythonPath := filepath.Join(registryRoot, "python", "functions")

	var nCSV, nPDF, nMD, nSkip, nErr int
	fmt.Printf("profiling vault: %s (%s)\n", name, v.Path)
	for _, f := range files {
		kind := profileKind(f.Ext, f.Mime)
		if kind == "" {
			nSkip++
			continue
		}
		cmd := exec.Command(pythonBin, dispatchScript,
			"--vault", v.Path,
			"--rel-path", f.RelPath,
			"--kind", kind,
		)
		cmd.Env = append(os.Environ(),
			"PYTHONPATH="+pythonPath,
			"FN_REGISTRY_ROOT="+registryRoot,
		)
		var stderr bytes.Buffer
		cmd.Stderr = &stderr
		if runErr := cmd.Run(); runErr != nil {
			detail := strings.TrimSpace(stderr.String())
			if detail == "" {
				// Nothing on stderr: the process failed to start or
				// exited silently, so show the run error instead.
				detail = runErr.Error()
			}
			fmt.Fprintf(os.Stderr, " warn: %s (%s): %v\n", f.RelPath, kind, detail)
			nErr++
			continue
		}
		switch kind {
		case "csv":
			nCSV++
		case "pdf":
			nPDF++
		case "md":
			nMD++
		}
	}
	fmt.Printf(" csv: %d pdf: %d md: %d skipped: %d errors: %d\n",
		nCSV, nPDF, nMD, nSkip, nErr)
}
// --- dedupe ---

// vaultDedupe implements `fn vault dedupe <name>`.
//
// It queries the vault's index for groups of non-empty files that share a
// sha256 value and prints, per group, the shortened hash, the number of
// copies, the size of one copy, the space taken by the extra copies and
// the paths involved. The path column is shortened to at most 60
// characters; shortening works on whole characters so multi-byte UTF-8
// paths are never cut in the middle.
//
// NOTE: the query returns at most 50 groups (largest first), so the
// "Total wasted space" line covers only the groups listed.
func vaultDedupe(args []string) {
	if len(args) < 1 || strings.HasPrefix(args[0], "--") {
		fmt.Fprintln(os.Stderr, "usage: fn vault dedupe <name>")
		os.Exit(1)
	}
	name := args[0]

	db := openDB()
	defer db.Close()

	v, err := resolveVaultByName(db, name)
	if err != nil {
		fmt.Fprintf(os.Stderr, "error: %v\n", err)
		os.Exit(1)
	}
	if v.Path == "" {
		fmt.Fprintf(os.Stderr, "vault %s has no path set\n", name)
		os.Exit(1)
	}

	vaultDB, err := infra.VaultIndexOpen(v.Path)
	if err != nil {
		fmt.Fprintf(os.Stderr, "error opening vault index: %v\n", err)
		os.Exit(1)
	}
	defer vaultDB.Close()

	// Find duplicates: groups with the same sha256 hash and size > 0
	rows, err := vaultDB.Query(`
SELECT sha256, count(*) as cnt, sum(size) as total_size, min(size) as file_size,
group_concat(rel_path, '|') as paths
FROM files
WHERE sha256 != '' AND size > 0
GROUP BY sha256
HAVING count(*) > 1
ORDER BY sum(size) DESC
LIMIT 50`)
	if err != nil {
		fmt.Fprintf(os.Stderr, "error querying duplicates: %v\n", err)
		os.Exit(1)
	}
	defer rows.Close()

	type dupeGroup struct {
		Sha256    string
		Count     int
		TotalSize int64
		FileSize  int64
		Wasted    int64 // bytes used by every copy beyond the first
		Paths     []string
	}
	var groups []dupeGroup
	var totalWasted int64
	for rows.Next() {
		var g dupeGroup
		var pathsConcat string
		if scanErr := rows.Scan(&g.Sha256, &g.Count, &g.TotalSize, &g.FileSize, &pathsConcat); scanErr != nil {
			fmt.Fprintf(os.Stderr, "warn: skipping unreadable duplicate row: %v\n", scanErr)
			continue
		}
		// Paths come back joined by '|' from group_concat.
		g.Paths = strings.Split(pathsConcat, "|")
		g.Wasted = g.FileSize * int64(g.Count-1)
		totalWasted += g.Wasted
		groups = append(groups, g)
	}
	if iterErr := rows.Err(); iterErr != nil {
		fmt.Fprintf(os.Stderr, "warn: duplicate report may be incomplete: %v\n", iterErr)
	}

	fmt.Printf("Vault: %s — duplicate report\n\n", name)
	if len(groups) == 0 {
		fmt.Println(" No duplicates found.")
		return
	}

	w := tabwriter.NewWriter(os.Stdout, 0, 0, 2, ' ', 0)
	fmt.Fprintln(w, "SHA256\tCOUNT\tSIZE\tWASTED\tPATHS")
	for _, g := range groups {
		sha := g.Sha256
		if len(sha) > 12 {
			sha = sha[:12] + "..."
		}
		pathsStr := strings.Join(g.Paths, ", ")
		// Truncate by runes, not bytes, so a multi-byte character is
		// never split into invalid UTF-8.
		if runes := []rune(pathsStr); len(runes) > 60 {
			pathsStr = string(runes[:57]) + "..."
		}
		fmt.Fprintf(w, "%s\t%d\t%s\t%s\t%s\n",
			sha, g.Count, formatBytes(g.FileSize), formatBytes(g.Wasted), pathsStr)
	}
	w.Flush()
	fmt.Printf("\nTotal wasted space: %s (%d duplicate groups)\n",
		formatBytes(totalWasted), len(groups))
}
// --- aggregate ---

// vaultAggregate implements `fn vault aggregate`: it runs the aggregation
// of vault indexes for the registry root and prints how many vaults were
// processed and skipped and how many files were seen in total. Per-vault
// problems listed in the report are printed as warnings on stderr; a
// failure of the aggregation itself exits with status 1.
func vaultAggregate() {
	regRoot := root()
	fmt.Println("aggregating vault indexes into registry.db...")

	summary, aggErr := infra.VaultAggregateIndex(regRoot)
	if aggErr != nil {
		fmt.Fprintf(os.Stderr, "error: %v\n", aggErr)
		os.Exit(1)
	}

	fmt.Printf(" vaults processed: %d skipped: %d total files: %d\n",
		summary.VaultsProcessed, summary.VaultsSkipped, summary.TotalFiles)
	for _, problem := range summary.Errors {
		fmt.Fprintf(os.Stderr, " warn: %s\n", problem)
	}
}
// --- doctor (vault alias) ---
func vaultDoctorCmd() {
registryRoot := root()
entries, err := infra.VaultDoctor(registryRoot)
if err != nil {
fmt.Fprintf(os.Stderr, "error: %v\n", err)
os.Exit(1)
}
if len(entries) == 0 {
fmt.Println("No vaults registered.")
return
}
w := tabwriter.NewWriter(os.Stdout, 0, 0, 2, ' ', 0)
fmt.Fprintln(w, "VAULT\tSTATUS\tDISK\tINDEXED\tISSUES")
for _, e := range entries {
issues := "-"
if len(e.Issues) > 0 {
issues = strings.Join(e.Issues, ", ")
}
fmt.Fprintf(w, "%s\t%s\t%d\t%d\t%s\n",
e.VaultName, e.Status, e.DiskFiles, e.IndexedFiles, issues)
}
w.Flush()
}
// --- audit ---

// vaultAudit implements
// `fn vault audit <name>... | --all [--skip-profilers] [--dry-run-layout]`.
//
// For every selected vault it runs, in order: layout-ensure, index, profile
// (unless --skip-profilers) and a dedupe summary. Layout and index failures
// are recorded against the vault but do not stop the remaining steps.
// After all vaults are processed it runs the aggregate step once and prints
// a summary table of per-vault status and recorded errors. Vaults without a
// path are listed with status "skip".
func vaultAudit(args []string) {
	var (
		auditAll      bool
		skipProfilers bool
		dryRunLayout  bool
		names         []string
	)
	for _, arg := range args {
		switch {
		case arg == "--all":
			auditAll = true
		case arg == "--skip-profilers":
			skipProfilers = true
		case arg == "--dry-run-layout":
			dryRunLayout = true
		case !strings.HasPrefix(arg, "--"):
			names = append(names, arg)
		}
	}
	if len(names) == 0 && !auditAll {
		fmt.Fprintln(os.Stderr, "usage: fn vault audit <name> | --all [--skip-profilers] [--dry-run-layout]")
		os.Exit(1)
	}

	db := openDB()
	defer db.Close()

	var targets []registry.Vault
	if auditAll {
		everything, listErr := db.AllVaults()
		if listErr != nil {
			fmt.Fprintf(os.Stderr, "error listing vaults: %v\n", listErr)
			os.Exit(1)
		}
		targets = everything
	} else {
		for _, wanted := range names {
			found, lookupErr := resolveVaultByName(db, wanted)
			if lookupErr != nil {
				fmt.Fprintf(os.Stderr, "error: %v\n", lookupErr)
				os.Exit(1)
			}
			targets = append(targets, *found)
		}
	}
	// The registry handle is released here, before the per-vault steps
	// run. NOTE(review): presumably so later steps can open registry.db
	// themselves — confirm.
	db.Close()

	type outcome struct {
		Name   string
		Status string
		Errors []string
	}
	var outcomes []outcome

	for _, vault := range targets {
		fmt.Printf("\n=== vault: %s ===\n", vault.Name)
		if vault.Path == "" {
			fmt.Println(" SKIP: no path set")
			outcomes = append(outcomes, outcome{Name: vault.Name, Status: "skip"})
			continue
		}
		var problems []string

		// Step 1: layout-ensure
		heading := " [1/5] layout-ensure"
		if dryRunLayout {
			heading += " (dry-run)"
		}
		fmt.Println(heading)
		layout, layoutErr := infra.VaultLayoutEnsure(vault.Path, dryRunLayout)
		if layoutErr == nil {
			if len(layout.Created) > 0 {
				fmt.Printf(" created: %s\n", strings.Join(layout.Created, ", "))
			}
			if len(layout.Migrated) > 0 {
				fmt.Printf(" migrated: %s\n", strings.Join(layout.Migrated, "; "))
			}
			if len(layout.Created)+len(layout.Migrated) == 0 {
				fmt.Println(" layout ok")
			}
		} else {
			fmt.Printf(" ERROR: %v\n", layoutErr)
			problems = append(problems, "layout-ensure: "+layoutErr.Error())
		}

		// Step 2: index
		fmt.Println(" [2/5] index")
		if indexErr := runVaultIndex(vault); indexErr != nil {
			fmt.Printf(" ERROR: %v\n", indexErr)
			problems = append(problems, "index: "+indexErr.Error())
		}

		// Step 3: profile (optional)
		if skipProfilers {
			fmt.Println(" [3/5] profile (skipped)")
		} else {
			fmt.Println(" [3/5] profile")
			runVaultProfileSubcmd(vault)
		}

		// Step 4: dedupe (informational, non-fatal)
		fmt.Println(" [4/5] dedupe")
		runVaultDedupeSubcmd(vault)

		// Step 5: aggregate runs once, after every vault is done
		fmt.Println(" [5/5] aggregate (deferred to end)")

		verdict := "ok"
		if len(problems) > 0 {
			verdict = "error"
		}
		outcomes = append(outcomes, outcome{Name: vault.Name, Status: verdict, Errors: problems})
	}

	// Final aggregate
	fmt.Println("\n=== aggregate ===")
	vaultAggregate()

	// Summary table
	fmt.Println("\n=== summary ===")
	tw := tabwriter.NewWriter(os.Stdout, 0, 0, 2, ' ', 0)
	fmt.Fprintln(tw, "VAULT\tSTATUS\tERRORS")
	for _, o := range outcomes {
		errText := "-"
		if len(o.Errors) > 0 {
			errText = strings.Join(o.Errors, "; ")
		}
		fmt.Fprintf(tw, "%s\t%s\t%s\n", o.Name, o.Status, errText)
	}
	tw.Flush()
}
// runVaultProfileSubcmd runs the profiling loop for a single vault (used by
// audit). It never exits the process: every problem is printed as a warning
// and the function returns, so the audit pipeline can continue.
//
// It reads the file list from the vault's index and runs the Python
// dispatch script for each file that profileKind classifies as CSV, PDF or
// Markdown, then prints per-kind counts along with skipped and failed
// totals. A missing dispatch script is reported once up front rather than
// as one failure per file, and each failing file is reported with the
// child's stderr output (or the run error when stderr is empty).
func runVaultProfileSubcmd(v registry.Vault) {
	registryRoot := root()

	vaultDB, err := infra.VaultIndexOpen(v.Path)
	if err != nil {
		fmt.Printf(" warn: cannot open vault index: %v\n", err)
		return
	}
	defer vaultDB.Close()

	rows, err := vaultDB.Query(`SELECT rel_path, ext, mime FROM files ORDER BY rel_path`)
	if err != nil {
		fmt.Printf(" warn: query failed: %v\n", err)
		return
	}
	type fileRow struct {
		RelPath string
		Ext     string
		Mime    string
	}
	var files []fileRow
	for rows.Next() {
		var f fileRow
		if scanErr := rows.Scan(&f.RelPath, &f.Ext, &f.Mime); scanErr != nil {
			fmt.Printf(" warn: skipping unreadable index row: %v\n", scanErr)
			continue
		}
		files = append(files, f)
	}
	iterErr := rows.Err()
	// Close before launching subprocesses so the cursor is not held open
	// for the whole profiling run.
	rows.Close()
	if iterErr != nil {
		fmt.Printf(" warn: file list may be incomplete: %v\n", iterErr)
	}
	if len(files) == 0 {
		fmt.Printf(" no files in index\n")
		return
	}

	dispatchScript := filepath.Join(registryRoot, "python", "functions", "infra", "vault_profile_dispatch.py")
	if _, statErr := os.Stat(dispatchScript); os.IsNotExist(statErr) {
		fmt.Printf(" warn: dispatch script not found: %s\n", dispatchScript)
		return
	}
	pythonBin := filepath.Join(registryRoot, "python", ".venv", "bin", "python3")
	if _, statErr := os.Stat(pythonBin); os.IsNotExist(statErr) {
		pythonBin = "python3"
	}
	pythonPath := filepath.Join(registryRoot, "python", "functions")

	var nCSV, nPDF, nMD, nSkip, nErr int
	for _, f := range files {
		kind := profileKind(f.Ext, f.Mime)
		if kind == "" {
			nSkip++
			continue
		}
		cmd := exec.Command(pythonBin, dispatchScript,
			"--vault", v.Path,
			"--rel-path", f.RelPath,
			"--kind", kind,
		)
		cmd.Env = append(os.Environ(),
			"PYTHONPATH="+pythonPath,
			"FN_REGISTRY_ROOT="+registryRoot,
		)
		var stderr bytes.Buffer
		cmd.Stderr = &stderr
		if runErr := cmd.Run(); runErr != nil {
			detail := strings.TrimSpace(stderr.String())
			if detail == "" {
				// Nothing on stderr: the process failed to start or
				// exited silently, so show the run error instead.
				detail = runErr.Error()
			}
			fmt.Printf(" warn: %s (%s): %s\n", f.RelPath, kind, detail)
			nErr++
			continue
		}
		switch kind {
		case "csv":
			nCSV++
		case "pdf":
			nPDF++
		case "md":
			nMD++
		}
	}
	fmt.Printf(" csv: %d pdf: %d md: %d skipped: %d errors: %d\n",
		nCSV, nPDF, nMD, nSkip, nErr)
}
// runVaultDedupeSubcmd prints a one-line duplicate summary for a single
// vault (used by audit): the number of groups of non-empty files sharing a
// sha256 value and the space taken by the extra copies in each group.
//
// Problems are printed as warnings and never abort the caller. Unreadable
// rows and iteration errors are reported, so an incomplete summary is not
// mistaken for a clean result.
func runVaultDedupeSubcmd(v registry.Vault) {
	vaultDB, err := infra.VaultIndexOpen(v.Path)
	if err != nil {
		fmt.Printf(" warn: cannot open vault index: %v\n", err)
		return
	}
	defer vaultDB.Close()

	rows, err := vaultDB.Query(`
SELECT count(*) as cnt, min(size) as file_size
FROM files
WHERE sha256 != '' AND size > 0
GROUP BY sha256
HAVING count(*) > 1`)
	if err != nil {
		fmt.Printf(" warn: query failed: %v\n", err)
		return
	}
	defer rows.Close()

	var dupeGroups int
	var totalWasted int64
	for rows.Next() {
		var cnt int
		var fileSize int64
		if scanErr := rows.Scan(&cnt, &fileSize); scanErr != nil {
			fmt.Printf(" warn: skipping unreadable duplicate row: %v\n", scanErr)
			continue
		}
		dupeGroups++
		// Every copy beyond the first counts as wasted space.
		totalWasted += fileSize * int64(cnt-1)
	}
	if iterErr := rows.Err(); iterErr != nil {
		fmt.Printf(" warn: duplicate summary may be incomplete: %v\n", iterErr)
	}

	if dupeGroups == 0 {
		fmt.Printf(" no duplicates\n")
		return
	}
	fmt.Printf(" %d duplicate groups, %s wasted (run 'fn vault dedupe %s' for details)\n",
		dupeGroups, formatBytes(totalWasted), v.Name)
}
// Reference database/sql so the import stays used (and the file keeps
// compiling) even when nothing else here names the package directly.
var _ = sql.ErrNoRows
// --- helpers ---

// resolveVaultByName finds a vault in registry.db from a user-supplied
// identifier. Lookup proceeds in three stages:
//
//  1. the identifier is tried as a vault ID;
//  2. otherwise the registry is searched and a result whose Name equals the
//     identifier exactly is preferred;
//  3. otherwise, if the search produced exactly one result, that one is used.
//
// An error is returned when the search fails, when several vaults match at
// the stage that decides (the message lists the candidate IDs), or when
// nothing matches at all.
func resolveVaultByName(db *registry.DB, name string) (*registry.Vault, error) {
	// Stage 1: direct ID lookup. Any lookup error falls through to search.
	if byID, err := db.GetVault(name); err == nil {
		return byID, nil
	}

	// Stage 2: search, then keep only exact name matches.
	candidates, err := db.SearchVaults(name, "")
	if err != nil {
		return nil, fmt.Errorf("search vaults: %w", err)
	}

	// idList renders the IDs of the given vaults as "a, b, c".
	idList := func(vs []registry.Vault) string {
		ids := make([]string, 0, len(vs))
		for _, vault := range vs {
			ids = append(ids, vault.ID)
		}
		return strings.Join(ids, ", ")
	}

	var sameName []registry.Vault
	for _, vault := range candidates {
		if vault.Name == name {
			sameName = append(sameName, vault)
		}
	}
	switch len(sameName) {
	case 0:
		// No exact match; continue with the partial-match stage below.
	case 1:
		return &sameName[0], nil
	default:
		return nil, fmt.Errorf("ambiguous vault name %q: %s", name, idList(sameName))
	}

	// Stage 3: partial match fallback.
	switch len(candidates) {
	case 0:
		return nil, fmt.Errorf("vault not found: %q (run 'fn index' to register vaults)", name)
	case 1:
		return &candidates[0], nil
	default:
		return nil, fmt.Errorf("ambiguous vault %q — use full name or ID: %s", name, idList(candidates))
	}
}
// resolveSearchVaults picks the vaults a search should cover: every
// registered vault when name is empty, otherwise just the vault that name
// resolves to. A resolution failure is returned unchanged.
func resolveSearchVaults(db *registry.DB, name string) ([]registry.Vault, error) {
	if name == "" {
		return db.AllVaults()
	}

	match, err := resolveVaultByName(db, name)
	if err != nil {
		return nil, err
	}
	return []registry.Vault{*match}, nil
}
// formatBytes renders a byte count for display. Counts of at least 1024
// are shown with one decimal place in the largest fitting unit among KB,
// MB and GB (each a power of 1024); anything smaller, including negative
// values, is shown as a plain integer followed by "B".
func formatBytes(b int64) string {
	// Ordered largest first so the first unit that fits wins.
	units := [...]struct {
		size   int64
		suffix string
	}{
		{1 << 30, "GB"},
		{1 << 20, "MB"},
		{1 << 10, "KB"},
	}
	for _, u := range units {
		if b >= u.size {
			return fmt.Sprintf("%.1f %s", float64(b)/float64(u.size), u.suffix)
		}
	}
	return fmt.Sprintf("%d B", b)
}
// resolveVaultPath resolves the actual directory path for a vault,
// following symlinks if needed. Returns the resolved absolute path.
func resolveVaultPath(vaultPath string) string {
resolved, err := filepath.EvalSymlinks(vaultPath)
if err != nil {
return vaultPath
}
return resolved
}