a802f59f55
- cmd/fn/doctor.go - cmd/fn/main.go - cpp/apps/primitives_gallery/playground/tables/CMakeLists.txt - cpp/apps/primitives_gallery/playground/tables/data_table.cpp - cpp/apps/primitives_gallery/playground/tables/data_table_logic.cpp - cpp/apps/primitives_gallery/playground/tables/data_table_logic.h - cpp/apps/primitives_gallery/playground/tables/self_test.cpp - cpp/apps/primitives_gallery/playground/tables/tql.cpp - cpp/apps/primitives_gallery/playground/tables/viz.cpp - cpp/apps/primitives_gallery/playground/tables/viz.h - ... Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
1060 lines
25 KiB
Go
1060 lines
25 KiB
Go
package main
|
|
|
|
import (
|
|
"bytes"
|
|
"database/sql"
|
|
"encoding/json"
|
|
"fmt"
|
|
"os"
|
|
"os/exec"
|
|
"path/filepath"
|
|
"strconv"
|
|
"strings"
|
|
"text/tabwriter"
|
|
"time"
|
|
|
|
"fn-registry/functions/infra"
|
|
"fn-registry/registry"
|
|
)
|
|
|
|
func cmdVault(args []string) {
|
|
if len(args) < 1 {
|
|
vaultUsage()
|
|
os.Exit(1)
|
|
}
|
|
|
|
switch args[0] {
|
|
case "list":
|
|
vaultList()
|
|
case "search":
|
|
vaultSearch(args[1:])
|
|
case "index":
|
|
vaultIndex(args[1:])
|
|
case "info":
|
|
vaultInfo(args[1:])
|
|
case "layout-ensure":
|
|
vaultLayoutEnsure(args[1:])
|
|
case "profile":
|
|
vaultProfile(args[1:])
|
|
case "dedupe":
|
|
vaultDedupe(args[1:])
|
|
case "aggregate":
|
|
vaultAggregate()
|
|
case "doctor":
|
|
vaultDoctorCmd()
|
|
case "audit":
|
|
vaultAudit(args[1:])
|
|
case "help", "-h", "--help":
|
|
vaultUsage()
|
|
default:
|
|
fmt.Fprintf(os.Stderr, "unknown vault subcommand: %s\n", args[0])
|
|
vaultUsage()
|
|
os.Exit(1)
|
|
}
|
|
}
|
|
|
|
// vaultUsage prints the help text for `fn vault` to standard output.
//
// The audit entries list the optional --skip-profilers and --dry-run-layout
// flags so that the help agrees with the flags vaultAudit parses.
func vaultUsage() {
	fmt.Println(`fn vault — manage data vaults

Usage:
  fn vault list                    List declared vaults
  fn vault search <query> [--limit N] [--vault <name>] [--json]
                                   Search files in vault(s)
  fn vault index <name>            Index a vault (scan + write)
  fn vault index --all             Index all declared vaults
  fn vault info <name>             Show vault summary and stats
  fn vault layout-ensure <name> [--dry-run]
                                   Ensure canonical data/knowledge layout
  fn vault profile <name>          Profile CSV/PDF/MD files in a vault
  fn vault dedupe <name>           Report duplicate files in a vault
  fn vault aggregate               Aggregate all vault indexes into registry.db
  fn vault doctor                  Audit vault health (alias for fn doctor vaults)
  fn vault audit <name> [--skip-profilers] [--dry-run-layout]
                                   Run full audit pipeline on a vault
  fn vault audit --all [--skip-profilers] [--dry-run-layout]
                                   Run audit on all declared vaults`)
}
|
|
|
|
// --- list ---
|
|
|
|
func vaultList() {
|
|
db := openDB()
|
|
defer db.Close()
|
|
|
|
vaults, err := db.AllVaults()
|
|
if err != nil {
|
|
fmt.Fprintf(os.Stderr, "error: %v\n", err)
|
|
os.Exit(1)
|
|
}
|
|
|
|
if len(vaults) == 0 {
|
|
fmt.Println("No vaults registered. Declare vaults in projects/*/vaults/vault.yaml and run 'fn index'.")
|
|
return
|
|
}
|
|
|
|
w := tabwriter.NewWriter(os.Stdout, 0, 0, 2, ' ', 0)
|
|
fmt.Fprintln(w, "NAME\tPROJECT\tPATH\tTAGS")
|
|
for _, v := range vaults {
|
|
tags := strings.Join(v.Tags, ",")
|
|
path := v.Path
|
|
if path == "" {
|
|
path = "-"
|
|
}
|
|
proj := v.ProjectID
|
|
if proj == "" {
|
|
proj = "-"
|
|
}
|
|
fmt.Fprintf(w, "%s\t%s\t%s\t%s\n", v.Name, proj, path, tags)
|
|
}
|
|
w.Flush()
|
|
}
|
|
|
|
// --- search ---
|
|
|
|
func vaultSearch(args []string) {
|
|
var query string
|
|
var vaultName string
|
|
var limitN int
|
|
var jsonOut bool
|
|
|
|
i := 0
|
|
for i < len(args) {
|
|
switch args[i] {
|
|
case "--limit":
|
|
i++
|
|
if i >= len(args) {
|
|
fmt.Fprintln(os.Stderr, "--limit requires a value")
|
|
os.Exit(1)
|
|
}
|
|
n, err := strconv.Atoi(args[i])
|
|
if err != nil {
|
|
fmt.Fprintf(os.Stderr, "--limit: invalid number %q\n", args[i])
|
|
os.Exit(1)
|
|
}
|
|
limitN = n
|
|
case "--vault":
|
|
i++
|
|
if i >= len(args) {
|
|
fmt.Fprintln(os.Stderr, "--vault requires a value")
|
|
os.Exit(1)
|
|
}
|
|
vaultName = args[i]
|
|
case "--json":
|
|
jsonOut = true
|
|
default:
|
|
if query == "" && !strings.HasPrefix(args[i], "--") {
|
|
query = args[i]
|
|
}
|
|
}
|
|
i++
|
|
}
|
|
|
|
if query == "" {
|
|
fmt.Fprintln(os.Stderr, "usage: fn vault search <query> [--limit N] [--vault <name>] [--json]")
|
|
os.Exit(1)
|
|
}
|
|
if limitN <= 0 {
|
|
limitN = 50
|
|
}
|
|
|
|
db := openDB()
|
|
defer db.Close()
|
|
|
|
// Determine which vaults to search.
|
|
vaults, err := resolveSearchVaults(db, vaultName)
|
|
if err != nil {
|
|
fmt.Fprintf(os.Stderr, "error: %v\n", err)
|
|
os.Exit(1)
|
|
}
|
|
if len(vaults) == 0 {
|
|
fmt.Fprintln(os.Stderr, "no vaults found")
|
|
os.Exit(1)
|
|
}
|
|
|
|
var allHits []infra.VaultSearchHit
|
|
for _, v := range vaults {
|
|
if v.Path == "" {
|
|
continue
|
|
}
|
|
hits, err := infra.VaultSearch(v.Path, query, limitN)
|
|
if err != nil {
|
|
fmt.Fprintf(os.Stderr, "warn: vault %s: %v\n", v.Name, err)
|
|
continue
|
|
}
|
|
allHits = append(allHits, hits...)
|
|
}
|
|
|
|
if jsonOut {
|
|
if allHits == nil {
|
|
allHits = []infra.VaultSearchHit{}
|
|
}
|
|
b, err := json.MarshalIndent(allHits, "", " ")
|
|
if err != nil {
|
|
fmt.Fprintf(os.Stderr, "json error: %v\n", err)
|
|
os.Exit(1)
|
|
}
|
|
fmt.Println(string(b))
|
|
return
|
|
}
|
|
|
|
if len(allHits) == 0 {
|
|
fmt.Println("No results.")
|
|
return
|
|
}
|
|
|
|
w := tabwriter.NewWriter(os.Stdout, 0, 0, 2, ' ', 0)
|
|
for _, h := range allHits {
|
|
mtime := time.Unix(h.Mtime, 0).UTC().Format("2006-01-02")
|
|
sizeStr := formatBytes(h.Size)
|
|
snip := truncate(h.Snippet, 50)
|
|
fmt.Fprintf(w, "[%s]\t%s\t%s\t%s\t%s\t%s\n",
|
|
h.VaultName, h.RelPath, sizeStr, mtime, h.Mime, snip)
|
|
}
|
|
w.Flush()
|
|
}
|
|
|
|
// --- index ---
|
|
|
|
func vaultIndex(args []string) {
|
|
indexAll := false
|
|
var name string
|
|
|
|
for _, a := range args {
|
|
if a == "--all" {
|
|
indexAll = true
|
|
} else if !strings.HasPrefix(a, "--") && name == "" {
|
|
name = a
|
|
}
|
|
}
|
|
|
|
if !indexAll && name == "" {
|
|
fmt.Fprintln(os.Stderr, "usage: fn vault index <name> | --all")
|
|
os.Exit(1)
|
|
}
|
|
|
|
db := openDB()
|
|
defer db.Close()
|
|
|
|
var vaults []registry.Vault
|
|
if indexAll {
|
|
all, err := db.AllVaults()
|
|
if err != nil {
|
|
fmt.Fprintf(os.Stderr, "error: %v\n", err)
|
|
os.Exit(1)
|
|
}
|
|
vaults = all
|
|
} else {
|
|
v, err := resolveVaultByName(db, name)
|
|
if err != nil {
|
|
fmt.Fprintf(os.Stderr, "error: %v\n", err)
|
|
os.Exit(1)
|
|
}
|
|
vaults = []registry.Vault{*v}
|
|
}
|
|
|
|
for _, v := range vaults {
|
|
if v.Path == "" {
|
|
fmt.Printf("vault %s: path not set, skipping\n", v.Name)
|
|
continue
|
|
}
|
|
if err := runVaultIndex(v); err != nil {
|
|
fmt.Fprintf(os.Stderr, "vault %s: %v\n", v.Name, err)
|
|
}
|
|
}
|
|
}
|
|
|
|
// runVaultIndex runs the full inventory scan + write cycle for a single vault.
|
|
func runVaultIndex(v registry.Vault) error {
|
|
fmt.Printf("indexing %s (%s)...\n", v.Name, v.Path)
|
|
|
|
files, err := infra.VaultInventoryScan(v.Path, v.ID, v.Name)
|
|
if err != nil {
|
|
return fmt.Errorf("scan: %w", err)
|
|
}
|
|
|
|
vaultDB, err := infra.VaultIndexOpen(v.Path)
|
|
if err != nil {
|
|
return fmt.Errorf("open index: %w", err)
|
|
}
|
|
defer vaultDB.Close()
|
|
|
|
report, err := infra.VaultIndexWrite(vaultDB, files, true)
|
|
if err != nil {
|
|
return fmt.Errorf("write: %w", err)
|
|
}
|
|
|
|
fmt.Printf(" indexed %d files, %d inserted, %d updated, %d pruned\n",
|
|
len(files), report.Inserted, report.Updated, report.Pruned)
|
|
return nil
|
|
}
|
|
|
|
// --- info ---
|
|
|
|
func vaultInfo(args []string) {
|
|
if len(args) < 1 {
|
|
fmt.Fprintln(os.Stderr, "usage: fn vault info <name>")
|
|
os.Exit(1)
|
|
}
|
|
name := args[0]
|
|
|
|
db := openDB()
|
|
defer db.Close()
|
|
|
|
v, err := resolveVaultByName(db, name)
|
|
if err != nil {
|
|
fmt.Fprintf(os.Stderr, "error: %v\n", err)
|
|
os.Exit(1)
|
|
}
|
|
if v.Path == "" {
|
|
fmt.Fprintf(os.Stderr, "vault %s has no path set\n", name)
|
|
os.Exit(1)
|
|
}
|
|
|
|
vaultDB, err := infra.VaultIndexOpen(v.Path)
|
|
if err != nil {
|
|
fmt.Fprintf(os.Stderr, "error opening vault index: %v\n", err)
|
|
os.Exit(1)
|
|
}
|
|
defer vaultDB.Close()
|
|
|
|
// Summary stats.
|
|
var totalFiles int
|
|
var totalSize int64
|
|
vaultDB.QueryRow(`SELECT count(*), coalesce(sum(size),0) FROM files`).Scan(&totalFiles, &totalSize)
|
|
|
|
var lastIndexedAt int64
|
|
vaultDB.QueryRow(`SELECT coalesce(max(indexed_at), 0) FROM files`).Scan(&lastIndexedAt)
|
|
|
|
lastIndexed := "-"
|
|
if lastIndexedAt > 0 {
|
|
lastIndexed = time.Unix(lastIndexedAt, 0).UTC().Format("2006-01-02 15:04:05")
|
|
}
|
|
|
|
fmt.Printf("Vault: %s (%s)\n", v.Name, v.Path)
|
|
fmt.Printf("Files: %d Total: %s Last indexed: %s\n\n",
|
|
totalFiles, formatBytes(totalSize), lastIndexed)
|
|
|
|
// By bucket.
|
|
bucketRows, err := vaultDB.Query(`
|
|
SELECT bucket, sub_bucket, count(*), coalesce(sum(size),0)
|
|
FROM files
|
|
GROUP BY bucket, sub_bucket
|
|
ORDER BY bucket, sub_bucket`)
|
|
if err == nil {
|
|
defer bucketRows.Close()
|
|
fmt.Println("By bucket:")
|
|
w := tabwriter.NewWriter(os.Stdout, 0, 0, 2, ' ', 0)
|
|
for bucketRows.Next() {
|
|
var bucket, sub string
|
|
var cnt int
|
|
var sz int64
|
|
bucketRows.Scan(&bucket, &sub, &cnt, &sz)
|
|
key := bucket
|
|
if sub != "" {
|
|
key = bucket + "/" + sub
|
|
}
|
|
fmt.Fprintf(w, " %s\t%d files\t%s\n", key, cnt, formatBytes(sz))
|
|
}
|
|
w.Flush()
|
|
fmt.Println()
|
|
}
|
|
|
|
// By mime.
|
|
mimeRows, err := vaultDB.Query(`
|
|
SELECT mime, count(*), coalesce(sum(size),0)
|
|
FROM files
|
|
GROUP BY mime
|
|
ORDER BY sum(size) DESC`)
|
|
if err == nil {
|
|
defer mimeRows.Close()
|
|
fmt.Println("By mime:")
|
|
w := tabwriter.NewWriter(os.Stdout, 0, 0, 2, ' ', 0)
|
|
for mimeRows.Next() {
|
|
var mime string
|
|
var cnt int
|
|
var sz int64
|
|
mimeRows.Scan(&mime, &cnt, &sz)
|
|
if mime == "" {
|
|
mime = "unknown"
|
|
}
|
|
fmt.Fprintf(w, " %s\t%d files\t%s\n", mime, cnt, formatBytes(sz))
|
|
}
|
|
w.Flush()
|
|
}
|
|
}
|
|
|
|
// --- layout-ensure ---
|
|
|
|
func vaultLayoutEnsure(args []string) {
|
|
dryRun := false
|
|
var name string
|
|
|
|
for _, a := range args {
|
|
switch a {
|
|
case "--dry-run":
|
|
dryRun = true
|
|
default:
|
|
if !strings.HasPrefix(a, "--") && name == "" {
|
|
name = a
|
|
}
|
|
}
|
|
}
|
|
|
|
if name == "" {
|
|
fmt.Fprintln(os.Stderr, "usage: fn vault layout-ensure <name> [--dry-run]")
|
|
os.Exit(1)
|
|
}
|
|
|
|
db := openDB()
|
|
defer db.Close()
|
|
|
|
v, err := resolveVaultByName(db, name)
|
|
if err != nil {
|
|
fmt.Fprintf(os.Stderr, "error: %v\n", err)
|
|
os.Exit(1)
|
|
}
|
|
if v.Path == "" {
|
|
fmt.Fprintf(os.Stderr, "vault %s has no path set\n", name)
|
|
os.Exit(1)
|
|
}
|
|
|
|
report, err := infra.VaultLayoutEnsure(v.Path, dryRun)
|
|
if err != nil {
|
|
fmt.Fprintf(os.Stderr, "error: %v\n", err)
|
|
os.Exit(1)
|
|
}
|
|
|
|
prefix := ""
|
|
if report.DryRun {
|
|
prefix = "[dry-run] "
|
|
}
|
|
fmt.Printf("%sVault: %s (%s)\n", prefix, name, report.VaultPath)
|
|
if len(report.Created) > 0 {
|
|
fmt.Printf("%s created: %s\n", prefix, strings.Join(report.Created, ", "))
|
|
}
|
|
if len(report.Migrated) > 0 {
|
|
fmt.Printf("%s migrated: %s\n", prefix, strings.Join(report.Migrated, "; "))
|
|
}
|
|
if len(report.AlreadyOK) > 0 {
|
|
fmt.Printf("%s already ok: %s\n", prefix, strings.Join(report.AlreadyOK, ", "))
|
|
}
|
|
if len(report.Skipped) > 0 {
|
|
fmt.Printf("%s skipped (unrecognized): %s\n", prefix, strings.Join(report.Skipped, ", "))
|
|
}
|
|
if len(report.Created) == 0 && len(report.Migrated) == 0 {
|
|
fmt.Printf("%s layout already canonical\n", prefix)
|
|
}
|
|
}
|
|
|
|
// --- profile ---
|
|
|
|
// profileKind classifies a file for profiling and returns "csv", "pdf", "md",
// or "" when the file is none of those.
//
// The extension decides first: it is compared case-insensitively and with or
// without a leading dot ("csv", ".PDF", "md", "markdown"). If the extension is
// not recognized, the MIME type is checked for the substrings "csv", "pdf" and
// "markdown". MIME types are case-insensitive, so the value is lower-cased
// before matching.
func profileKind(ext, mime string) string {
	switch strings.ToLower(strings.TrimPrefix(ext, ".")) {
	case "csv":
		return "csv"
	case "pdf":
		return "pdf"
	case "md", "markdown":
		return "md"
	}

	// Fall back to the MIME type; substring matching tolerates parameters
	// such as "; charset=utf-8" and vendor variants like "text/x-markdown".
	mime = strings.ToLower(mime)
	switch {
	case strings.Contains(mime, "csv"):
		return "csv"
	case strings.Contains(mime, "pdf"):
		return "pdf"
	case strings.Contains(mime, "markdown"):
		return "md"
	}
	return ""
}
|
|
|
|
func vaultProfile(args []string) {
|
|
if len(args) < 1 || strings.HasPrefix(args[0], "--") {
|
|
fmt.Fprintln(os.Stderr, "usage: fn vault profile <name>")
|
|
os.Exit(1)
|
|
}
|
|
name := args[0]
|
|
|
|
db := openDB()
|
|
defer db.Close()
|
|
|
|
v, err := resolveVaultByName(db, name)
|
|
if err != nil {
|
|
fmt.Fprintf(os.Stderr, "error: %v\n", err)
|
|
os.Exit(1)
|
|
}
|
|
if v.Path == "" {
|
|
fmt.Fprintf(os.Stderr, "vault %s has no path set\n", name)
|
|
os.Exit(1)
|
|
}
|
|
|
|
vaultDB, err := infra.VaultIndexOpen(v.Path)
|
|
if err != nil {
|
|
fmt.Fprintf(os.Stderr, "error opening vault index: %v\n", err)
|
|
os.Exit(1)
|
|
}
|
|
defer vaultDB.Close()
|
|
|
|
// List files with their ext and mime from vault_index.db
|
|
type fileRow struct {
|
|
RelPath string
|
|
Ext string
|
|
Mime string
|
|
}
|
|
rows, err := vaultDB.Query(`SELECT rel_path, ext, mime FROM files ORDER BY rel_path`)
|
|
if err != nil {
|
|
fmt.Fprintf(os.Stderr, "error querying vault index: %v\n", err)
|
|
os.Exit(1)
|
|
}
|
|
var files []fileRow
|
|
for rows.Next() {
|
|
var f fileRow
|
|
if scanErr := rows.Scan(&f.RelPath, &f.Ext, &f.Mime); scanErr == nil {
|
|
files = append(files, f)
|
|
}
|
|
}
|
|
rows.Close()
|
|
|
|
if len(files) == 0 {
|
|
fmt.Printf("vault %s: no files in index (run 'fn vault index %s' first)\n", name, name)
|
|
return
|
|
}
|
|
|
|
// Locate the Python dispatcher
|
|
registryRoot := root()
|
|
dispatchScript := filepath.Join(registryRoot, "python", "functions", "infra", "vault_profile_dispatch.py")
|
|
if _, err := os.Stat(dispatchScript); os.IsNotExist(err) {
|
|
fmt.Fprintf(os.Stderr, "error: dispatch script not found: %s\n", dispatchScript)
|
|
os.Exit(1)
|
|
}
|
|
|
|
pythonBin := filepath.Join(registryRoot, "python", ".venv", "bin", "python3")
|
|
if _, err := os.Stat(pythonBin); os.IsNotExist(err) {
|
|
pythonBin = "python3"
|
|
}
|
|
|
|
pythonPath := filepath.Join(registryRoot, "python", "functions")
|
|
|
|
var nCSV, nPDF, nMD, nSkip, nErr int
|
|
fmt.Printf("profiling vault: %s (%s)\n", name, v.Path)
|
|
|
|
for _, f := range files {
|
|
kind := profileKind(f.Ext, f.Mime)
|
|
if kind == "" {
|
|
nSkip++
|
|
continue
|
|
}
|
|
|
|
cmd := exec.Command(pythonBin, dispatchScript,
|
|
"--vault", v.Path,
|
|
"--rel-path", f.RelPath,
|
|
"--kind", kind,
|
|
)
|
|
cmd.Env = append(os.Environ(),
|
|
"PYTHONPATH="+pythonPath,
|
|
"FN_REGISTRY_ROOT="+registryRoot,
|
|
)
|
|
|
|
var stderr bytes.Buffer
|
|
cmd.Stderr = &stderr
|
|
if runErr := cmd.Run(); runErr != nil {
|
|
fmt.Fprintf(os.Stderr, " warn: %s (%s): %v\n", f.RelPath, kind, strings.TrimSpace(stderr.String()))
|
|
nErr++
|
|
continue
|
|
}
|
|
|
|
switch kind {
|
|
case "csv":
|
|
nCSV++
|
|
case "pdf":
|
|
nPDF++
|
|
case "md":
|
|
nMD++
|
|
}
|
|
}
|
|
|
|
fmt.Printf(" csv: %d pdf: %d md: %d skipped: %d errors: %d\n",
|
|
nCSV, nPDF, nMD, nSkip, nErr)
|
|
}
|
|
|
|
// --- dedupe ---
|
|
|
|
func vaultDedupe(args []string) {
|
|
if len(args) < 1 || strings.HasPrefix(args[0], "--") {
|
|
fmt.Fprintln(os.Stderr, "usage: fn vault dedupe <name>")
|
|
os.Exit(1)
|
|
}
|
|
name := args[0]
|
|
|
|
db := openDB()
|
|
defer db.Close()
|
|
|
|
v, err := resolveVaultByName(db, name)
|
|
if err != nil {
|
|
fmt.Fprintf(os.Stderr, "error: %v\n", err)
|
|
os.Exit(1)
|
|
}
|
|
if v.Path == "" {
|
|
fmt.Fprintf(os.Stderr, "vault %s has no path set\n", name)
|
|
os.Exit(1)
|
|
}
|
|
|
|
vaultDB, err := infra.VaultIndexOpen(v.Path)
|
|
if err != nil {
|
|
fmt.Fprintf(os.Stderr, "error opening vault index: %v\n", err)
|
|
os.Exit(1)
|
|
}
|
|
defer vaultDB.Close()
|
|
|
|
// Find duplicates: groups with the same sha256 hash and size > 0
|
|
rows, err := vaultDB.Query(`
|
|
SELECT sha256, count(*) as cnt, sum(size) as total_size, min(size) as file_size,
|
|
group_concat(rel_path, '|') as paths
|
|
FROM files
|
|
WHERE sha256 != '' AND size > 0
|
|
GROUP BY sha256
|
|
HAVING count(*) > 1
|
|
ORDER BY sum(size) DESC
|
|
LIMIT 50`)
|
|
if err != nil {
|
|
fmt.Fprintf(os.Stderr, "error querying duplicates: %v\n", err)
|
|
os.Exit(1)
|
|
}
|
|
defer rows.Close()
|
|
|
|
type dupeGroup struct {
|
|
Sha256 string
|
|
Count int
|
|
TotalSize int64
|
|
FileSize int64
|
|
Paths []string
|
|
}
|
|
var groups []dupeGroup
|
|
var totalWasted int64
|
|
for rows.Next() {
|
|
var g dupeGroup
|
|
var pathsConcat string
|
|
if scanErr := rows.Scan(&g.Sha256, &g.Count, &g.TotalSize, &g.FileSize, &pathsConcat); scanErr == nil {
|
|
g.Paths = strings.Split(pathsConcat, "|")
|
|
wasted := g.FileSize * int64(g.Count-1)
|
|
totalWasted += wasted
|
|
groups = append(groups, g)
|
|
}
|
|
}
|
|
rows.Close()
|
|
|
|
fmt.Printf("Vault: %s — duplicate report\n\n", name)
|
|
if len(groups) == 0 {
|
|
fmt.Println(" No duplicates found.")
|
|
return
|
|
}
|
|
|
|
w := tabwriter.NewWriter(os.Stdout, 0, 0, 2, ' ', 0)
|
|
fmt.Fprintln(w, "SHA256\tCOUNT\tSIZE\tWASTED\tPATHS")
|
|
for _, g := range groups {
|
|
sha := g.Sha256
|
|
if len(sha) > 12 {
|
|
sha = sha[:12] + "..."
|
|
}
|
|
wasted := g.FileSize * int64(g.Count-1)
|
|
pathsStr := strings.Join(g.Paths, ", ")
|
|
if len(pathsStr) > 60 {
|
|
pathsStr = pathsStr[:57] + "..."
|
|
}
|
|
fmt.Fprintf(w, "%s\t%d\t%s\t%s\t%s\n",
|
|
sha, g.Count, formatBytes(g.FileSize), formatBytes(wasted), pathsStr)
|
|
}
|
|
w.Flush()
|
|
fmt.Printf("\nTotal wasted space: %s (%d duplicate groups)\n",
|
|
formatBytes(totalWasted), len(groups))
|
|
}
|
|
|
|
// --- aggregate ---
|
|
|
|
func vaultAggregate() {
|
|
registryRoot := root()
|
|
fmt.Println("aggregating vault indexes into registry.db...")
|
|
report, err := infra.VaultAggregateIndex(registryRoot)
|
|
if err != nil {
|
|
fmt.Fprintf(os.Stderr, "error: %v\n", err)
|
|
os.Exit(1)
|
|
}
|
|
fmt.Printf(" vaults processed: %d skipped: %d total files: %d\n",
|
|
report.VaultsProcessed, report.VaultsSkipped, report.TotalFiles)
|
|
if len(report.Errors) > 0 {
|
|
for _, e := range report.Errors {
|
|
fmt.Fprintf(os.Stderr, " warn: %s\n", e)
|
|
}
|
|
}
|
|
}
|
|
|
|
// --- doctor (vault alias) ---
|
|
|
|
func vaultDoctorCmd() {
|
|
registryRoot := root()
|
|
entries, err := infra.VaultDoctor(registryRoot)
|
|
if err != nil {
|
|
fmt.Fprintf(os.Stderr, "error: %v\n", err)
|
|
os.Exit(1)
|
|
}
|
|
if len(entries) == 0 {
|
|
fmt.Println("No vaults registered.")
|
|
return
|
|
}
|
|
w := tabwriter.NewWriter(os.Stdout, 0, 0, 2, ' ', 0)
|
|
fmt.Fprintln(w, "VAULT\tSTATUS\tDISK\tINDEXED\tISSUES")
|
|
for _, e := range entries {
|
|
issues := "-"
|
|
if len(e.Issues) > 0 {
|
|
issues = strings.Join(e.Issues, ", ")
|
|
}
|
|
fmt.Fprintf(w, "%s\t%s\t%d\t%d\t%s\n",
|
|
e.VaultName, e.Status, e.DiskFiles, e.IndexedFiles, issues)
|
|
}
|
|
w.Flush()
|
|
}
|
|
|
|
// --- audit ---
|
|
|
|
func vaultAudit(args []string) {
|
|
auditAll := false
|
|
skipProfilers := false
|
|
dryRunLayout := false
|
|
var names []string
|
|
|
|
for _, a := range args {
|
|
switch a {
|
|
case "--all":
|
|
auditAll = true
|
|
case "--skip-profilers":
|
|
skipProfilers = true
|
|
case "--dry-run-layout":
|
|
dryRunLayout = true
|
|
default:
|
|
if !strings.HasPrefix(a, "--") {
|
|
names = append(names, a)
|
|
}
|
|
}
|
|
}
|
|
|
|
if !auditAll && len(names) == 0 {
|
|
fmt.Fprintln(os.Stderr, "usage: fn vault audit <name> | --all [--skip-profilers] [--dry-run-layout]")
|
|
os.Exit(1)
|
|
}
|
|
|
|
db := openDB()
|
|
defer db.Close()
|
|
|
|
var vaults []registry.Vault
|
|
if auditAll {
|
|
all, err := db.AllVaults()
|
|
if err != nil {
|
|
fmt.Fprintf(os.Stderr, "error listing vaults: %v\n", err)
|
|
os.Exit(1)
|
|
}
|
|
vaults = all
|
|
} else {
|
|
for _, n := range names {
|
|
v, err := resolveVaultByName(db, n)
|
|
if err != nil {
|
|
fmt.Fprintf(os.Stderr, "error: %v\n", err)
|
|
os.Exit(1)
|
|
}
|
|
vaults = append(vaults, *v)
|
|
}
|
|
}
|
|
db.Close()
|
|
|
|
type auditResult struct {
|
|
Name string
|
|
Status string
|
|
Errors []string
|
|
}
|
|
var results []auditResult
|
|
|
|
for _, v := range vaults {
|
|
fmt.Printf("\n=== vault: %s ===\n", v.Name)
|
|
if v.Path == "" {
|
|
fmt.Printf(" SKIP: no path set\n")
|
|
results = append(results, auditResult{Name: v.Name, Status: "skip"})
|
|
continue
|
|
}
|
|
|
|
var errs []string
|
|
|
|
// Step 1: layout-ensure
|
|
fmt.Printf(" [1/5] layout-ensure")
|
|
if dryRunLayout {
|
|
fmt.Printf(" (dry-run)")
|
|
}
|
|
fmt.Println()
|
|
layoutReport, layoutErr := infra.VaultLayoutEnsure(v.Path, dryRunLayout)
|
|
if layoutErr != nil {
|
|
fmt.Printf(" ERROR: %v\n", layoutErr)
|
|
errs = append(errs, "layout-ensure: "+layoutErr.Error())
|
|
} else {
|
|
if len(layoutReport.Created) > 0 {
|
|
fmt.Printf(" created: %s\n", strings.Join(layoutReport.Created, ", "))
|
|
}
|
|
if len(layoutReport.Migrated) > 0 {
|
|
fmt.Printf(" migrated: %s\n", strings.Join(layoutReport.Migrated, "; "))
|
|
}
|
|
if len(layoutReport.Created) == 0 && len(layoutReport.Migrated) == 0 {
|
|
fmt.Printf(" layout ok\n")
|
|
}
|
|
}
|
|
|
|
// Step 2: index
|
|
fmt.Println(" [2/5] index")
|
|
if indexErr := runVaultIndex(v); indexErr != nil {
|
|
fmt.Printf(" ERROR: %v\n", indexErr)
|
|
errs = append(errs, "index: "+indexErr.Error())
|
|
}
|
|
|
|
// Step 3: profile (optional)
|
|
if !skipProfilers {
|
|
fmt.Println(" [3/5] profile")
|
|
runVaultProfileSubcmd(v)
|
|
} else {
|
|
fmt.Println(" [3/5] profile (skipped)")
|
|
}
|
|
|
|
// Step 4: dedupe (informational, non-fatal)
|
|
fmt.Println(" [4/5] dedupe")
|
|
runVaultDedupeSubcmd(v)
|
|
|
|
// Step 5: aggregate is done once after all vaults
|
|
fmt.Println(" [5/5] aggregate (deferred to end)")
|
|
|
|
status := "ok"
|
|
if len(errs) > 0 {
|
|
status = "error"
|
|
}
|
|
results = append(results, auditResult{Name: v.Name, Status: status, Errors: errs})
|
|
}
|
|
|
|
// Final aggregate
|
|
fmt.Println("\n=== aggregate ===")
|
|
vaultAggregate()
|
|
|
|
// Summary table
|
|
fmt.Println("\n=== summary ===")
|
|
w := tabwriter.NewWriter(os.Stdout, 0, 0, 2, ' ', 0)
|
|
fmt.Fprintln(w, "VAULT\tSTATUS\tERRORS")
|
|
for _, r := range results {
|
|
errStr := "-"
|
|
if len(r.Errors) > 0 {
|
|
errStr = strings.Join(r.Errors, "; ")
|
|
}
|
|
fmt.Fprintf(w, "%s\t%s\t%s\n", r.Name, r.Status, errStr)
|
|
}
|
|
w.Flush()
|
|
}
|
|
|
|
// runVaultProfileSubcmd runs the profiling loop for a single vault (used by audit).
|
|
func runVaultProfileSubcmd(v registry.Vault) {
|
|
registryRoot := root()
|
|
|
|
vaultDB, err := infra.VaultIndexOpen(v.Path)
|
|
if err != nil {
|
|
fmt.Printf(" warn: cannot open vault index: %v\n", err)
|
|
return
|
|
}
|
|
defer vaultDB.Close()
|
|
|
|
rows, err := vaultDB.Query(`SELECT rel_path, ext, mime FROM files ORDER BY rel_path`)
|
|
if err != nil {
|
|
fmt.Printf(" warn: query failed: %v\n", err)
|
|
return
|
|
}
|
|
type fileRow struct {
|
|
RelPath string
|
|
Ext string
|
|
Mime string
|
|
}
|
|
var files []fileRow
|
|
for rows.Next() {
|
|
var f fileRow
|
|
if scanErr := rows.Scan(&f.RelPath, &f.Ext, &f.Mime); scanErr == nil {
|
|
files = append(files, f)
|
|
}
|
|
}
|
|
rows.Close()
|
|
|
|
if len(files) == 0 {
|
|
fmt.Printf(" no files in index\n")
|
|
return
|
|
}
|
|
|
|
dispatchScript := filepath.Join(registryRoot, "python", "functions", "infra", "vault_profile_dispatch.py")
|
|
pythonBin := filepath.Join(registryRoot, "python", ".venv", "bin", "python3")
|
|
if _, statErr := os.Stat(pythonBin); os.IsNotExist(statErr) {
|
|
pythonBin = "python3"
|
|
}
|
|
pythonPath := filepath.Join(registryRoot, "python", "functions")
|
|
|
|
var nCSV, nPDF, nMD, nSkip, nErr int
|
|
for _, f := range files {
|
|
kind := profileKind(f.Ext, f.Mime)
|
|
if kind == "" {
|
|
nSkip++
|
|
continue
|
|
}
|
|
cmd := exec.Command(pythonBin, dispatchScript,
|
|
"--vault", v.Path,
|
|
"--rel-path", f.RelPath,
|
|
"--kind", kind,
|
|
)
|
|
cmd.Env = append(os.Environ(),
|
|
"PYTHONPATH="+pythonPath,
|
|
"FN_REGISTRY_ROOT="+registryRoot,
|
|
)
|
|
var stderr bytes.Buffer
|
|
cmd.Stderr = &stderr
|
|
if runErr := cmd.Run(); runErr != nil {
|
|
nErr++
|
|
} else {
|
|
switch kind {
|
|
case "csv":
|
|
nCSV++
|
|
case "pdf":
|
|
nPDF++
|
|
case "md":
|
|
nMD++
|
|
}
|
|
}
|
|
}
|
|
fmt.Printf(" csv: %d pdf: %d md: %d skipped: %d errors: %d\n",
|
|
nCSV, nPDF, nMD, nSkip, nErr)
|
|
}
|
|
|
|
// runVaultDedupeSubcmd prints dedupe summary for a single vault (used by audit).
|
|
func runVaultDedupeSubcmd(v registry.Vault) {
|
|
vaultDB, err := infra.VaultIndexOpen(v.Path)
|
|
if err != nil {
|
|
fmt.Printf(" warn: cannot open vault index: %v\n", err)
|
|
return
|
|
}
|
|
defer vaultDB.Close()
|
|
|
|
var dupeGroups int
|
|
var totalWasted int64
|
|
rows, err := vaultDB.Query(`
|
|
SELECT count(*) as cnt, min(size) as file_size
|
|
FROM files
|
|
WHERE sha256 != '' AND size > 0
|
|
GROUP BY sha256
|
|
HAVING count(*) > 1`)
|
|
if err != nil {
|
|
fmt.Printf(" warn: query failed: %v\n", err)
|
|
return
|
|
}
|
|
for rows.Next() {
|
|
var cnt int
|
|
var fileSize int64
|
|
if scanErr := rows.Scan(&cnt, &fileSize); scanErr == nil {
|
|
dupeGroups++
|
|
totalWasted += fileSize * int64(cnt-1)
|
|
}
|
|
}
|
|
rows.Close()
|
|
|
|
if dupeGroups == 0 {
|
|
fmt.Printf(" no duplicates\n")
|
|
} else {
|
|
fmt.Printf(" %d duplicate groups, %s wasted (run 'fn vault dedupe %s' for details)\n",
|
|
dupeGroups, formatBytes(totalWasted), v.Name)
|
|
}
|
|
}
|
|
|
|
// Reference the database/sql package through a blank identifier so that its
// import is never reported as unused, even when no other code in this file
// names the package directly.
var _ = sql.ErrNoRows
|
|
|
|
// --- helpers ---
|
|
|
|
// resolveVaultByName looks up a vault by name in registry.db.
|
|
// Returns an error if not found or if name is ambiguous.
|
|
func resolveVaultByName(db *registry.DB, name string) (*registry.Vault, error) {
|
|
// Try direct ID first.
|
|
if v, err := db.GetVault(name); err == nil {
|
|
return v, nil
|
|
}
|
|
|
|
// Search by name.
|
|
vaults, err := db.SearchVaults(name, "")
|
|
if err != nil {
|
|
return nil, fmt.Errorf("search vaults: %w", err)
|
|
}
|
|
|
|
// Exact name match.
|
|
var exact []registry.Vault
|
|
for _, v := range vaults {
|
|
if v.Name == name {
|
|
exact = append(exact, v)
|
|
}
|
|
}
|
|
if len(exact) == 1 {
|
|
return &exact[0], nil
|
|
}
|
|
if len(exact) > 1 {
|
|
ids := make([]string, len(exact))
|
|
for i, v := range exact {
|
|
ids[i] = v.ID
|
|
}
|
|
return nil, fmt.Errorf("ambiguous vault name %q: %s", name, strings.Join(ids, ", "))
|
|
}
|
|
|
|
// Partial match fallback.
|
|
if len(vaults) == 1 {
|
|
return &vaults[0], nil
|
|
}
|
|
if len(vaults) > 1 {
|
|
ids := make([]string, len(vaults))
|
|
for i, v := range vaults {
|
|
ids[i] = v.ID
|
|
}
|
|
return nil, fmt.Errorf("ambiguous vault %q — use full name or ID: %s", name, strings.Join(ids, ", "))
|
|
}
|
|
|
|
return nil, fmt.Errorf("vault not found: %q (run 'fn index' to register vaults)", name)
|
|
}
|
|
|
|
// resolveSearchVaults returns the vault(s) to search.
|
|
// If name is non-empty, returns only that vault. Otherwise returns all vaults.
|
|
func resolveSearchVaults(db *registry.DB, name string) ([]registry.Vault, error) {
|
|
if name != "" {
|
|
v, err := resolveVaultByName(db, name)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
return []registry.Vault{*v}, nil
|
|
}
|
|
return db.AllVaults()
|
|
}
|
|
|
|
// formatBytes renders a byte count for display.
//
// Counts of at least 1 GiB, 1 MiB or 1 KiB are shown with one decimal place
// and the suffix "GB", "MB" or "KB" respectively (binary multiples of 1024);
// anything smaller, including negative values, is printed as a whole number
// of bytes with the suffix "B".
func formatBytes(b int64) string {
	// Largest unit first so the first threshold reached decides the suffix.
	units := [...]struct {
		size   int64
		suffix string
	}{
		{1 << 30, "GB"},
		{1 << 20, "MB"},
		{1 << 10, "KB"},
	}

	for _, unit := range units {
		if b >= unit.size {
			return fmt.Sprintf("%.1f %s", float64(b)/float64(unit.size), unit.suffix)
		}
	}
	return fmt.Sprintf("%d B", b)
}
|
|
|
|
// resolveVaultPath returns vaultPath with all symbolic links evaluated by
// filepath.EvalSymlinks. If evaluation fails (for example because the path
// does not exist) the input is returned unchanged, so the function never
// reports an error. The path is not otherwise made absolute.
func resolveVaultPath(vaultPath string) string {
	if target, err := filepath.EvalSymlinks(vaultPath); err == nil {
		return target
	}
	return vaultPath
}
|