a802f59f55
- cmd/fn/doctor.go - cmd/fn/main.go - cpp/apps/primitives_gallery/playground/tables/CMakeLists.txt - cpp/apps/primitives_gallery/playground/tables/data_table.cpp - cpp/apps/primitives_gallery/playground/tables/data_table_logic.cpp - cpp/apps/primitives_gallery/playground/tables/data_table_logic.h - cpp/apps/primitives_gallery/playground/tables/self_test.cpp - cpp/apps/primitives_gallery/playground/tables/tql.cpp - cpp/apps/primitives_gallery/playground/tables/viz.cpp - cpp/apps/primitives_gallery/playground/tables/viz.h - ... Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
175 lines
4.8 KiB
Go
175 lines
4.8 KiB
Go
package infra
|
|
|
|
import (
|
|
"crypto/sha256"
|
|
"encoding/hex"
|
|
"fmt"
|
|
"io"
|
|
"net/http"
|
|
"os"
|
|
"path/filepath"
|
|
"sort"
|
|
"strings"
|
|
)
|
|
|
|
// VaultInventoryScan walks vaultPath and returns a VaultFile slice (sorted by RelPath)
|
|
// for every regular file found, skipping:
|
|
// - vault_index.db, vault_index.db-shm, vault_index.db-wal
|
|
// - .git/ directories at any depth
|
|
// - hidden files/dirs (names starting with ".") at the vault root level only
|
|
//
|
|
// For each file it computes: relative path (forward slashes), size, mtime (unix UTC),
|
|
// sha256 (streaming, hex lowercase), MIME type, extension, bucket and sub-bucket.
|
|
//
|
|
// MIME detection priority:
|
|
// 1. Extension override: .csv → text/csv, .md → text/markdown, .parquet → application/parquet
|
|
// 2. http.DetectContentType on first 512 bytes (magic bytes, stdlib)
|
|
//
|
|
// NOTE: file_validate_type_go_infra (FileValidateType) was not used here because its
|
|
// signature requires an allowedTypes allowlist and returns (mime, bool) — it is designed
|
|
// for upload validation, not for open-ended inventory scanning where any MIME is valid.
|
|
// http.DetectContentType provides the same magic-byte detection without the allowlist
|
|
// coupling and handles a broader set of formats including text/plain for CSV fallback.
|
|
func VaultInventoryScan(vaultPath, vaultID, vaultName string) ([]VaultFile, error) {
|
|
var files []VaultFile
|
|
|
|
err := filepath.WalkDir(vaultPath, func(path string, d os.DirEntry, err error) error {
|
|
if err != nil {
|
|
return err
|
|
}
|
|
|
|
name := d.Name()
|
|
|
|
// Skip .git directories at any depth.
|
|
if d.IsDir() && name == ".git" {
|
|
return filepath.SkipDir
|
|
}
|
|
|
|
// Skip hidden entries (names starting with ".") at vault root only.
|
|
if strings.HasPrefix(name, ".") {
|
|
rel, relErr := filepath.Rel(vaultPath, path)
|
|
if relErr == nil {
|
|
// At root level the relative path has no separator.
|
|
if !strings.Contains(filepath.ToSlash(rel), "/") {
|
|
if d.IsDir() {
|
|
return filepath.SkipDir
|
|
}
|
|
return nil
|
|
}
|
|
}
|
|
}
|
|
|
|
if d.IsDir() {
|
|
return nil
|
|
}
|
|
|
|
// Skip vault_index.db and its WAL/SHM sidecar files.
|
|
if name == "vault_index.db" || name == "vault_index.db-shm" || name == "vault_index.db-wal" {
|
|
return nil
|
|
}
|
|
|
|
rel, err := filepath.Rel(vaultPath, path)
|
|
if err != nil {
|
|
return fmt.Errorf("vault_inventory_scan: rel path for %q: %w", path, err)
|
|
}
|
|
rel = filepath.ToSlash(rel)
|
|
|
|
info, err := d.Info()
|
|
if err != nil {
|
|
return fmt.Errorf("vault_inventory_scan: stat %q: %w", path, err)
|
|
}
|
|
|
|
// Compute sha256 by streaming — avoids loading large files into memory.
|
|
sha, err := fileSha256(path)
|
|
if err != nil {
|
|
return fmt.Errorf("vault_inventory_scan: sha256 %q: %w", path, err)
|
|
}
|
|
|
|
mime, err := detectVaultFileMime(path, name)
|
|
if err != nil {
|
|
return fmt.Errorf("vault_inventory_scan: mime %q: %w", path, err)
|
|
}
|
|
|
|
ext := strings.ToLower(filepath.Ext(name))
|
|
bucket, subBucket := vaultBucketParts(rel)
|
|
|
|
files = append(files, VaultFile{
|
|
VaultID: vaultID,
|
|
VaultName: vaultName,
|
|
RelPath: rel,
|
|
Size: info.Size(),
|
|
Mtime: info.ModTime().UTC().Unix(),
|
|
Sha256: sha,
|
|
Mime: mime,
|
|
Ext: ext,
|
|
Bucket: bucket,
|
|
SubBucket: subBucket,
|
|
})
|
|
return nil
|
|
})
|
|
if err != nil {
|
|
return nil, fmt.Errorf("vault_inventory_scan: walk %q: %w", vaultPath, err)
|
|
}
|
|
|
|
sort.Slice(files, func(i, j int) bool {
|
|
return files[i].RelPath < files[j].RelPath
|
|
})
|
|
return files, nil
|
|
}
|
|
|
|
// fileSha256 streams the file at path through a SHA-256 hasher and returns the
// digest encoded as lowercase hex. The file contents are never buffered whole.
// An error from opening or reading the file is returned unchanged.
func fileSha256(path string) (string, error) {
	src, openErr := os.Open(path)
	if openErr != nil {
		return "", openErr
	}
	defer src.Close()

	hasher := sha256.New()
	_, copyErr := io.Copy(hasher, src)
	if copyErr != nil {
		return "", copyErr
	}

	digest := hasher.Sum(nil)
	return hex.EncodeToString(digest), nil
}
|
|
|
|
// detectVaultFileMime returns the MIME type for the vault file at path.
//
// name is the file's base name; only its extension (compared case-insensitively)
// is consulted. Extension overrides take priority and are answered without
// opening the file:
//
//	.csv     → text/csv
//	.md      → text/markdown
//	.parquet → application/parquet
//
// For every other extension the first 512 bytes of the file are sniffed with
// http.DetectContentType. An error is returned only when the file cannot be
// opened or read; an empty or short file is not an error.
func detectVaultFileMime(path, name string) (string, error) {
	switch strings.ToLower(filepath.Ext(name)) {
	case ".csv":
		return "text/csv", nil
	case ".md":
		return "text/markdown", nil
	case ".parquet":
		return "application/parquet", nil
	}

	f, err := os.Open(path)
	if err != nil {
		return "", err
	}
	defer f.Close()

	// http.DetectContentType looks at no more than the first 512 bytes.
	var buf [512]byte

	// io.ReadFull keeps reading until the buffer is full or the file ends. A
	// bare Read is allowed to return fewer bytes than are available, which
	// would hand the sniffer a truncated prefix.
	n, err := io.ReadFull(f, buf[:])
	// io.EOF (empty file) and io.ErrUnexpectedEOF (file shorter than 512
	// bytes) are the normal short-file outcomes, not failures.
	if err != nil && err != io.EOF && err != io.ErrUnexpectedEOF {
		return "", err
	}
	return http.DetectContentType(buf[:n]), nil
}
|
|
|
|
// vaultBucketParts extracts the top-level bucket directory (e.g. "data" or
// "knowledge") and the second-level sub-bucket directory from a forward-slash
// relative file path.
//
// Only directory components count; the trailing file name is never reported:
//
//	"data/sales/q1.csv" → ("data", "sales")
//	"data/q1.csv"       → ("data", "")
//	"readme.md"         → ("", "")
//
// Bucket names are returned as found; they are not validated against a fixed
// list of known buckets.
func vaultBucketParts(relPath string) (bucket, subBucket string) {
	// Limit 3: [bucket, sub-bucket, rest]. The last element always contains
	// the file name, so it is never a directory on its own.
	parts := strings.SplitN(relPath, "/", 3)
	switch len(parts) {
	case 3:
		return parts[0], parts[1]
	case 2:
		// File sits directly inside the bucket: no sub-bucket.
		return parts[0], ""
	default:
		// File at the vault root (or empty path): no bucket at all.
		return "", ""
	}
}
|