chore: auto-commit (95 archivos)
- cmd/fn/doctor.go - cmd/fn/main.go - cpp/apps/primitives_gallery/playground/tables/CMakeLists.txt - cpp/apps/primitives_gallery/playground/tables/data_table.cpp - cpp/apps/primitives_gallery/playground/tables/data_table_logic.cpp - cpp/apps/primitives_gallery/playground/tables/data_table_logic.h - cpp/apps/primitives_gallery/playground/tables/self_test.cpp - cpp/apps/primitives_gallery/playground/tables/tql.cpp - cpp/apps/primitives_gallery/playground/tables/viz.cpp - cpp/apps/primitives_gallery/playground/tables/viz.h - ... Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -0,0 +1,174 @@
|
||||
package infra
|
||||
|
||||
import (
|
||||
"crypto/sha256"
|
||||
"encoding/hex"
|
||||
"fmt"
|
||||
"io"
|
||||
"net/http"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"sort"
|
||||
"strings"
|
||||
)
|
||||
|
||||
// VaultInventoryScan walks vaultPath and returns a VaultFile slice (sorted by RelPath)
|
||||
// for every regular file found, skipping:
|
||||
// - vault_index.db, vault_index.db-shm, vault_index.db-wal
|
||||
// - .git/ directories at any depth
|
||||
// - hidden files/dirs (names starting with ".") at the vault root level only
|
||||
//
|
||||
// For each file it computes: relative path (forward slashes), size, mtime (unix UTC),
|
||||
// sha256 (streaming, hex lowercase), MIME type, extension, bucket and sub-bucket.
|
||||
//
|
||||
// MIME detection priority:
|
||||
// 1. Extension override: .csv → text/csv, .md → text/markdown, .parquet → application/parquet
|
||||
// 2. http.DetectContentType on first 512 bytes (magic bytes, stdlib)
|
||||
//
|
||||
// NOTE: file_validate_type_go_infra (FileValidateType) was not used here because its
|
||||
// signature requires an allowedTypes allowlist and returns (mime, bool) — it is designed
|
||||
// for upload validation, not for open-ended inventory scanning where any MIME is valid.
|
||||
// http.DetectContentType provides the same magic-byte detection without the allowlist
|
||||
// coupling and handles a broader set of formats including text/plain for CSV fallback.
|
||||
func VaultInventoryScan(vaultPath, vaultID, vaultName string) ([]VaultFile, error) {
|
||||
var files []VaultFile
|
||||
|
||||
err := filepath.WalkDir(vaultPath, func(path string, d os.DirEntry, err error) error {
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
name := d.Name()
|
||||
|
||||
// Skip .git directories at any depth.
|
||||
if d.IsDir() && name == ".git" {
|
||||
return filepath.SkipDir
|
||||
}
|
||||
|
||||
// Skip hidden entries (names starting with ".") at vault root only.
|
||||
if strings.HasPrefix(name, ".") {
|
||||
rel, relErr := filepath.Rel(vaultPath, path)
|
||||
if relErr == nil {
|
||||
// At root level the relative path has no separator.
|
||||
if !strings.Contains(filepath.ToSlash(rel), "/") {
|
||||
if d.IsDir() {
|
||||
return filepath.SkipDir
|
||||
}
|
||||
return nil
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if d.IsDir() {
|
||||
return nil
|
||||
}
|
||||
|
||||
// Skip vault_index.db and its WAL/SHM sidecar files.
|
||||
if name == "vault_index.db" || name == "vault_index.db-shm" || name == "vault_index.db-wal" {
|
||||
return nil
|
||||
}
|
||||
|
||||
rel, err := filepath.Rel(vaultPath, path)
|
||||
if err != nil {
|
||||
return fmt.Errorf("vault_inventory_scan: rel path for %q: %w", path, err)
|
||||
}
|
||||
rel = filepath.ToSlash(rel)
|
||||
|
||||
info, err := d.Info()
|
||||
if err != nil {
|
||||
return fmt.Errorf("vault_inventory_scan: stat %q: %w", path, err)
|
||||
}
|
||||
|
||||
// Compute sha256 by streaming — avoids loading large files into memory.
|
||||
sha, err := fileSha256(path)
|
||||
if err != nil {
|
||||
return fmt.Errorf("vault_inventory_scan: sha256 %q: %w", path, err)
|
||||
}
|
||||
|
||||
mime, err := detectVaultFileMime(path, name)
|
||||
if err != nil {
|
||||
return fmt.Errorf("vault_inventory_scan: mime %q: %w", path, err)
|
||||
}
|
||||
|
||||
ext := strings.ToLower(filepath.Ext(name))
|
||||
bucket, subBucket := vaultBucketParts(rel)
|
||||
|
||||
files = append(files, VaultFile{
|
||||
VaultID: vaultID,
|
||||
VaultName: vaultName,
|
||||
RelPath: rel,
|
||||
Size: info.Size(),
|
||||
Mtime: info.ModTime().UTC().Unix(),
|
||||
Sha256: sha,
|
||||
Mime: mime,
|
||||
Ext: ext,
|
||||
Bucket: bucket,
|
||||
SubBucket: subBucket,
|
||||
})
|
||||
return nil
|
||||
})
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("vault_inventory_scan: walk %q: %w", vaultPath, err)
|
||||
}
|
||||
|
||||
sort.Slice(files, func(i, j int) bool {
|
||||
return files[i].RelPath < files[j].RelPath
|
||||
})
|
||||
return files, nil
|
||||
}
|
||||
|
||||
// fileSha256 returns the SHA-256 digest of the file at path as a lowercase hex
// string. The content is streamed through the hasher rather than read into
// memory, so the cost in memory is independent of the file size. Any error
// from opening or reading the file is returned unchanged with an empty digest.
func fileSha256(path string) (string, error) {
	digest := sha256.New()

	src, openErr := os.Open(path)
	if openErr != nil {
		return "", openErr
	}
	defer src.Close()

	_, copyErr := io.Copy(digest, src)
	if copyErr != nil {
		return "", copyErr
	}

	sum := digest.Sum(nil)
	return hex.EncodeToString(sum), nil
}
|
||||
|
||||
// detectVaultFileMime returns the MIME type for a vault file.
//
// name supplies the extension (compared case-insensitively); path is the
// location opened for content sniffing. Extension overrides take priority and
// return without touching the file:
//
//	.csv     → text/csv
//	.md      → text/markdown
//	.parquet → application/parquet
//
// For every other extension the first 512 bytes of the file are passed to
// http.DetectContentType. An error is returned only when the file cannot be
// opened or read; a file shorter than 512 bytes (including an empty one) is
// sniffed on whatever content it has.
func detectVaultFileMime(path, name string) (string, error) {
	ext := strings.ToLower(filepath.Ext(name))
	switch ext {
	case ".csv":
		return "text/csv", nil
	case ".md":
		return "text/markdown", nil
	case ".parquet":
		return "application/parquet", nil
	}

	f, err := os.Open(path)
	if err != nil {
		return "", err
	}
	defer f.Close()

	// http.DetectContentType considers at most the first 512 bytes.
	buf := make([]byte, 512)

	// A single Read is allowed to return fewer bytes than requested even when
	// more are available, which could truncate the sniffed prefix. ReadFull
	// keeps reading until the buffer is full or the file ends; io.EOF (empty
	// file) and io.ErrUnexpectedEOF (short file) are expected outcomes here.
	n, err := io.ReadFull(f, buf)
	if err != nil && err != io.EOF && err != io.ErrUnexpectedEOF {
		return "", err
	}
	return http.DetectContentType(buf[:n]), nil
}
|
||||
|
||||
// vaultBucketParts extracts the top-level bucket and the second-level
// sub-bucket from a forward-slash relative file path.
//
// Only directory components count: the final component of relPath is the file
// name and is never reported as a bucket or sub-bucket. Consequently:
//
//	"data/sales/2024.csv" → ("data", "sales")
//	"data/file.csv"       → ("data", "")
//	"readme.md"           → ("", "")
//	""                    → ("", "")
//
// The bucket name is returned as found; no check is made that it is one of the
// expected vault buckets (the original comment names "data" and "knowledge").
func vaultBucketParts(relPath string) (bucket, subBucket string) {
	// Splitting into at most three pieces is enough to tell apart
	// "file", "bucket/file" and "bucket/sub/…".
	parts := strings.SplitN(relPath, "/", 3)
	switch len(parts) {
	case 3:
		return parts[0], parts[1]
	case 2:
		return parts[0], ""
	default:
		// A file directly at the vault root belongs to no bucket.
		return "", ""
	}
}
|
||||
Reference in New Issue
Block a user