chore: auto-commit (95 archivos)
- cmd/fn/doctor.go - cmd/fn/main.go - cpp/apps/primitives_gallery/playground/tables/CMakeLists.txt - cpp/apps/primitives_gallery/playground/tables/data_table.cpp - cpp/apps/primitives_gallery/playground/tables/data_table_logic.cpp - cpp/apps/primitives_gallery/playground/tables/data_table_logic.h - cpp/apps/primitives_gallery/playground/tables/self_test.cpp - cpp/apps/primitives_gallery/playground/tables/tql.cpp - cpp/apps/primitives_gallery/playground/tables/viz.cpp - cpp/apps/primitives_gallery/playground/tables/viz.h - ... Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -0,0 +1,265 @@
|
||||
package infra
|
||||
|
||||
import (
|
||||
"database/sql"
|
||||
"fmt"
|
||||
"path/filepath"
|
||||
"strings"
|
||||
)
|
||||
|
||||
// VaultSearchHit describes one file returned by VaultSearch.
//
// RelPath, Size, Mtime, Mime, Bucket and SubBucket are copied as-is from the
// matching row of the index's files table; their units and allowed values are
// defined by whatever populates that table.
type VaultSearchHit struct {
	// VaultPath is the vault location exactly as it was passed to the search.
	VaultPath string `json:"vault_path"`
	// VaultName is the basename of VaultPath after symlinks are resolved.
	VaultName string `json:"vault_name"`
	RelPath   string `json:"rel_path"`
	Size      int64  `json:"size"`
	Mtime     int64  `json:"mtime"`
	Mime      string `json:"mime"`
	Bucket    string `json:"bucket"`
	SubBucket string `json:"sub_bucket"`
	// Snippet is meant to carry an FTS5 snippet; it is empty when none is
	// available, e.g. when the hit was found only through its rel_path.
	Snippet string `json:"snippet"`
}
|
||||
|
||||
// VaultSearch searches vault_index.db inside vaultPath for files matching query.
|
||||
//
|
||||
// Behaviour:
|
||||
// 1. Opens vault_index.db via VaultIndexOpen.
|
||||
// 2. If limit <= 0, defaults to 50.
|
||||
// 3. Runs a FTS5 MATCH query over files_fts to find content matches (when content_text
|
||||
// is populated by profilers). Because the FTS5 table uses content='' (contentless),
|
||||
// column values are not stored; results are correlated back to files via a LIKE
|
||||
// match on rel_path for path tokens, or via an IN clause of matched rowids for
|
||||
// content_text matches.
|
||||
// 4. Also searches files.rel_path with LIKE to find path matches.
|
||||
// 5. Results from both searches are merged (deduplication by rel_path).
|
||||
// 6. If both FTS5 and LIKE queries fail, returns the error.
|
||||
// 7. VaultName is derived from the basename of vaultPath (after resolving symlinks).
|
||||
func VaultSearch(vaultPath, query string, limit int) ([]VaultSearchHit, error) {
|
||||
if limit <= 0 {
|
||||
limit = 50
|
||||
}
|
||||
|
||||
db, err := VaultIndexOpen(vaultPath)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("vault_search: open index: %w", err)
|
||||
}
|
||||
defer db.Close()
|
||||
|
||||
vaultName := resolveVaultName(vaultPath)
|
||||
|
||||
hits, err := vaultSearchCombined(db, vaultPath, vaultName, query, limit)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("vault_search: %w", err)
|
||||
}
|
||||
return hits, nil
|
||||
}
|
||||
|
||||
// vaultSearchCombined runs the search using two strategies and merges deduplicated results:
|
||||
// 1. FTS5 MATCH on files_fts (for content_text when populated by profilers).
|
||||
// Correlation back to files uses rowid (reliable for fresh indexes) or falls back.
|
||||
// 2. LIKE on files.rel_path (always reliable for path searching).
|
||||
//
|
||||
// Results are deduplicated by rel_path, up to limit entries.
|
||||
func vaultSearchCombined(db *sql.DB, vaultPath, vaultName, query string, limit int) ([]VaultSearchHit, error) {
|
||||
seen := make(map[string]struct{})
|
||||
var hits []VaultSearchHit
|
||||
|
||||
// Strategy 1: FTS5 MATCH on content_text (populated by profilers).
|
||||
// With contentless FTS5 (content=''), column values are NOT retrievable via SELECT.
|
||||
// We get matching rowids from FTS5, then look up files by rowid.
|
||||
// This is reliable for content_text matches because VaultIndexWrite inserts
|
||||
// content_text rows independently of the path rows (profilers update them).
|
||||
// NOTE: for rel_path token matching, strategy 2 (LIKE) is more reliable.
|
||||
ftsQuery := safeFTSQuery(query)
|
||||
ftsHits, ftsErr := vaultSearchFTSContent(db, vaultPath, vaultName, ftsQuery, limit)
|
||||
if ftsErr == nil {
|
||||
for _, h := range ftsHits {
|
||||
if len(hits) >= limit {
|
||||
break
|
||||
}
|
||||
if _, ok := seen[h.RelPath]; !ok {
|
||||
seen[h.RelPath] = struct{}{}
|
||||
hits = append(hits, h)
|
||||
}
|
||||
}
|
||||
}
|
||||
// If FTS5 failed with a syntax error, that's expected for bad queries — continue.
|
||||
// If it failed with a non-syntax error, still continue to LIKE fallback.
|
||||
|
||||
// Strategy 2: LIKE on rel_path — reliable path search.
|
||||
// When query contains FTS5 special chars (e.g. "foo:bar:"), extract the first
|
||||
// word-like token so the LIKE pattern is still useful.
|
||||
likeQuery := simplifyForLike(query)
|
||||
if len(hits) < limit && likeQuery != "" {
|
||||
remaining := limit - len(hits)
|
||||
likeHits, likeErr := vaultSearchLike(db, vaultPath, vaultName, likeQuery, remaining+len(seen))
|
||||
if likeErr != nil && ftsErr != nil {
|
||||
// Both failed — return a combined error.
|
||||
return nil, fmt.Errorf("fts: %v; like: %v", ftsErr, likeErr)
|
||||
}
|
||||
for _, h := range likeHits {
|
||||
if len(hits) >= limit {
|
||||
break
|
||||
}
|
||||
if _, ok := seen[h.RelPath]; !ok {
|
||||
seen[h.RelPath] = struct{}{}
|
||||
hits = append(hits, h)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if hits == nil {
|
||||
hits = []VaultSearchHit{}
|
||||
}
|
||||
return hits, nil
|
||||
}
|
||||
|
||||
// vaultSearchFTSContent queries files_fts with a MATCH and correlates results
|
||||
// back to the files table.
|
||||
//
|
||||
// Design note: with content='' (contentless FTS5), SELECT on columns returns ''.
|
||||
// We get the rowid from the FTS5 match and look up files.rel_path via rowid.
|
||||
// This works correctly when content_text was populated by a profiler that did NOT
|
||||
// delete+reinsert the FTS row (i.e. profilers do direct INSERT/UPDATE of content_text
|
||||
// without changing the rowid). For the current VaultIndexWrite implementation
|
||||
// (which inserts content_text='' and profilers update it in-place), the rowids
|
||||
// remain stable after profiling.
|
||||
func vaultSearchFTSContent(db *sql.DB, vaultPath, vaultName, safeQuery string, limit int) ([]VaultSearchHit, error) {
|
||||
// Get matching rowids from FTS5.
|
||||
const qRowids = `
|
||||
SELECT rowid
|
||||
FROM files_fts
|
||||
WHERE files_fts MATCH ?
|
||||
ORDER BY rank
|
||||
LIMIT ?`
|
||||
|
||||
rows, err := db.Query(qRowids, safeQuery, limit)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
defer rows.Close()
|
||||
|
||||
var rowids []int64
|
||||
for rows.Next() {
|
||||
var rid int64
|
||||
if err := rows.Scan(&rid); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
rowids = append(rowids, rid)
|
||||
}
|
||||
if err := rows.Err(); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
if len(rowids) == 0 {
|
||||
return nil, nil
|
||||
}
|
||||
|
||||
// Look up files by rowid. files uses a TEXT PK so its rowid is implicit.
|
||||
// Snippet is empty for contentless FTS5 (snippet() returns NULL there).
|
||||
var hits []VaultSearchHit
|
||||
for _, rid := range rowids {
|
||||
var h VaultSearchHit
|
||||
err := db.QueryRow(`
|
||||
SELECT rel_path, size, mtime, mime, bucket, sub_bucket
|
||||
FROM files WHERE rowid = ?`, rid,
|
||||
).Scan(&h.RelPath, &h.Size, &h.Mtime, &h.Mime, &h.Bucket, &h.SubBucket)
|
||||
if err != nil {
|
||||
// rowid mismatch (happens after update cycles) — skip gracefully.
|
||||
continue
|
||||
}
|
||||
h.VaultPath = vaultPath
|
||||
h.VaultName = vaultName
|
||||
h.Snippet = ""
|
||||
hits = append(hits, h)
|
||||
}
|
||||
return hits, nil
|
||||
}
|
||||
|
||||
// vaultSearchLike searches files.rel_path with LIKE, ordered by mtime DESC.
|
||||
func vaultSearchLike(db *sql.DB, vaultPath, vaultName, query string, limit int) ([]VaultSearchHit, error) {
|
||||
const qLike = `
|
||||
SELECT rel_path, size, mtime, mime, bucket, sub_bucket
|
||||
FROM files
|
||||
WHERE rel_path LIKE '%' || ? || '%'
|
||||
ORDER BY mtime DESC
|
||||
LIMIT ?`
|
||||
|
||||
rows, err := db.Query(qLike, query, limit)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
defer rows.Close()
|
||||
|
||||
var hits []VaultSearchHit
|
||||
for rows.Next() {
|
||||
var h VaultSearchHit
|
||||
if err := rows.Scan(&h.RelPath, &h.Size, &h.Mtime, &h.Mime, &h.Bucket, &h.SubBucket); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
h.VaultPath = vaultPath
|
||||
h.VaultName = vaultName
|
||||
h.Snippet = ""
|
||||
hits = append(hits, h)
|
||||
}
|
||||
return hits, rows.Err()
|
||||
}
|
||||
|
||||
// resolveVaultName derives a display name for a vault: the last path element
// of vaultPath once symlinks have been resolved. When the symlinks cannot be
// resolved (for instance because the path does not exist), the last element
// of vaultPath itself is used instead.
func resolveVaultName(vaultPath string) string {
	if target, err := filepath.EvalSymlinks(vaultPath); err == nil {
		return filepath.Base(target)
	}
	return filepath.Base(vaultPath)
}
|
||||
|
||||
// safeFTSQuery prepares user input for use as an FTS5 MATCH expression.
//
// The query is trimmed; an empty result is returned as is. A query is passed
// through untouched when it looks like deliberate FTS5 syntax, i.e. when it
// contains a ':' (column filter) or one of the operators AND, OR, NOT
// surrounded by spaces. Such a query may still be rejected by FTS5 (for
// example "foo:bar:"); callers are expected to cope with that error.
//
// Anything else is turned into a single quoted phrase, with embedded double
// quotes doubled, so that input such as "hello-world" cannot cause a syntax
// error.
//
// Operators are matched case-sensitively because FTS5 only recognises the
// upper-case keywords. Lower-case "and"/"or"/"not" are ordinary words, so
// text such as "salt-n-pepa or similar" is quoted instead of being handed to
// the FTS5 parser raw.
func safeFTSQuery(query string) string {
	q := strings.TrimSpace(query)
	if q == "" {
		return q
	}

	// Explicit FTS5 syntax: leave it to the user.
	if strings.Contains(q, ":") {
		return q
	}
	for _, op := range [...]string{" AND ", " OR ", " NOT "} {
		if strings.Contains(q, op) {
			return q
		}
	}

	// Plain text: quote as one phrase, doubling embedded quotes.
	return `"` + strings.ReplaceAll(q, `"`, `""`) + `"`
}
|
||||
|
||||
// isFTSSyntaxError reports whether err reads like a rejection by the FTS5
// query parser. The decision is purely textual: the lower-cased error message
// must contain one of the known markers. A nil error is never a syntax error.
func isFTSSyntaxError(err error) bool {
	if err == nil {
		return false
	}

	text := strings.ToLower(err.Error())
	// The last marker is already covered by the first one; it is listed only
	// to keep the recognised set explicit.
	for _, marker := range []string{"syntax error", "no such column", "fts5: syntax error"} {
		if strings.Contains(text, marker) {
			return true
		}
	}
	return false
}
|
||||
|
||||
// simplifyForLike reduces query to a single token that is safe and useful as
// a LIKE substring.
//
// After trimming, leading characters that cannot be part of a token are
// skipped and the first run of token characters is returned; everything
// after that run is ignored. This always applies, so "hello world" yields
// "hello" and "foo:bar:" yields "foo". An input without any token character
// yields "".
//
// Token characters are ASCII letters and digits, '_', '-', and any non-ASCII
// rune (the same treatment FTS5 gives non-ASCII characters in barewords).
// Accepting non-ASCII runes keeps names such as "café" or "日本語" intact
// instead of truncating them or producing an empty token. The Unicode
// replacement character, which is also what invalid UTF-8 bytes decode to,
// is never part of a token.
func simplifyForLike(query string) string {
	isTokenRune := func(r rune) bool {
		switch {
		case r >= 'a' && r <= 'z', r >= 'A' && r <= 'Z', r >= '0' && r <= '9':
			return true
		case r == '_', r == '-':
			return true
		}
		// 0x80 is the first code point outside ASCII.
		return r >= 0x80 && r != '\uFFFD'
	}

	var token strings.Builder
	for _, r := range strings.TrimSpace(query) {
		if isTokenRune(r) {
			token.WriteRune(r)
			continue
		}
		if token.Len() > 0 {
			// First separator after the token: stop.
			break
		}
	}
	return token.String()
}
|
||||
Reference in New Issue
Block a user