a802f59f55
- cmd/fn/doctor.go - cmd/fn/main.go - cpp/apps/primitives_gallery/playground/tables/CMakeLists.txt - cpp/apps/primitives_gallery/playground/tables/data_table.cpp - cpp/apps/primitives_gallery/playground/tables/data_table_logic.cpp - cpp/apps/primitives_gallery/playground/tables/data_table_logic.h - cpp/apps/primitives_gallery/playground/tables/self_test.cpp - cpp/apps/primitives_gallery/playground/tables/tql.cpp - cpp/apps/primitives_gallery/playground/tables/viz.cpp - cpp/apps/primitives_gallery/playground/tables/viz.h - ... Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
266 lines
8.8 KiB
Go
266 lines
8.8 KiB
Go
package infra
|
|
|
|
import (
	"database/sql"
	"fmt"
	"path/filepath"
	"strings"
	"unicode"
)
|
|
|
|
// VaultSearchHit describes one file matched by VaultSearch.
//
// RelPath, Size, Mtime, Mime, Bucket and SubBucket are copied verbatim from
// the same-named columns of the index's files table.
type VaultSearchHit struct {
	// VaultPath is the vault path the search was run against, exactly as the
	// caller supplied it.
	VaultPath string `json:"vault_path"`
	// VaultName is the basename of VaultPath after resolving symlinks.
	VaultName string `json:"vault_name"`
	// RelPath identifies the file; search results are deduplicated on it.
	// Presumably relative to the vault root — confirm against the indexer.
	RelPath string `json:"rel_path"`
	// Size is the indexed file size (units not visible here; presumably bytes).
	Size int64 `json:"size"`
	// Mtime is the indexed modification time; path matches are ordered by it,
	// newest first. Encoding not visible here — presumably a Unix timestamp.
	Mtime int64 `json:"mtime"`
	// Mime is the indexed MIME type.
	Mime string `json:"mime"`
	// Bucket is the indexed bucket value.
	Bucket string `json:"bucket"`
	// SubBucket is the indexed sub-bucket value.
	SubBucket string `json:"sub_bucket"`
	// Snippet is intended to carry an FTS5 snippet; the search paths in this
	// file always leave it empty.
	Snippet string `json:"snippet"`
}
|
|
|
|
// VaultSearch searches vault_index.db inside vaultPath for files matching query.
|
|
//
|
|
// Behaviour:
|
|
// 1. Opens vault_index.db via VaultIndexOpen.
|
|
// 2. If limit <= 0, defaults to 50.
|
|
// 3. Runs a FTS5 MATCH query over files_fts to find content matches (when content_text
|
|
// is populated by profilers). Because the FTS5 table uses content='' (contentless),
|
|
// column values are not stored; results are correlated back to files via a LIKE
|
|
// match on rel_path for path tokens, or via an IN clause of matched rowids for
|
|
// content_text matches.
|
|
// 4. Also searches files.rel_path with LIKE to find path matches.
|
|
// 5. Results from both searches are merged (deduplication by rel_path).
|
|
// 6. If both FTS5 and LIKE queries fail, returns the error.
|
|
// 7. VaultName is derived from the basename of vaultPath (after resolving symlinks).
|
|
func VaultSearch(vaultPath, query string, limit int) ([]VaultSearchHit, error) {
|
|
if limit <= 0 {
|
|
limit = 50
|
|
}
|
|
|
|
db, err := VaultIndexOpen(vaultPath)
|
|
if err != nil {
|
|
return nil, fmt.Errorf("vault_search: open index: %w", err)
|
|
}
|
|
defer db.Close()
|
|
|
|
vaultName := resolveVaultName(vaultPath)
|
|
|
|
hits, err := vaultSearchCombined(db, vaultPath, vaultName, query, limit)
|
|
if err != nil {
|
|
return nil, fmt.Errorf("vault_search: %w", err)
|
|
}
|
|
return hits, nil
|
|
}
|
|
|
|
// vaultSearchCombined runs the search using two strategies and merges deduplicated results:
|
|
// 1. FTS5 MATCH on files_fts (for content_text when populated by profilers).
|
|
// Correlation back to files uses rowid (reliable for fresh indexes) or falls back.
|
|
// 2. LIKE on files.rel_path (always reliable for path searching).
|
|
//
|
|
// Results are deduplicated by rel_path, up to limit entries.
|
|
func vaultSearchCombined(db *sql.DB, vaultPath, vaultName, query string, limit int) ([]VaultSearchHit, error) {
|
|
seen := make(map[string]struct{})
|
|
var hits []VaultSearchHit
|
|
|
|
// Strategy 1: FTS5 MATCH on content_text (populated by profilers).
|
|
// With contentless FTS5 (content=''), column values are NOT retrievable via SELECT.
|
|
// We get matching rowids from FTS5, then look up files by rowid.
|
|
// This is reliable for content_text matches because VaultIndexWrite inserts
|
|
// content_text rows independently of the path rows (profilers update them).
|
|
// NOTE: for rel_path token matching, strategy 2 (LIKE) is more reliable.
|
|
ftsQuery := safeFTSQuery(query)
|
|
ftsHits, ftsErr := vaultSearchFTSContent(db, vaultPath, vaultName, ftsQuery, limit)
|
|
if ftsErr == nil {
|
|
for _, h := range ftsHits {
|
|
if len(hits) >= limit {
|
|
break
|
|
}
|
|
if _, ok := seen[h.RelPath]; !ok {
|
|
seen[h.RelPath] = struct{}{}
|
|
hits = append(hits, h)
|
|
}
|
|
}
|
|
}
|
|
// If FTS5 failed with a syntax error, that's expected for bad queries — continue.
|
|
// If it failed with a non-syntax error, still continue to LIKE fallback.
|
|
|
|
// Strategy 2: LIKE on rel_path — reliable path search.
|
|
// When query contains FTS5 special chars (e.g. "foo:bar:"), extract the first
|
|
// word-like token so the LIKE pattern is still useful.
|
|
likeQuery := simplifyForLike(query)
|
|
if len(hits) < limit && likeQuery != "" {
|
|
remaining := limit - len(hits)
|
|
likeHits, likeErr := vaultSearchLike(db, vaultPath, vaultName, likeQuery, remaining+len(seen))
|
|
if likeErr != nil && ftsErr != nil {
|
|
// Both failed — return a combined error.
|
|
return nil, fmt.Errorf("fts: %v; like: %v", ftsErr, likeErr)
|
|
}
|
|
for _, h := range likeHits {
|
|
if len(hits) >= limit {
|
|
break
|
|
}
|
|
if _, ok := seen[h.RelPath]; !ok {
|
|
seen[h.RelPath] = struct{}{}
|
|
hits = append(hits, h)
|
|
}
|
|
}
|
|
}
|
|
|
|
if hits == nil {
|
|
hits = []VaultSearchHit{}
|
|
}
|
|
return hits, nil
|
|
}
|
|
|
|
// vaultSearchFTSContent queries files_fts with a MATCH and correlates results
|
|
// back to the files table.
|
|
//
|
|
// Design note: with content='' (contentless FTS5), SELECT on columns returns ''.
|
|
// We get the rowid from the FTS5 match and look up files.rel_path via rowid.
|
|
// This works correctly when content_text was populated by a profiler that did NOT
|
|
// delete+reinsert the FTS row (i.e. profilers do direct INSERT/UPDATE of content_text
|
|
// without changing the rowid). For the current VaultIndexWrite implementation
|
|
// (which inserts content_text='' and profilers update it in-place), the rowids
|
|
// remain stable after profiling.
|
|
func vaultSearchFTSContent(db *sql.DB, vaultPath, vaultName, safeQuery string, limit int) ([]VaultSearchHit, error) {
|
|
// Get matching rowids from FTS5.
|
|
const qRowids = `
|
|
SELECT rowid
|
|
FROM files_fts
|
|
WHERE files_fts MATCH ?
|
|
ORDER BY rank
|
|
LIMIT ?`
|
|
|
|
rows, err := db.Query(qRowids, safeQuery, limit)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
defer rows.Close()
|
|
|
|
var rowids []int64
|
|
for rows.Next() {
|
|
var rid int64
|
|
if err := rows.Scan(&rid); err != nil {
|
|
return nil, err
|
|
}
|
|
rowids = append(rowids, rid)
|
|
}
|
|
if err := rows.Err(); err != nil {
|
|
return nil, err
|
|
}
|
|
if len(rowids) == 0 {
|
|
return nil, nil
|
|
}
|
|
|
|
// Look up files by rowid. files uses a TEXT PK so its rowid is implicit.
|
|
// Snippet is empty for contentless FTS5 (snippet() returns NULL there).
|
|
var hits []VaultSearchHit
|
|
for _, rid := range rowids {
|
|
var h VaultSearchHit
|
|
err := db.QueryRow(`
|
|
SELECT rel_path, size, mtime, mime, bucket, sub_bucket
|
|
FROM files WHERE rowid = ?`, rid,
|
|
).Scan(&h.RelPath, &h.Size, &h.Mtime, &h.Mime, &h.Bucket, &h.SubBucket)
|
|
if err != nil {
|
|
// rowid mismatch (happens after update cycles) — skip gracefully.
|
|
continue
|
|
}
|
|
h.VaultPath = vaultPath
|
|
h.VaultName = vaultName
|
|
h.Snippet = ""
|
|
hits = append(hits, h)
|
|
}
|
|
return hits, nil
|
|
}
|
|
|
|
// vaultSearchLike searches files.rel_path with LIKE, ordered by mtime DESC.
|
|
func vaultSearchLike(db *sql.DB, vaultPath, vaultName, query string, limit int) ([]VaultSearchHit, error) {
|
|
const qLike = `
|
|
SELECT rel_path, size, mtime, mime, bucket, sub_bucket
|
|
FROM files
|
|
WHERE rel_path LIKE '%' || ? || '%'
|
|
ORDER BY mtime DESC
|
|
LIMIT ?`
|
|
|
|
rows, err := db.Query(qLike, query, limit)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
defer rows.Close()
|
|
|
|
var hits []VaultSearchHit
|
|
for rows.Next() {
|
|
var h VaultSearchHit
|
|
if err := rows.Scan(&h.RelPath, &h.Size, &h.Mtime, &h.Mime, &h.Bucket, &h.SubBucket); err != nil {
|
|
return nil, err
|
|
}
|
|
h.VaultPath = vaultPath
|
|
h.VaultName = vaultName
|
|
h.Snippet = ""
|
|
hits = append(hits, h)
|
|
}
|
|
return hits, rows.Err()
|
|
}
|
|
|
|
// resolveVaultName derives a display name for the vault: the last path
// element of vaultPath once symlinks have been resolved. If the symlinks
// cannot be resolved (for example because the path does not exist), the last
// element of vaultPath itself is used instead.
func resolveVaultName(vaultPath string) string {
	if target, err := filepath.EvalSymlinks(vaultPath); err == nil {
		return filepath.Base(target)
	}
	return filepath.Base(vaultPath)
}
|
|
|
|
// safeFTSQuery prepares a user query for use as an FTS5 MATCH expression.
//
// The query is trimmed of surrounding whitespace. An empty result is returned
// as is. A query that looks like deliberate FTS5 syntax is passed through
// untouched: one containing a colon (column-filter syntax), or one containing
// AND, OR or NOT surrounded by spaces (checked case-insensitively). Note that
// such pass-through queries can still be rejected by FTS5, e.g. "foo:bar:".
//
// Anything else is turned into a single quoted phrase, with embedded double
// quotes doubled, so that punctuation such as the hyphen in "hello-world" is
// not interpreted as query syntax.
func safeFTSQuery(query string) string {
	trimmed := strings.TrimSpace(query)
	if trimmed == "" {
		return trimmed
	}
	if strings.Contains(trimmed, ":") {
		return trimmed
	}

	folded := strings.ToUpper(trimmed)
	for _, op := range [...]string{" AND ", " OR ", " NOT "} {
		if strings.Contains(folded, op) {
			return trimmed
		}
	}

	return `"` + strings.ReplaceAll(trimmed, `"`, `""`) + `"`
}
|
|
|
|
// isFTSSyntaxError reports whether err looks like a rejection of the query
// text by the FTS5 parser. The decision is a case-insensitive substring check
// of the error message against a few known markers; a nil error is never a
// syntax error.
func isFTSSyntaxError(err error) bool {
	if err == nil {
		return false
	}
	text := strings.ToLower(err.Error())
	for _, marker := range []string{"syntax error", "no such column", "fts5: syntax error"} {
		if strings.Contains(text, marker) {
			return true
		}
	}
	return false
}
|
|
|
|
// simplifyForLike extracts the term used for the rel_path substring search
// from a raw user query.
//
// It returns the first run of word characters in query, where a word
// character is any Unicode letter or digit, an underscore or a hyphen.
// Everything before the run is skipped and the run ends at the first
// non-word character, so FTS5 syntax such as quotes, colons and operators
// never reaches the LIKE pattern: "foo:bar:" yields "foo". Non-ASCII names
// are kept whole ("café menu" yields "café"). The result is empty when query
// contains no word character at all.
func simplifyForLike(query string) string {
	var token strings.Builder
	for _, r := range query {
		switch {
		case r == '_' || r == '-' || unicode.IsLetter(r) || unicode.IsDigit(r):
			token.WriteRune(r)
		case token.Len() > 0:
			// First separator after the run: the token is complete.
			return token.String()
		}
		// Otherwise: a leading separator (including whitespace) — keep scanning.
	}
	return token.String()
}
|