Files
egutierrez e3c8979e8d chore: auto-commit (95 archivos)
- cmd/fn/doctor.go
- cmd/fn/main.go
- cpp/apps/primitives_gallery/playground/tables/CMakeLists.txt
- cpp/apps/primitives_gallery/playground/tables/data_table.cpp
- cpp/apps/primitives_gallery/playground/tables/data_table_logic.cpp
- cpp/apps/primitives_gallery/playground/tables/data_table_logic.h
- cpp/apps/primitives_gallery/playground/tables/self_test.cpp
- cpp/apps/primitives_gallery/playground/tables/tql.cpp
- cpp/apps/primitives_gallery/playground/tables/viz.cpp
- cpp/apps/primitives_gallery/playground/tables/viz.h
- ...

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-13 00:50:34 +02:00

266 lines
8.8 KiB
Go

package infra
import (
	"database/sql"
	"fmt"
	"path/filepath"
	"strings"
	"unicode"
)
// VaultSearchHit describes one file returned by VaultSearch. Each field is
// serialised to JSON under the snake_case key given in its struct tag.
type VaultSearchHit struct {
	// VaultPath is the vault path the search was run against.
	VaultPath string `json:"vault_path"`
	// VaultName is the basename of VaultPath (after resolving symlinks).
	VaultName string `json:"vault_name"`
	// RelPath is the rel_path column of the matched row in the files table.
	RelPath string `json:"rel_path"`
	// Size is the size column of the matched row.
	Size int64 `json:"size"`
	// Mtime is the mtime column of the matched row.
	Mtime int64 `json:"mtime"`
	// Mime is the mime column of the matched row.
	Mime string `json:"mime"`
	// Bucket is the bucket column of the matched row.
	Bucket string `json:"bucket"`
	// SubBucket is the sub_bucket column of the matched row.
	SubBucket string `json:"sub_bucket"`
	// Snippet is the FTS5 snippet, or empty if the match is only by rel_path
	// (fallback).
	Snippet string `json:"snippet"`
}
// VaultSearch looks up files in the index of the vault at vaultPath that match
// query and returns at most limit hits.
//
// Steps:
//  1. A limit <= 0 is replaced by the default of 50.
//  2. The index is opened with VaultIndexOpen and closed again before returning.
//  3. The vault name reported in every hit comes from resolveVaultName(vaultPath),
//     i.e. the basename of vaultPath after resolving symlinks.
//  4. The search itself is delegated to vaultSearchCombined, which merges FTS5
//     matches on files_fts with LIKE matches on files.rel_path, deduplicated by
//     rel_path, and fails only when both strategies fail.
//
// Errors from opening the index or from the combined search are returned
// wrapped with a "vault_search:" prefix.
func VaultSearch(vaultPath, query string, limit int) ([]VaultSearchHit, error) {
	const defaultLimit = 50
	if limit < 1 {
		limit = defaultLimit
	}

	index, openErr := VaultIndexOpen(vaultPath)
	if openErr != nil {
		return nil, fmt.Errorf("vault_search: open index: %w", openErr)
	}
	defer index.Close()

	results, searchErr := vaultSearchCombined(index, vaultPath, resolveVaultName(vaultPath), query, limit)
	if searchErr != nil {
		return nil, fmt.Errorf("vault_search: %w", searchErr)
	}
	return results, nil
}
// vaultSearchCombined answers query with two strategies and merges their
// results, deduplicated by rel_path and capped at limit entries:
//  1. vaultSearchFTSContent: FTS5 MATCH on files_fts, correlated back to files
//     by rowid. Its hits come first in the result.
//  2. vaultSearchLike: LIKE on files.rel_path, used to fill the remaining slots.
//     It is skipped when the limit is already reached or when simplifyForLike
//     yields an empty term.
//
// An FTS5 failure (for example a syntax error caused by the query text) is
// tolerated as long as the LIKE search runs without error; an error is returned
// only when both strategies fail. The returned slice is never nil, so an empty
// result encodes as an empty JSON array.
func vaultSearchCombined(db *sql.DB, vaultPath, vaultName, query string, limit int) ([]VaultSearchHit, error) {
	var merged []VaultSearchHit
	known := make(map[string]struct{})

	// absorb appends the hits of batch whose rel_path has not been seen yet,
	// stopping as soon as the limit is reached.
	absorb := func(batch []VaultSearchHit) {
		for _, candidate := range batch {
			if len(merged) >= limit {
				return
			}
			if _, dup := known[candidate.RelPath]; dup {
				continue
			}
			known[candidate.RelPath] = struct{}{}
			merged = append(merged, candidate)
		}
	}

	// Strategy 1: FTS5. Its error is kept so it can be reported if the LIKE
	// search fails as well.
	contentHits, contentErr := vaultSearchFTSContent(db, vaultPath, vaultName, safeFTSQuery(query), limit)
	if contentErr == nil {
		absorb(contentHits)
	}

	// Strategy 2: LIKE on rel_path, using the first word-like token of query.
	pathTerm := simplifyForLike(query)
	if pathTerm != "" && len(merged) < limit {
		// Over-fetch by the number of paths already collected so that rows
		// dropped as duplicates cannot leave the result short.
		pathHits, pathErr := vaultSearchLike(db, vaultPath, vaultName, pathTerm, limit-len(merged)+len(known))
		if contentErr != nil && pathErr != nil {
			// Both failed — return a combined error.
			return nil, fmt.Errorf("fts: %v; like: %v", contentErr, pathErr)
		}
		absorb(pathHits)
	}

	if merged == nil {
		return []VaultSearchHit{}, nil
	}
	return merged, nil
}
// vaultSearchFTSContent runs safeQuery as an FTS5 MATCH against files_fts and
// maps the matches back to rows of the files table.
//
// It works in two phases:
//  1. Collect up to limit matching rowids from files_fts, ordered by rank.
//  2. Look up each rowid in files and build a hit from that row.
//
// Design note: files_fts is expected to be a contentless FTS5 table
// (content=''), so column values cannot be read back from it and the rowid is
// the only link to files. That link holds only while the rowids of the two
// tables stay in step; a rowid whose lookup fails is skipped rather than
// reported. Snippet is always left empty.
//
// It returns (nil, nil) when nothing matches, and an error only when the
// FTS5 query itself fails.
func vaultSearchFTSContent(db *sql.DB, vaultPath, vaultName, safeQuery string, limit int) ([]VaultSearchHit, error) {
	// Phase 1: ranked rowids of the FTS5 matches.
	matchRows, err := db.Query(`
SELECT rowid
FROM files_fts
WHERE files_fts MATCH ?
ORDER BY rank
LIMIT ?`, safeQuery, limit)
	if err != nil {
		return nil, err
	}
	defer matchRows.Close()

	var matched []int64
	for matchRows.Next() {
		var id int64
		if scanErr := matchRows.Scan(&id); scanErr != nil {
			return nil, scanErr
		}
		matched = append(matched, id)
	}
	if iterErr := matchRows.Err(); iterErr != nil {
		return nil, iterErr
	}
	if len(matched) == 0 {
		return nil, nil
	}

	// Phase 2: resolve every rowid against files (implicit rowid of the table).
	const qFileByRowid = `
SELECT rel_path, size, mtime, mime, bucket, sub_bucket
FROM files WHERE rowid = ?`

	var found []VaultSearchHit
	for _, id := range matched {
		hit := VaultSearchHit{VaultPath: vaultPath, VaultName: vaultName}
		lookupErr := db.QueryRow(qFileByRowid, id).Scan(
			&hit.RelPath, &hit.Size, &hit.Mtime, &hit.Mime, &hit.Bucket, &hit.SubBucket,
		)
		// A failed lookup (e.g. rowid mismatch after update cycles) drops
		// just this match; the remaining ones are still returned.
		if lookupErr == nil {
			found = append(found, hit)
		}
	}
	return found, nil
}
// vaultSearchLike returns the rows of files whose rel_path contains query as a
// substring, ordered by mtime descending and capped at limit rows.
//
// query is matched literally: the LIKE metacharacters '%' and '_' (and the
// escape character '\' itself) are escaped before binding, and the statement
// declares ESCAPE '\'. Without this a query such as "data_table" would also
// match "data-table", because '_' matches any single character in LIKE.
//
// Every hit carries vaultPath and vaultName unchanged and an empty Snippet.
// If row iteration ends with an error, the hits scanned so far are returned
// together with that error.
func vaultSearchLike(db *sql.DB, vaultPath, vaultName, query string, limit int) ([]VaultSearchHit, error) {
	const qLike = `
SELECT rel_path, size, mtime, mime, bucket, sub_bucket
FROM files
WHERE rel_path LIKE '%' || ? || '%' ESCAPE '\'
ORDER BY mtime DESC
LIMIT ?`

	// The backslash must be part of the replacer so that a literal '\' in the
	// query is not mistaken for the escape character.
	literal := strings.NewReplacer(`\`, `\\`, `%`, `\%`, `_`, `\_`).Replace(query)

	rows, err := db.Query(qLike, literal, limit)
	if err != nil {
		return nil, err
	}
	defer rows.Close()

	var hits []VaultSearchHit
	for rows.Next() {
		h := VaultSearchHit{VaultPath: vaultPath, VaultName: vaultName}
		if err := rows.Scan(&h.RelPath, &h.Size, &h.Mtime, &h.Mime, &h.Bucket, &h.SubBucket); err != nil {
			return nil, err
		}
		hits = append(hits, h)
	}
	return hits, rows.Err()
}
// resolveVaultName derives the display name of a vault: the basename of
// vaultPath once symlinks have been resolved. When the path cannot be resolved
// (filepath.EvalSymlinks returns an error) the basename of vaultPath as given
// is used instead.
func resolveVaultName(vaultPath string) string {
	if target, err := filepath.EvalSymlinks(vaultPath); err == nil {
		return filepath.Base(target)
	}
	return filepath.Base(vaultPath)
}
// safeFTSQuery prepares a user-supplied query for an FTS5 MATCH expression.
//
// The trimmed query is returned unchanged when it is empty, when it contains a
// ':' (treated as column-filter syntax), or when it contains one of the FTS5
// boolean operators " AND ", " OR " or " NOT ". FTS5 only recognises these
// operators in upper case, so the check is case-sensitive: lower-case words
// such as "and" are ordinary search terms and do not trigger the pass-through.
//
// Every other query is wrapped in double quotes, with embedded double quotes
// doubled, so FTS5 parses it as a single string. This keeps input such as
// "hello-world" from being rejected as a syntax error. Note that a query
// containing ':' (e.g. "foo:bar:") is passed through as-is and may still be
// rejected by FTS5.
func safeFTSQuery(query string) string {
	q := strings.TrimSpace(query)
	if q == "" {
		return q
	}

	// Column-filter syntax: leave the query to the FTS5 parser.
	if strings.Contains(q, ":") {
		return q
	}
	// Explicit boolean operators: leave the query to the FTS5 parser.
	for _, op := range []string{" AND ", " OR ", " NOT "} {
		if strings.Contains(q, op) {
			return q
		}
	}

	// Escape any double-quotes in the query before wrapping.
	return `"` + strings.ReplaceAll(q, `"`, `""`) + `"`
}
// isFTSSyntaxError reports whether err looks like an FTS5 query parser error.
// The check is a case-insensitive substring match on the error text; a nil
// error is never a syntax error.
func isFTSSyntaxError(err error) bool {
	if err == nil {
		return false
	}
	lowered := strings.ToLower(err.Error())
	// The last marker is already covered by the first; it is kept so the list
	// documents the exact FTS5 wording.
	for _, marker := range []string{"syntax error", "no such column", "fts5: syntax error"} {
		if strings.Contains(lowered, marker) {
			return true
		}
	}
	return false
}
// simplifyForLike extracts the term used for the LIKE fallback: the first
// word-like run in query, or "" when query contains none.
//
// A word-like run consists of letters, digits, combining marks, '_' and '-'.
// Letters and digits are recognised for every script, not just ASCII, so a
// query such as "año" is kept whole instead of being cut down to "a". Any
// other character ends the run: whitespace, or FTS5 syntax such as ':' and
// '"'. Leading separators are skipped, so "foo:bar:" yields "foo" and
// "hello world" yields "hello".
func simplifyForLike(query string) string {
	isWordRune := func(r rune) bool {
		return r == '_' || r == '-' ||
			unicode.IsLetter(r) || unicode.IsDigit(r) || unicode.IsMark(r)
	}

	start := strings.IndexFunc(query, isWordRune)
	if start < 0 {
		return ""
	}
	word := query[start:]
	if end := strings.IndexFunc(word, func(r rune) bool { return !isWordRune(r) }); end >= 0 {
		word = word[:end]
	}
	return word
}