package infra

import (
	"database/sql"
	"errors"
	"fmt"
	"path/filepath"
	"strings"
)

// VaultSearchHit is a single result returned by VaultSearch.
type VaultSearchHit struct {
	VaultPath string `json:"vault_path"` // vault path exactly as passed to VaultSearch
	VaultName string `json:"vault_name"` // basename of VaultPath (after resolving symlinks)
	RelPath   string `json:"rel_path"`   // files.rel_path of the matched row
	Size      int64  `json:"size"`
	Mtime     int64  `json:"mtime"`
	Mime      string `json:"mime"`
	Bucket    string `json:"bucket"`
	SubBucket string `json:"sub_bucket"`
	Snippet   string `json:"snippet"` // currently always empty: neither search strategy produces a snippet
}

// VaultSearch searches vault_index.db inside vaultPath for files matching query.
//
// Behaviour:
//  1. Opens the index via VaultIndexOpen and closes it before returning.
//  2. If limit <= 0, defaults to 50.
//  3. Runs an FTS5 MATCH query over files_fts and correlates the matching
//     rowids back to the files table.
//  4. Also searches files.rel_path with LIKE to find path matches.
//  5. Results from both searches are merged (FTS hits first, deduplicated by
//     rel_path) and capped at limit.
//  6. If both the FTS5 and the LIKE query fail, returns the combined error.
//  7. VaultName is derived from the basename of vaultPath (after resolving
//     symlinks).
//
// On success the returned slice is never nil, so it encodes to JSON as []
// rather than null.
func VaultSearch(vaultPath, query string, limit int) ([]VaultSearchHit, error) {
	if limit <= 0 {
		limit = 50
	}

	db, err := VaultIndexOpen(vaultPath)
	if err != nil {
		return nil, fmt.Errorf("vault_search: open index: %w", err)
	}
	defer db.Close()

	vaultName := resolveVaultName(vaultPath)
	hits, err := vaultSearchCombined(db, vaultPath, vaultName, query, limit)
	if err != nil {
		return nil, fmt.Errorf("vault_search: %w", err)
	}
	return hits, nil
}

// vaultSearchCombined runs the search using two strategies and merges the
// deduplicated results:
//  1. FTS5 MATCH on files_fts, correlated back to files by rowid.
//  2. LIKE on files.rel_path.
//
// FTS hits come first, then LIKE hits; results are deduplicated by rel_path
// and truncated to limit entries. A failure of a single strategy is tolerated
// (the other strategy's results are returned); an error is returned only when
// both strategies were attempted and both failed.
func vaultSearchCombined(db *sql.DB, vaultPath, vaultName, query string, limit int) ([]VaultSearchHit, error) {
	seen := make(map[string]struct{})
	// Start from an empty, non-nil slice so callers always get [] in JSON.
	hits := make([]VaultSearchHit, 0)

	// merge appends the not-yet-seen candidates to hits until limit is reached.
	merge := func(candidates []VaultSearchHit) {
		for _, h := range candidates {
			if len(hits) >= limit {
				return
			}
			if _, dup := seen[h.RelPath]; dup {
				continue
			}
			seen[h.RelPath] = struct{}{}
			hits = append(hits, h)
		}
	}

	// Strategy 1: FTS5 MATCH. A blank query is skipped instead of sending an
	// empty MATCH expression to SQLite; the LIKE token is empty in that case
	// too, so the overall result (no hits, no error) is unchanged.
	var ftsErr error
	if ftsQuery := safeFTSQuery(query); ftsQuery != "" {
		var ftsHits []VaultSearchHit
		ftsHits, ftsErr = vaultSearchFTSContent(db, vaultPath, vaultName, ftsQuery, limit)
		if ftsErr == nil {
			merge(ftsHits)
		}
		// An FTS failure (syntax error or otherwise) is deliberately not fatal
		// here: the LIKE strategy below still gets a chance to answer.
	}

	// Strategy 2: LIKE on rel_path, using the first word-like token of the
	// query so that FTS5-specific syntax (e.g. "foo:bar:") still yields a
	// usable pattern.
	likeQuery := simplifyForLike(query)
	if len(hits) < limit && likeQuery != "" {
		// Asking for limit rows is enough: at most len(hits) of them can be
		// duplicates of FTS hits, leaving at least limit-len(hits) new ones.
		likeHits, likeErr := vaultSearchLike(db, vaultPath, vaultName, likeQuery, limit)
		if likeErr != nil && ftsErr != nil {
			// Both failed — return a combined error.
			return nil, fmt.Errorf("fts: %v; like: %v", ftsErr, likeErr)
		}
		merge(likeHits)
	}

	return hits, nil
}

// vaultSearchFTSContent queries files_fts with a MATCH expression and
// correlates the results back to the files table by rowid.
//
// The rowids of the best-ranked matches (at most limit) are read from
// files_fts first; each one is then looked up in files. A rowid with no
// corresponding files row is skipped, so fewer than limit hits may be
// returned. Any other lookup error aborts the search and is returned, so that
// real database failures are not reported as "no match".
//
// NOTE(review): the correlation assumes files_fts rowids equal the implicit
// rowids of files; that invariant is maintained by the index writer, which is
// not visible from this file — confirm there.
func vaultSearchFTSContent(db *sql.DB, vaultPath, vaultName, safeQuery string, limit int) ([]VaultSearchHit, error) {
	const qRowids = `
		SELECT rowid
		FROM files_fts
		WHERE files_fts MATCH ?
		ORDER BY rank
		LIMIT ?`
	const qFile = `
		SELECT rel_path, size, mtime, mime, bucket, sub_bucket
		FROM files
		WHERE rowid = ?`

	rows, err := db.Query(qRowids, safeQuery, limit)
	if err != nil {
		return nil, err
	}
	defer rows.Close()

	var rowids []int64
	for rows.Next() {
		var rid int64
		if err := rows.Scan(&rid); err != nil {
			return nil, err
		}
		rowids = append(rowids, rid)
	}
	if err := rows.Err(); err != nil {
		return nil, err
	}
	if len(rowids) == 0 {
		return nil, nil
	}

	hits := make([]VaultSearchHit, 0, len(rowids))
	for _, rid := range rowids {
		// Snippet stays empty: this strategy does not produce one.
		h := VaultSearchHit{VaultPath: vaultPath, VaultName: vaultName}
		err := db.QueryRow(qFile, rid).
			Scan(&h.RelPath, &h.Size, &h.Mtime, &h.Mime, &h.Bucket, &h.SubBucket)
		if errors.Is(err, sql.ErrNoRows) {
			// rowid mismatch (happens after update cycles) — skip gracefully.
			continue
		}
		if err != nil {
			return nil, fmt.Errorf("lookup files rowid %d: %w", rid, err)
		}
		hits = append(hits, h)
	}
	return hits, nil
}

// likePatternEscaper escapes the LIKE metacharacters (and the escape character
// itself) so that user input is matched literally under ESCAPE '\'.
var likePatternEscaper = strings.NewReplacer(`\`, `\\`, `%`, `\%`, `_`, `\_`)

// vaultSearchLike returns up to limit files whose rel_path contains query as a
// literal substring, ordered by mtime DESC.
//
// LIKE wildcards in query ('%' and '_') are escaped, so a query such as
// "foo_bar" matches only a real underscore rather than any single character.
// If row iteration ends with an error, the rows scanned so far are returned
// together with that error.
func vaultSearchLike(db *sql.DB, vaultPath, vaultName, query string, limit int) ([]VaultSearchHit, error) {
	const qLike = `
		SELECT rel_path, size, mtime, mime, bucket, sub_bucket
		FROM files
		WHERE rel_path LIKE '%' || ? || '%' ESCAPE '\'
		ORDER BY mtime DESC
		LIMIT ?`

	rows, err := db.Query(qLike, likePatternEscaper.Replace(query), limit)
	if err != nil {
		return nil, err
	}
	defer rows.Close()

	var hits []VaultSearchHit
	for rows.Next() {
		// Snippet stays empty: a path match has no content snippet.
		h := VaultSearchHit{VaultPath: vaultPath, VaultName: vaultName}
		if err := rows.Scan(&h.RelPath, &h.Size, &h.Mtime, &h.Mime, &h.Bucket, &h.SubBucket); err != nil {
			return nil, err
		}
		hits = append(hits, h)
	}
	return hits, rows.Err()
}

// resolveVaultName returns the basename of vaultPath after resolving symlinks.
// If EvalSymlinks fails, the basename of the unresolved vaultPath is used.
func resolveVaultName(vaultPath string) string {
	resolved, err := filepath.EvalSymlinks(vaultPath)
	if err != nil {
		resolved = vaultPath
	}
	return filepath.Base(resolved)
}

// safeFTSQuery prepares query for use as an FTS5 MATCH expression.
//
// The query is trimmed; an empty result is returned as-is. A query that
// contains a colon (FTS5 column-filter syntax) or a space-delimited AND, OR or
// NOT (matched case-insensitively) is passed through unchanged so the caller's
// explicit FTS5 syntax is honoured. Such a pass-through may still be rejected
// by FTS5 (e.g. "foo:bar:"); the caller is expected to tolerate that error.
// Every other query is wrapped in double quotes, with embedded double quotes
// doubled, so that input like "hello-world" is treated as a plain string
// instead of being parsed as FTS5 operators.
func safeFTSQuery(query string) string {
	q := strings.TrimSpace(query)
	if q == "" {
		return q
	}

	upper := strings.ToUpper(q)
	usesFTSSyntax := strings.Contains(q, ":") ||
		strings.Contains(upper, " AND ") ||
		strings.Contains(upper, " OR ") ||
		strings.Contains(upper, " NOT ")
	if usesFTSSyntax {
		return q
	}

	// Escape any double-quotes in the query before wrapping.
	return `"` + strings.ReplaceAll(q, `"`, `""`) + `"`
}

// isFTSSyntaxError returns true when the error looks like an FTS5 query parser error.
func isFTSSyntaxError(err error) bool {
	if err == nil {
		return false
	}

	// The comparison is case-insensitive: the message is lowered once and then
	// checked against each known parser-error marker.
	lowered := strings.ToLower(err.Error())
	for _, marker := range []string{"syntax error", "no such column", "fts5: syntax error"} {
		if strings.Contains(lowered, marker) {
			return true
		}
	}
	return false
}

// simplifyForLike extracts a token from query that is safe to use in a LIKE
// substring search.
//
// It returns the first run of ASCII letters, digits, underscores and hyphens
// found in query; any characters before that run are skipped and everything
// from the first non-matching character after it is discarded. For example,
// "foo:bar:" yields "foo" and "hello world" yields "hello". The empty string
// is returned when query contains no such character.
func simplifyForLike(query string) string {
	isWordRune := func(r rune) bool {
		switch {
		case 'a' <= r && r <= 'z', 'A' <= r && r <= 'Z', '0' <= r && r <= '9':
			return true
		}
		return r == '_' || r == '-'
	}

	trimmed := strings.TrimSpace(query)
	start := strings.IndexFunc(trimmed, isWordRune)
	if start < 0 {
		return ""
	}

	// Word runes are all single-byte ASCII, so byte slicing at these indexes
	// never splits a multi-byte character.
	tail := trimmed[start:]
	if end := strings.IndexFunc(tail, func(r rune) bool { return !isWordRune(r) }); end >= 0 {
		return tail[:end]
	}
	return tail
}