chore: sync from fn-registry agent
This commit is contained in:
@@ -0,0 +1,129 @@
|
||||
package main
|
||||
|
||||
import (
|
||||
"strings"
|
||||
"unicode"
|
||||
)
|
||||
|
||||
// sanitizeFTS5 takes free-form input from an LLM and produces a query the
|
||||
// SQLite FTS5 parser will accept. Rules from CLAUDE.md:
|
||||
//
|
||||
// - After `column:` the value must be a single ASCII alnum/underscore token.
|
||||
// Any other char (`-`, `.`, `:`, space) breaks the parser.
|
||||
// - Multi-word values must be wrapped in double quotes.
|
||||
//
|
||||
// Strategy: if the caller already wrote `column:value`, quote `value` if it
|
||||
// contains anything but `[A-Za-z0-9_]`. Otherwise treat the whole input as a
|
||||
// free-text phrase and split on whitespace, quoting tokens that need it.
|
||||
//
|
||||
// Returns a query suitable to pass to FTS5 MATCH. Empty input returns "".
|
||||
func sanitizeFTS5(q string) string {
|
||||
q = strings.TrimSpace(q)
|
||||
if q == "" {
|
||||
return ""
|
||||
}
|
||||
// If the query contains FTS5 operators we leave it alone except for token
|
||||
// quoting per `column:` clauses. This is a heuristic — power users can
|
||||
// craft their own queries.
|
||||
if hasOperator(q) {
|
||||
return quoteColumnClauses(q)
|
||||
}
|
||||
// Free text: split, quote each token if needed, join with implicit AND.
|
||||
parts := strings.Fields(q)
|
||||
for i, p := range parts {
|
||||
parts[i] = ftsQuote(p)
|
||||
}
|
||||
return strings.Join(parts, " ")
|
||||
}
|
||||
|
||||
// hasOperator reports whether q looks like it already uses FTS5 query syntax
// (as opposed to being plain free text).
//
// It returns true when q contains any of:
//
//   - `:`  (column filter), `*` (prefix query), `(` (grouping or NEAR(...)),
//     or `"` (quoted phrase);
//   - the keyword AND, OR or NOT standing as its own whitespace-delimited word
//     with at least one word on each side.
//
// Keywords are matched case-sensitively because FTS5 only recognizes the
// upper-case spellings; a lower-case "and"/"or"/"not" is an ordinary search
// term, and treating it as an operator would skip the free-text quoting that
// tokens such as `foo-bar` need.
func hasOperator(q string) bool {
	// Syntax characters. `(` also covers ` NEAR(`, so NEAR needs no separate
	// keyword check.
	if strings.ContainsAny(q, ":*(\"") {
		return true
	}

	// Binary keywords only count when they sit between two other words; a
	// leading or trailing keyword cannot be a valid operator.
	words := strings.Fields(q)
	for i := 1; i < len(words)-1; i++ {
		switch words[i] {
		case "AND", "OR", "NOT":
			return true
		}
	}
	return false
}
|
||||
|
||||
// quoteColumnClauses scans a query and ensures any `column:value` clause has
|
||||
// a quoted value when value contains non-alnum chars.
|
||||
func quoteColumnClauses(q string) string {
|
||||
var b strings.Builder
|
||||
tokens := tokenize(q)
|
||||
for i, t := range tokens {
|
||||
if i > 0 {
|
||||
b.WriteByte(' ')
|
||||
}
|
||||
colon := strings.IndexByte(t, ':')
|
||||
if colon == -1 || colon == len(t)-1 {
|
||||
b.WriteString(t)
|
||||
continue
|
||||
}
|
||||
head := t[:colon+1]
|
||||
val := t[colon+1:]
|
||||
// Already quoted or starts with paren/star — leave alone.
|
||||
if strings.HasPrefix(val, "\"") || strings.HasPrefix(val, "(") {
|
||||
b.WriteString(t)
|
||||
continue
|
||||
}
|
||||
// Strip trailing star for prefix queries to assess the body.
|
||||
body := strings.TrimSuffix(val, "*")
|
||||
if isFTSSafeToken(body) {
|
||||
b.WriteString(t)
|
||||
continue
|
||||
}
|
||||
b.WriteString(head)
|
||||
b.WriteString(ftsQuote(val))
|
||||
}
|
||||
return b.String()
|
||||
}
|
||||
|
||||
// tokenize breaks q into whitespace-separated tokens, except that whitespace
// appearing between a pair of double quotes does not split. The quote
// characters themselves stay in the token. Every `"` flips the in-quotes
// state, so an unterminated quote keeps the remainder of q in one token.
// Empty tokens are never produced; input with no tokens yields a nil slice.
func tokenize(q string) []string {
	var (
		tokens []string
		buf    strings.Builder
		quoted bool
	)

	// flush moves the pending token, if any, into tokens.
	flush := func() {
		if buf.Len() == 0 {
			return
		}
		tokens = append(tokens, buf.String())
		buf.Reset()
	}

	for _, ch := range q {
		if ch != '"' && !quoted && unicode.IsSpace(ch) {
			flush()
			continue
		}
		if ch == '"' {
			quoted = !quoted
		}
		buf.WriteRune(ch)
	}
	flush()
	return tokens
}
|
||||
|
||||
// isFTSSafeToken reports whether s may be written into an FTS5 query as a bare
// (unquoted) term.
//
// s is safe when it is non-empty, consists solely of ASCII letters, digits and
// underscores, and is not one of the FTS5 reserved words AND, OR or NOT. The
// reserved words are matched case-sensitively (FTS5 only treats the upper-case
// spellings as operators); left unquoted they would be parsed as operators
// instead of search terms.
func isFTSSafeToken(s string) bool {
	switch s {
	case "", "AND", "OR", "NOT":
		return false
	}
	for _, r := range s {
		switch {
		case r == '_':
		case r >= '0' && r <= '9':
		case r >= 'a' && r <= 'z':
		case r >= 'A' && r <= 'Z':
		default:
			return false
		}
	}
	return true
}
|
||||
|
||||
// ftsQuote wraps a token in double quotes for FTS5, escaping inner quotes.
|
||||
func ftsQuote(s string) string {
|
||||
if isFTSSafeToken(s) {
|
||||
return s
|
||||
}
|
||||
return "\"" + strings.ReplaceAll(s, "\"", "\"\"") + "\""
|
||||
}
|
||||
Reference in New Issue
Block a user