package main import ( "strings" "unicode" ) // sanitizeFTS5 takes free-form input from an LLM and produces a query the // SQLite FTS5 parser will accept. Rules from CLAUDE.md: // // - After `column:` the value must be a single ASCII alnum/underscore token. // Any other char (`-`, `.`, `:`, space) breaks the parser. // - Multi-word values must be wrapped in double quotes. // // Strategy: if the caller already wrote `column:value`, quote `value` if it // contains anything but `[A-Za-z0-9_]`. Otherwise treat the whole input as a // free-text phrase and split on whitespace, quoting tokens that need it. // // Returns a query suitable to pass to FTS5 MATCH. Empty input returns "". func sanitizeFTS5(q string) string { q = strings.TrimSpace(q) if q == "" { return "" } // If the query contains FTS5 operators we leave it alone except for token // quoting per `column:` clauses. This is a heuristic — power users can // craft their own queries. if hasOperator(q) { return quoteColumnClauses(q) } // Free text: split, quote each token if needed, join with implicit AND. parts := strings.Fields(q) for i, p := range parts { parts[i] = ftsQuote(p) } return strings.Join(parts, " ") } func hasOperator(q string) bool { upper := strings.ToUpper(q) if strings.Contains(q, ":") { return true } if strings.Contains(upper, " OR ") || strings.Contains(upper, " AND ") || strings.Contains(upper, " NEAR(") || strings.Contains(upper, " NOT ") { return true } if strings.Contains(q, "*") || strings.Contains(q, "(") || strings.Contains(q, "\"") { return true } return false } // quoteColumnClauses scans a query and ensures any `column:value` clause has // a quoted value when value contains non-alnum chars. 
//
// The column/value split is made at the first colon that sits outside double
// quotes, so a quoted phrase that merely contains a colon (`"12:30"`) passes
// through untouched. Tokens with no such colon, or with nothing after it, are
// copied verbatim. A trailing `*` (prefix query) is kept outside the quotes
// because FTS5 does not treat a star inside a quoted string as a prefix marker.
// Tokens are re-joined with single spaces.
func quoteColumnClauses(q string) string {
	var b strings.Builder
	for i, t := range tokenize(q) {
		if i > 0 {
			b.WriteByte(' ')
		}
		colon := unquotedColon(t)
		if colon == -1 || colon == len(t)-1 {
			b.WriteString(t)
			continue
		}
		head, val := t[:colon+1], t[colon+1:]
		// Value is already a quoted phrase or a parenthesised group — leave alone.
		if strings.HasPrefix(val, "\"") || strings.HasPrefix(val, "(") {
			b.WriteString(t)
			continue
		}
		// Strip trailing star for prefix queries to assess the body.
		body := strings.TrimSuffix(val, "*")
		if isFTSSafeToken(body) {
			b.WriteString(t)
			continue
		}
		b.WriteString(head)
		if body != "" && len(body) != len(val) {
			// Prefix query with an unsafe body: quote only the body and put the
			// star back after the closing quote so it keeps its meaning.
			b.WriteString(ftsQuote(body))
			b.WriteByte('*')
			continue
		}
		b.WriteString(ftsQuote(val))
	}
	return b.String()
}

// unquotedColon returns the byte index of the first ':' in t that is not
// enclosed in double quotes, or -1 if there is none. A doubled quote (`""`)
// toggles the quote state twice and therefore leaves it unchanged.
func unquotedColon(t string) int {
	inQ := false
	// Byte-wise scan is safe: '"' and ':' are ASCII and never occur inside a
	// multi-byte UTF-8 sequence.
	for i := 0; i < len(t); i++ {
		switch t[i] {
		case '"':
			inQ = !inQ
		case ':':
			if !inQ {
				return i
			}
		}
	}
	return -1
}

// tokenize splits q on whitespace but preserves quoted strings as one token.
// Quote characters are kept in the returned tokens. An unterminated quote
// swallows the rest of the input into the final token. Returns nil when q
// holds no tokens.
func tokenize(q string) []string {
	var out []string
	var cur strings.Builder
	inQ := false
	for _, r := range q {
		switch {
		case r == '"':
			inQ = !inQ
			cur.WriteRune(r)
		case unicode.IsSpace(r) && !inQ:
			// Whitespace outside quotes ends the current token; runs of
			// whitespace never produce empty tokens.
			if cur.Len() > 0 {
				out = append(out, cur.String())
				cur.Reset()
			}
		default:
			cur.WriteRune(r)
		}
	}
	if cur.Len() > 0 {
		out = append(out, cur.String())
	}
	return out
}

// isFTSSafeToken reports whether s is non-empty and consists solely of ASCII
// letters, ASCII digits and underscores.
func isFTSSafeToken(s string) bool {
	if s == "" {
		return false
	}
	for _, r := range s {
		switch {
		case r == '_':
		case r >= '0' && r <= '9':
		case r >= 'a' && r <= 'z':
		case r >= 'A' && r <= 'Z':
		default:
			return false
		}
	}
	return true
}

// ftsQuote wraps a token in double quotes for FTS5, escaping inner quotes by
// doubling them. Tokens that are already safe are returned unchanged; the
// empty string becomes `""`.
func ftsQuote(s string) string {
	if isFTSSafeToken(s) {
		return s
	}
	return "\"" + strings.ReplaceAll(s, "\"", "\"\"") + "\""
}