// Package sanitize detects prompt injection patterns in message text and
// warns about, strips, or rejects them according to a configurable Mode.
package sanitize

import "strings"

// Mode controls how the sanitizer handles detected patterns.
type Mode int

// Supported modes. ModeWarn is the zero value, so an Options value that
// leaves Mode unset reports detections without altering the message.
const (
	ModeWarn   Mode = iota // report warnings but don't modify the message
	ModeStrip              // remove matched patterns from the message
	ModeReject             // reject the message entirely if any pattern matches
)

// String returns the lowercase name of the mode ("warn", "strip" or
// "reject"), or "unknown" for a value outside the declared constants.
func (m Mode) String() string {
	switch m {
	case ModeWarn:
		return "warn"
	case ModeStrip:
		return "strip"
	case ModeReject:
		return "reject"
	default:
		return "unknown"
	}
}

// ParseMode converts a string to a Mode. Matching ignores letter case and
// leading/trailing whitespace, so values read from environment variables or
// config files (e.g. "Strip\n") are not silently downgraded.
// Returns ModeWarn for unrecognized values, including "warn" and "".
func ParseMode(s string) Mode {
	switch strings.ToLower(strings.TrimSpace(s)) {
	case "strip":
		return ModeStrip
	case "reject":
		return ModeReject
	default:
		return ModeWarn
	}
}

// Options configures the sanitizer behavior.
type Options struct {
	Mode             Mode      // how to handle detections
	MinSeverity      Severity  // only act on patterns at or above this severity
	Patterns         []Pattern // patterns to check (nil = DefaultPatterns)
	DisabledPatterns []string  // pattern names to skip
}

// Warning represents a detected prompt injection pattern in the input.
type Warning struct {
	PatternName string   // which pattern matched
	Severity    Severity // threat level
	Matched     string   // the text that matched (first match only)
}

// Result holds the output of a Sanitize call.
type Result struct {
	Output   string    // the (possibly modified) message
	Warnings []Warning // all detected patterns
	Rejected bool      // true if the message was rejected (ModeReject + match found)
}

// Sanitize checks the input for prompt injection patterns and returns
// the result according to the configured mode.
//
// This is a pure function: no I/O, no side effects.
func Sanitize(input string, opts Options) Result { patterns := opts.Patterns if patterns == nil { patterns = DefaultPatterns() } disabled := make(map[string]bool, len(opts.DisabledPatterns)) for _, name := range opts.DisabledPatterns { disabled[name] = true } var warnings []Warning output := input for _, p := range patterns { if disabled[p.Name] { continue } if p.Severity < opts.MinSeverity { continue } loc := p.Regex.FindStringIndex(output) if loc == nil { continue } matched := output[loc[0]:loc[1]] warnings = append(warnings, Warning{ PatternName: p.Name, Severity: p.Severity, Matched: matched, }) if opts.Mode == ModeStrip { output = p.Regex.ReplaceAllString(output, "") } } result := Result{ Output: output, Warnings: warnings, } if opts.Mode == ModeReject && len(warnings) > 0 { result.Rejected = true } return result } // HasHighSeverity returns true if any warning is SeverityHigh. func (r Result) HasHighSeverity() bool { for _, w := range r.Warnings { if w.Severity == SeverityHigh { return true } } return false } // MaxSeverity returns the highest severity among all warnings. // Returns SeverityLow if there are no warnings. func (r Result) MaxSeverity() Severity { max := SeverityLow for _, w := range r.Warnings { if w.Severity > max { max = w.Severity } } return max }