// Package sanitize provides pure functions to detect and neutralize // prompt injection patterns in user messages before they reach the LLM. package sanitize import "regexp" // Pattern represents a known prompt injection pattern with metadata. type Pattern struct { Name string // short identifier (e.g. "system-delimiter") Description string // human-readable explanation Regex *regexp.Regexp // compiled pattern Severity Severity // how dangerous this pattern is } // Severity indicates the threat level of a detected pattern. type Severity int const ( SeverityLow Severity = iota // informational, unlikely to succeed SeverityMedium // known injection technique SeverityHigh // active attempt to override system instructions ) func (s Severity) String() string { switch s { case SeverityLow: return "low" case SeverityMedium: return "medium" case SeverityHigh: return "high" default: return "unknown" } } // DefaultPatterns returns the built-in set of prompt injection patterns. // All patterns are case-insensitive. func DefaultPatterns() []Pattern { return []Pattern{ // ── System delimiter injection ────────────────────────────────── { Name: "system-delimiter", Description: "Attempt to inject system/assistant role delimiters", Regex: regexp.MustCompile(`(?i)<\|(?:system|assistant|user|im_start|im_end)\|>`), Severity: SeverityHigh, }, { Name: "inst-delimiter", Description: "Attempt to inject [INST] or [/INST] delimiters", Regex: regexp.MustCompile(`(?i)\[/?INST\]`), Severity: SeverityHigh, }, { Name: "xml-role-tag", Description: "Attempt to inject XML-style role tags", Regex: regexp.MustCompile(`(?i)]*)?>`), Severity: SeverityHigh, }, // ── Instruction override ─────────────────────────────────────── { Name: "ignore-instructions", Description: "Attempt to override previous instructions", Regex: regexp.MustCompile(`(?i)(?:ignore|disregard|forget|override|bypass)\s+(?:all\s+)?(?:previous|prior|above|earlier|your|the|system)\s+(?:instructions?|rules?|prompts?|guidelines?|constraints?|directives?)`), Severity: SeverityHigh, }, { Name: "new-instructions", Description: "Attempt to inject new system-level instructions", Regex: regexp.MustCompile(`(?i)(?:new|updated?|revised?|actual|real)\s+(?:system\s+)?instructions?:\s`), Severity: SeverityHigh, }, { Name: "you-are-now", Description: "Attempt to redefine the bot's identity", Regex: regexp.MustCompile(`(?i)(?:you\s+are\s+now|from\s+now\s+on\s+you\s+are|act\s+as\s+if\s+you\s+were|pretend\s+(?:to\s+be|you\s+are))\s`), Severity: SeverityMedium, }, // ── Prompt exfiltration ──────────────────────────────────────── { Name: "exfiltrate-prompt", Description: "Attempt to extract the system prompt", Regex: regexp.MustCompile(`(?i)(?:repeat|show|display|print|output|reveal|tell\s+me|give\s+me|show\s+me|what\s+(?:is|are))\s+(?:your\s+)?(?:system\s+)?(?:prompt|instructions?|rules?|guidelines?|initial\s+message)`), Severity: SeverityMedium, }, // ── Developer mode / jailbreak ───────────────────────────────── { Name: "developer-mode", Description: "Attempt to enable a fictional unrestricted mode", Regex: regexp.MustCompile(`(?i)(?:enable|activate|enter|switch\s+to)\s+(?:developer|debug|admin|god|sudo|unrestricted|jailbreak|dan)\s+mode`), Severity: SeverityHigh, }, { Name: "do-anything-now", Description: "DAN (Do Anything Now) jailbreak pattern", Regex: regexp.MustCompile(`(?i)(?:do\s+anything\s+now|DAN\s+mode|you\s+(?:can|must)\s+do\s+anything)`), Severity: SeverityHigh, }, // ── Tool abuse hints ─────────────────────────────────────────── { Name: "tool-abuse-ssh", Description: "Attempt to execute dangerous commands via SSH", Regex: regexp.MustCompile(`(?i)(?:use|call|execute|run)\s+(?:the\s+)?(?:ssh|command)\s+tool\s+(?:to\s+)?(?:run|execute|do)\s`), Severity: SeverityLow, }, // ── Encoding evasion ─────────────────────────────────────────── { Name: "base64-instruction", Description: "Base64-encoded instruction injection", Regex: regexp.MustCompile(`(?i)(?:decode|execute|interpret|run)\s+(?:this\s+)?(?:base64|b64|encoded)[\s:]+[A-Za-z0-9+/]{20,}={0,2}`), Severity: SeverityMedium, }, // ── Spanish variants ─────────────────────────────────────────── { Name: "ignore-instructions-es", Description: "Spanish: attempt to override instructions", Regex: regexp.MustCompile(`(?i)(?:ignora|olvida|descarta)\s+(?:todas?\s+)?(?:las?\s+)?(?:instrucciones?|reglas?|directivas?|restricciones?)\s+(?:anteriores?|previas?|del\s+sistema)`), Severity: SeverityHigh, }, { Name: "you-are-now-es", Description: "Spanish: attempt to redefine identity", Regex: regexp.MustCompile(`(?i)(?:ahora\s+eres|a\s+partir\s+de\s+ahora\s+eres|finge\s+(?:ser|que\s+eres)|actua\s+como\s+si\s+fueras)\s`), Severity: SeverityMedium, }, { Name: "exfiltrate-prompt-es", Description: "Spanish: attempt to extract system prompt", Regex: regexp.MustCompile(`(?i)(?:repite|muestra|muestrame|dime|dame|cual\s+es)\s+(?:tus?\s+)?(?:prompt|instrucciones?|reglas?|mensaje\s+(?:de\s+sistema|inicial))`), Severity: SeverityMedium, }, } }