Files
unibots/pkg/sanitize/patterns.go
T
agent fc644ecd6e feat: import agents_and_robots platform as unibots (Matrix-out, unibus transport)
Reemplaza el scaffold del echobot por la plataforma completa de bots traida
desde ~/DataProyects/Github/agents_and_robots tras la operacion Matrix-out:
los bots ya no hablan por Matrix sino por el bus unibus (modelo todo-rooms +
E2E via shell/transportunibus sobre github.com/enmanuel/unibus/pkg/client).

- go.mod: replace de unibus -> ../unibus y de fn-registry -> ../../../.. (paths
  relativos reajustados a la nueva ubicacion dentro de fn_registry).
- app.md: bump a 0.2.0, descripcion + arquitectura + comandos + gotchas reales.
- modulo Go conservado como github.com/enmanuel/agents (sin reescribir imports).

agents_and_robots queda archivado como museo de la era Matrix.
2026-06-07 11:50:13 +02:00

140 lines
6.0 KiB
Go

// Package sanitize provides pure functions to detect and neutralize
// prompt injection patterns in user messages before they reach the LLM.
package sanitize
import "regexp"
// Pattern represents a known prompt injection pattern with metadata.
//
// Each value pairs a compiled regular expression with a short identifier, a
// human-readable description, and a severity rating.
type Pattern struct {
	Name        string         // short identifier (e.g. "system-delimiter")
	Description string         // human-readable explanation
	Regex       *regexp.Regexp // compiled pattern
	Severity    Severity       // how dangerous this pattern is
}
// Severity is the threat level assigned to a detected pattern.
// Levels are ordered: a numerically larger value is a more serious threat.
type Severity int

// Known severity levels, from least to most dangerous.
const (
	// SeverityLow is informational; the attempt is unlikely to succeed.
	SeverityLow Severity = iota
	// SeverityMedium marks a known injection technique.
	SeverityMedium
	// SeverityHigh marks an active attempt to override system instructions.
	SeverityHigh
)

// String returns the lowercase name of the severity level ("low", "medium",
// or "high"). Any value outside the declared levels yields "unknown".
func (s Severity) String() string {
	// Lookup table indexed by the severity constants themselves.
	labels := [...]string{
		SeverityLow:    "low",
		SeverityMedium: "medium",
		SeverityHigh:   "high",
	}
	if s < SeverityLow || int(s) >= len(labels) {
		return "unknown"
	}
	return labels[s]
}
// compiledDefaultPatterns is the built-in pattern table. It is initialized
// once, at package load, so the regular expressions are not recompiled every
// time DefaultPatterns is called. Every expression carries the (?i) flag.
var compiledDefaultPatterns = []Pattern{
	// ── System delimiter injection ──────────────────────────────────
	{
		Name:        "system-delimiter",
		Description: "Attempt to inject system/assistant role delimiters",
		Regex:       regexp.MustCompile(`(?i)<\|(?:system|assistant|user|im_start|im_end)\|>`),
		Severity:    SeverityHigh,
	},
	{
		Name:        "inst-delimiter",
		Description: "Attempt to inject [INST] or [/INST] delimiters",
		Regex:       regexp.MustCompile(`(?i)\[/?INST\]`),
		Severity:    SeverityHigh,
	},
	{
		Name:        "xml-role-tag",
		Description: "Attempt to inject XML-style role tags",
		Regex:       regexp.MustCompile(`(?i)</?(?:system|assistant|human|user)(?:\s[^>]*)?>`),
		Severity:    SeverityHigh,
	},
	// ── Instruction override ───────────────────────────────────────
	{
		Name:        "ignore-instructions",
		Description: "Attempt to override previous instructions",
		// One to three qualifiers are accepted (and an optional "all of") so
		// that stacked forms such as "ignore your previous instructions" or
		// "ignore all of the above rules" are caught, not only the
		// single-qualifier form.
		Regex:    regexp.MustCompile(`(?i)(?:ignore|disregard|forget|override|bypass)\s+(?:all\s+(?:of\s+)?)?(?:(?:previous|prior|above|earlier|your|the|system)\s+){1,3}(?:instructions?|rules?|prompts?|guidelines?|constraints?|directives?)`),
		Severity: SeverityHigh,
	},
	{
		Name:        "new-instructions",
		Description: "Attempt to inject new system-level instructions",
		Regex:       regexp.MustCompile(`(?i)(?:new|updated?|revised?|actual|real)\s+(?:system\s+)?instructions?:\s`),
		Severity:    SeverityHigh,
	},
	{
		Name:        "you-are-now",
		Description: "Attempt to redefine the bot's identity",
		Regex:       regexp.MustCompile(`(?i)(?:you\s+are\s+now|from\s+now\s+on\s+you\s+are|act\s+as\s+if\s+you\s+were|pretend\s+(?:to\s+be|you\s+are))\s`),
		Severity:    SeverityMedium,
	},
	// ── Prompt exfiltration ────────────────────────────────────────
	{
		Name:        "exfiltrate-prompt",
		Description: "Attempt to extract the system prompt",
		Regex:       regexp.MustCompile(`(?i)(?:repeat|show|display|print|output|reveal|tell\s+me|give\s+me|show\s+me|what\s+(?:is|are))\s+(?:your\s+)?(?:system\s+)?(?:prompt|instructions?|rules?|guidelines?|initial\s+message)`),
		Severity:    SeverityMedium,
	},
	// ── Developer mode / jailbreak ─────────────────────────────────
	{
		Name:        "developer-mode",
		Description: "Attempt to enable a fictional unrestricted mode",
		Regex:       regexp.MustCompile(`(?i)(?:enable|activate|enter|switch\s+to)\s+(?:developer|debug|admin|god|sudo|unrestricted|jailbreak|dan)\s+mode`),
		Severity:    SeverityHigh,
	},
	{
		Name:        "do-anything-now",
		Description: "DAN (Do Anything Now) jailbreak pattern",
		// \b keeps "DAN mode" from matching inside longer words: because the
		// pattern is case-insensitive, "Jordan mode" would otherwise be
		// reported as a high-severity jailbreak.
		Regex:    regexp.MustCompile(`(?i)(?:do\s+anything\s+now|\bDAN\s+mode|you\s+(?:can|must)\s+do\s+anything)`),
		Severity: SeverityHigh,
	},
	// ── Tool abuse hints ───────────────────────────────────────────
	{
		Name:        "tool-abuse-ssh",
		Description: "Attempt to execute dangerous commands via SSH",
		Regex:       regexp.MustCompile(`(?i)(?:use|call|execute|run)\s+(?:the\s+)?(?:ssh|command)\s+tool\s+(?:to\s+)?(?:run|execute|do)\s`),
		Severity:    SeverityLow,
	},
	// ── Encoding evasion ───────────────────────────────────────────
	{
		Name:        "base64-instruction",
		Description: "Base64-encoded instruction injection",
		Regex:       regexp.MustCompile(`(?i)(?:decode|execute|interpret|run)\s+(?:this\s+)?(?:base64|b64|encoded)[\s:]+[A-Za-z0-9+/]{20,}={0,2}`),
		Severity:    SeverityMedium,
	},
	// ── Spanish variants ───────────────────────────────────────────
	// Accented and unaccented spellings are both accepted: correct Spanish
	// orthography writes "actúa", "muéstrame" and "cuál", while casual input
	// frequently drops the accent.
	{
		Name:        "ignore-instructions-es",
		Description: "Spanish: attempt to override instructions",
		// "tus" is accepted next to "la(s)" so that "ignora tus instrucciones
		// anteriores" is detected as well.
		Regex:    regexp.MustCompile(`(?i)(?:ignora|olvida|descarta)\s+(?:todas?\s+)?(?:(?:las?|tus)\s+)?(?:instrucciones?|reglas?|directivas?|restricciones?)\s+(?:anteriores?|previas?|del\s+sistema)`),
		Severity: SeverityHigh,
	},
	{
		Name:        "you-are-now-es",
		Description: "Spanish: attempt to redefine identity",
		Regex:       regexp.MustCompile(`(?i)(?:ahora\s+eres|a\s+partir\s+de\s+ahora\s+eres|finge\s+(?:ser|que\s+eres)|act[uú]a\s+como\s+si\s+fueras)\s`),
		Severity:    SeverityMedium,
	},
	{
		Name:        "exfiltrate-prompt-es",
		Description: "Spanish: attempt to extract system prompt",
		// "cuáles son" mirrors the English "what are" alternative; without it
		// the plural nouns below could not follow the interrogative.
		Regex:    regexp.MustCompile(`(?i)(?:repite|muestra|mu[eé]strame|dime|dame|cu[aá]l\s+es|cu[aá]les\s+son)\s+(?:tus?\s+)?(?:prompt|instrucciones?|reglas?|mensaje\s+(?:de\s+sistema|inicial))`),
		Severity: SeverityMedium,
	},
}

// DefaultPatterns returns the built-in set of prompt injection patterns.
// All patterns are case-insensitive.
//
// The regular expressions are compiled once at package initialization. Each
// call returns a freshly allocated slice, so callers may append to, reorder,
// or overwrite elements of the result without affecting later calls. The
// *regexp.Regexp values themselves are shared between calls.
func DefaultPatterns() []Pattern {
	out := make([]Pattern, len(compiledDefaultPatterns))
	copy(out, compiledDefaultPatterns)
	return out
}