diff --git a/agents/runtime.go b/agents/runtime.go
index 56dad09..fa21eb7 100644
--- a/agents/runtime.go
+++ b/agents/runtime.go
@@ -22,6 +22,7 @@ import (
 	"github.com/enmanuel/agents/pkg/memory"
 	"github.com/enmanuel/agents/pkg/orchestration"
 	"github.com/enmanuel/agents/pkg/personality"
+	"github.com/enmanuel/agents/pkg/sanitize"
 	"github.com/enmanuel/agents/shell/bus"
 	"github.com/enmanuel/agents/shell/effects"
 	shellknowledge "github.com/enmanuel/agents/shell/knowledge"
@@ -83,6 +84,9 @@ type Agent struct {
 	// Knowledge store — non-nil when knowledge is enabled
 	knowledgeStore *shellknowledge.FileStore
 
+	// Sanitization options — nil when sanitization is disabled
+	sanitizeOpts *sanitize.Options
+
 	// Bus — set via SetBus() when running under the unified launcher
 	agentBus *bus.Bus
 }
@@ -246,6 +250,30 @@ func New(cfg *config.AgentConfig, rules []decision.Rule, logger *slog.Logger) (*
 		roomCtx: roomCtx,
 	}
 
+	// Configure sanitization if enabled. sanitizeOpts stays nil otherwise, and
+	// sanitizeInput then passes content through untouched.
+	if cfg.Security.Sanitize.Enabled {
+		// parseSeverity maps anything it does not recognise to medium, so warn
+		// about unexpected values instead of hiding a configuration typo.
+		switch cfg.Security.Sanitize.MinSeverity {
+		case "", "low", "medium", "high":
+			// Recognised value, or unset (defaults to medium).
+		default:
+			logger.Warn("unknown sanitize min_severity, falling back to medium",
+				"value", cfg.Security.Sanitize.MinSeverity)
+		}
+		minSev := parseSeverity(cfg.Security.Sanitize.MinSeverity)
+		a.sanitizeOpts = &sanitize.Options{
+			Mode:             sanitize.ParseMode(cfg.Security.Sanitize.Mode),
+			MinSeverity:      minSev,
+			DisabledPatterns: cfg.Security.Sanitize.DisabledPatterns,
+		}
+		logger.Info("input sanitization enabled",
+			"mode", a.sanitizeOpts.Mode,
+			"min_severity", minSev,
+		)
+	}
+
 	// Register built-in command handlers
 	a.registerBuiltinCommands()
 
@@ -424,6 +452,22 @@ func (a *Agent) handleTaskEvent(ctx context.Context, msg bus.AgentMessage) {
 			"\n\nPlease provide an improved or complementary answer."
 	}
 
+	// Sanitize orchestrated input; a rejected task is logged, the room is
+	// notified, and processing stops here.
+	sanitized, rejected := a.sanitizeInput(msgCtx.Content, roomID, msgCtx.SenderID)
+	if rejected {
+		a.logger.Warn("orchestrated task rejected by sanitizer",
+			"task_id", task.TaskID, "sender", task.OriginalSender)
+		// Best-effort notification: the task is dropped either way, but a
+		// send that did not go through should still show up in the logs.
+		if err := a.matrix.SendMarkdown(ctx, roomID, "El mensaje fue rechazado por el filtro de seguridad."); err != nil {
+			a.logger.Warn("sanitizer rejection notice not delivered",
+				"task_id", task.TaskID, "room", roomID, "error", err)
+		}
+		return
+	}
+	msgCtx.Content = sanitized
+
 	// Load memory and run LLM
 	a.ensureWindowLoaded(ctx, roomID)
 	a.appendToWindow(roomID, coretypes.Message{
@@ -580,6 +624,20 @@ func (a *Agent) handleEvent(ctx context.Context, msgCtx decision.MessageContext,
 
 // executeActions expands LLM actions and runs the effects runner.
 func (a *Agent) executeActions(ctx context.Context, roomID string, msgCtx decision.MessageContext, actions []decision.Action) {
+	// Sanitize user input before sending to LLM. A rejected message is answered
+	// with a fixed reply and none of the requested actions run.
+	// NOTE(review): this check also runs when actions contains no LLM action —
+	// confirm that rejecting such messages is intended.
+	sanitized, rejected := a.sanitizeInput(msgCtx.Content, roomID, msgCtx.SenderID)
+	if rejected {
+		a.runner.Execute(ctx, roomID, []decision.Action{{
+			Kind:  decision.ActionKindReply,
+			Reply: &decision.ReplyAction{Content: "Tu mensaje fue rechazado por el filtro de seguridad.", InReplyTo: msgCtx.EventID},
+		}})
+		return
+	}
+	msgCtx.Content = sanitized
+
 	expanded := make([]decision.Action, 0, len(actions))
 	for _, act := range actions {
 		if act.Kind == decision.ActionKindLLM {
@@ -806,6 +864,45 @@ func (a *Agent) persistMessage(ctx context.Context, roomID string, role coretype
 	}
 }
 
+// parseSeverity converts the min_severity config string to a sanitize.Severity.
+// Only the exact lowercase values "high" and "low" are recognised; every other
+// input, including the empty string, yields sanitize.SeverityMedium.
+func parseSeverity(s string) sanitize.Severity {
+	switch s {
+	case "high":
+		return sanitize.SeverityHigh
+	case "low":
+		return sanitize.SeverityLow
+	default:
+		return sanitize.SeverityMedium
+	}
+}
+
+// sanitizeInput runs prompt injection detection on the message content.
+// It returns the (possibly modified) content and true if the message should be
+// rejected. When sanitization is disabled (sanitizeOpts is nil) the content is
+// returned unchanged and never rejected. Every warning reported by the
+// sanitizer is logged together with the sender and room.
+func (a *Agent) sanitizeInput(content, roomID, senderID string) (string, bool) {
+	if a.sanitizeOpts == nil {
+		return content, false
+	}
+
+	result := sanitize.Sanitize(content, *a.sanitizeOpts)
+
+	for _, w := range result.Warnings {
+		a.logger.Warn("prompt_injection_detected",
+			"pattern", w.PatternName,
+			"severity", w.Severity,
+			"matched", w.Matched,
+			"sender", senderID,
+			"room", roomID,
+		)
+	}
+
+	return result.Output, result.Rejected
+}
+
 // buildToolRegistry creates a Registry with tools enabled in the agent's config.
 func buildToolRegistry(
 	cfg *config.AgentConfig,
diff --git a/internal/config/schema.go b/internal/config/schema.go
index 0d209cc..a7f2fae 100644
--- a/internal/config/schema.go
+++ b/internal/config/schema.go
@@ -280,9 +280,18 @@ type SSHTargetCfg struct {
 // ── Security ──────────────────────────────────────────────────────────────
 
 type SecurityCfg struct {
-	Roles   map[string]RoleCfg `yaml:"roles"`
-	Audit   AuditCfg           `yaml:"audit"`
-	Secrets SecretsCfg         `yaml:"secrets"`
+	Roles    map[string]RoleCfg `yaml:"roles"`
+	Audit    AuditCfg           `yaml:"audit"`
+	Secrets  SecretsCfg         `yaml:"secrets"`
+	Sanitize SanitizeCfg        `yaml:"sanitize"`
+}
+
+// SanitizeCfg controls prompt injection detection on incoming messages.
+type SanitizeCfg struct {
+	Enabled          bool     `yaml:"enabled"`           // enable sanitization (default false)
+	Mode             string   `yaml:"mode"`              // warn | strip | reject (default warn)
+	MinSeverity      string   `yaml:"min_severity"`      // low | medium | high (default medium)
+	DisabledPatterns []string `yaml:"disabled_patterns"` // pattern names to skip
 }
 
 type RoleCfg struct {