feat: integrar sanitización de input en runtime y config
- agents/runtime.go: campo sanitizeOpts en Agent y sanitizeInput(), que llama a sanitize.Sanitize() y loguea warnings. Integrado en executeActions() y handleTaskEvent() antes de enviar al LLM. En modo reject, responde al usuario y corta el flujo.
- internal/config/schema.go: nuevo tipo SanitizeCfg dentro de SecurityCfg con campos enabled, mode, min_severity, disabled_patterns. Protegido por feature flag prompt-injection-hardening (OFF). Se activa por agente vía security.sanitize.enabled en config.yaml.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -22,6 +22,7 @@ import (
|
|||||||
"github.com/enmanuel/agents/pkg/memory"
|
"github.com/enmanuel/agents/pkg/memory"
|
||||||
"github.com/enmanuel/agents/pkg/orchestration"
|
"github.com/enmanuel/agents/pkg/orchestration"
|
||||||
"github.com/enmanuel/agents/pkg/personality"
|
"github.com/enmanuel/agents/pkg/personality"
|
||||||
|
"github.com/enmanuel/agents/pkg/sanitize"
|
||||||
"github.com/enmanuel/agents/shell/bus"
|
"github.com/enmanuel/agents/shell/bus"
|
||||||
"github.com/enmanuel/agents/shell/effects"
|
"github.com/enmanuel/agents/shell/effects"
|
||||||
shellknowledge "github.com/enmanuel/agents/shell/knowledge"
|
shellknowledge "github.com/enmanuel/agents/shell/knowledge"
|
||||||
@@ -83,6 +84,9 @@ type Agent struct {
|
|||||||
// Knowledge store — non-nil when knowledge is enabled
|
// Knowledge store — non-nil when knowledge is enabled
|
||||||
knowledgeStore *shellknowledge.FileStore
|
knowledgeStore *shellknowledge.FileStore
|
||||||
|
|
||||||
|
// Sanitization options — nil when sanitization is disabled
|
||||||
|
sanitizeOpts *sanitize.Options
|
||||||
|
|
||||||
// Bus — set via SetBus() when running under the unified launcher
|
// Bus — set via SetBus() when running under the unified launcher
|
||||||
agentBus *bus.Bus
|
agentBus *bus.Bus
|
||||||
}
|
}
|
||||||
@@ -246,6 +250,20 @@ func New(cfg *config.AgentConfig, rules []decision.Rule, logger *slog.Logger) (*
|
|||||||
roomCtx: roomCtx,
|
roomCtx: roomCtx,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Configure sanitization if enabled
|
||||||
|
if cfg.Security.Sanitize.Enabled {
|
||||||
|
minSev := parseSeverity(cfg.Security.Sanitize.MinSeverity)
|
||||||
|
a.sanitizeOpts = &sanitize.Options{
|
||||||
|
Mode: sanitize.ParseMode(cfg.Security.Sanitize.Mode),
|
||||||
|
MinSeverity: minSev,
|
||||||
|
DisabledPatterns: cfg.Security.Sanitize.DisabledPatterns,
|
||||||
|
}
|
||||||
|
logger.Info("input sanitization enabled",
|
||||||
|
"mode", a.sanitizeOpts.Mode,
|
||||||
|
"min_severity", minSev,
|
||||||
|
)
|
||||||
|
}
|
||||||
|
|
||||||
// Register built-in command handlers
|
// Register built-in command handlers
|
||||||
a.registerBuiltinCommands()
|
a.registerBuiltinCommands()
|
||||||
|
|
||||||
@@ -424,6 +442,16 @@ func (a *Agent) handleTaskEvent(ctx context.Context, msg bus.AgentMessage) {
|
|||||||
"\n\nPlease provide an improved or complementary answer."
|
"\n\nPlease provide an improved or complementary answer."
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Sanitize orchestrated input
|
||||||
|
sanitized, rejected := a.sanitizeInput(msgCtx.Content, roomID, msgCtx.SenderID)
|
||||||
|
if rejected {
|
||||||
|
a.logger.Warn("orchestrated task rejected by sanitizer",
|
||||||
|
"task_id", task.TaskID, "sender", task.OriginalSender)
|
||||||
|
_ = a.matrix.SendMarkdown(ctx, roomID, "El mensaje fue rechazado por el filtro de seguridad.")
|
||||||
|
return
|
||||||
|
}
|
||||||
|
msgCtx.Content = sanitized
|
||||||
|
|
||||||
// Load memory and run LLM
|
// Load memory and run LLM
|
||||||
a.ensureWindowLoaded(ctx, roomID)
|
a.ensureWindowLoaded(ctx, roomID)
|
||||||
a.appendToWindow(roomID, coretypes.Message{
|
a.appendToWindow(roomID, coretypes.Message{
|
||||||
@@ -580,6 +608,17 @@ func (a *Agent) handleEvent(ctx context.Context, msgCtx decision.MessageContext,
|
|||||||
|
|
||||||
// executeActions expands LLM actions and runs the effects runner.
|
// executeActions expands LLM actions and runs the effects runner.
|
||||||
func (a *Agent) executeActions(ctx context.Context, roomID string, msgCtx decision.MessageContext, actions []decision.Action) {
|
func (a *Agent) executeActions(ctx context.Context, roomID string, msgCtx decision.MessageContext, actions []decision.Action) {
|
||||||
|
// Sanitize user input before sending to LLM
|
||||||
|
sanitized, rejected := a.sanitizeInput(msgCtx.Content, roomID, msgCtx.SenderID)
|
||||||
|
if rejected {
|
||||||
|
a.runner.Execute(ctx, roomID, []decision.Action{{
|
||||||
|
Kind: decision.ActionKindReply,
|
||||||
|
Reply: &decision.ReplyAction{Content: "Tu mensaje fue rechazado por el filtro de seguridad.", InReplyTo: msgCtx.EventID},
|
||||||
|
}})
|
||||||
|
return
|
||||||
|
}
|
||||||
|
msgCtx.Content = sanitized
|
||||||
|
|
||||||
expanded := make([]decision.Action, 0, len(actions))
|
expanded := make([]decision.Action, 0, len(actions))
|
||||||
for _, act := range actions {
|
for _, act := range actions {
|
||||||
if act.Kind == decision.ActionKindLLM {
|
if act.Kind == decision.ActionKindLLM {
|
||||||
@@ -806,6 +845,40 @@ func (a *Agent) persistMessage(ctx context.Context, roomID string, role coretype
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// parseSeverity converts a config string to sanitize.Severity.
|
||||||
|
func parseSeverity(s string) sanitize.Severity {
|
||||||
|
switch s {
|
||||||
|
case "high":
|
||||||
|
return sanitize.SeverityHigh
|
||||||
|
case "low":
|
||||||
|
return sanitize.SeverityLow
|
||||||
|
default:
|
||||||
|
return sanitize.SeverityMedium
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// sanitizeInput runs prompt injection detection on the message content.
|
||||||
|
// Returns the (possibly modified) content and true if the message should be rejected.
|
||||||
|
func (a *Agent) sanitizeInput(content, roomID, senderID string) (string, bool) {
|
||||||
|
if a.sanitizeOpts == nil {
|
||||||
|
return content, false
|
||||||
|
}
|
||||||
|
|
||||||
|
result := sanitize.Sanitize(content, *a.sanitizeOpts)
|
||||||
|
|
||||||
|
for _, w := range result.Warnings {
|
||||||
|
a.logger.Warn("prompt_injection_detected",
|
||||||
|
"pattern", w.PatternName,
|
||||||
|
"severity", w.Severity,
|
||||||
|
"matched", w.Matched,
|
||||||
|
"sender", senderID,
|
||||||
|
"room", roomID,
|
||||||
|
)
|
||||||
|
}
|
||||||
|
|
||||||
|
return result.Output, result.Rejected
|
||||||
|
}
|
||||||
|
|
||||||
// buildToolRegistry creates a Registry with tools enabled in the agent's config.
|
// buildToolRegistry creates a Registry with tools enabled in the agent's config.
|
||||||
func buildToolRegistry(
|
func buildToolRegistry(
|
||||||
cfg *config.AgentConfig,
|
cfg *config.AgentConfig,
|
||||||
|
|||||||
@@ -280,9 +280,18 @@ type SSHTargetCfg struct {
|
|||||||
// ── Security ──────────────────────────────────────────────────────────────
|
// ── Security ──────────────────────────────────────────────────────────────
|
||||||
|
|
||||||
type SecurityCfg struct {
|
type SecurityCfg struct {
|
||||||
Roles map[string]RoleCfg `yaml:"roles"`
|
Roles map[string]RoleCfg `yaml:"roles"`
|
||||||
Audit AuditCfg `yaml:"audit"`
|
Audit AuditCfg `yaml:"audit"`
|
||||||
Secrets SecretsCfg `yaml:"secrets"`
|
Secrets SecretsCfg `yaml:"secrets"`
|
||||||
|
Sanitize SanitizeCfg `yaml:"sanitize"`
|
||||||
|
}
|
||||||
|
|
||||||
|
// SanitizeCfg holds the settings for prompt injection detection on
// incoming messages.
type SanitizeCfg struct {
	// Enabled turns sanitization on (default false).
	Enabled bool `yaml:"enabled"`

	// Mode is one of warn | strip | reject (default warn).
	Mode string `yaml:"mode"`

	// MinSeverity is one of low | medium | high (default medium).
	MinSeverity string `yaml:"min_severity"`

	// DisabledPatterns lists pattern names to skip.
	DisabledPatterns []string `yaml:"disabled_patterns"`
}
|
||||||
|
|
||||||
type RoleCfg struct {
|
type RoleCfg struct {
|
||||||
|
|||||||
Reference in New Issue
Block a user