Repo iniciado
This commit is contained in:
@@ -0,0 +1,111 @@
|
||||
// Package devops defines the rules and composition for the devops agent.
|
||||
package devops
|
||||
|
||||
import (
|
||||
"github.com/enmanuel/agents/pkg/decision"
|
||||
"github.com/enmanuel/agents/pkg/tools"
|
||||
)
|
||||
|
||||
// Rules returns the decision rules for the devops agent.
|
||||
// These are pure data — no side effects.
|
||||
func Rules() []decision.Rule {
|
||||
return []decision.Rule{
|
||||
{
|
||||
Name: "help",
|
||||
Match: decision.MatchCommand("help"),
|
||||
Actions: []decision.Action{{
|
||||
Kind: decision.ActionKindReply,
|
||||
Reply: &decision.ReplyAction{
|
||||
Content: "**DevOps Agent** — comandos disponibles:\n" +
|
||||
"- `!status <target>` — estado del target\n" +
|
||||
"- `!deploy <env>` — deployment en el environment\n" +
|
||||
"- `!rollback <env>` — rollback del último deploy\n" +
|
||||
"- `!logs <target>` — últimas líneas de log\n" +
|
||||
"- `!healthcheck` — health check de producción",
|
||||
},
|
||||
}},
|
||||
},
|
||||
{
|
||||
Name: "healthcheck",
|
||||
Match: decision.MatchCommand("healthcheck"),
|
||||
Actions: []decision.Action{{
|
||||
Kind: decision.ActionKindSSH,
|
||||
SSH: &tools.SSHCommandSpec{
|
||||
Target: "production",
|
||||
Command: "/opt/scripts/healthcheck.sh",
|
||||
Timeout: "30s",
|
||||
},
|
||||
}},
|
||||
},
|
||||
{
|
||||
Name: "status",
|
||||
Match: decision.MatchCommand("status"),
|
||||
Actions: []decision.Action{{
|
||||
Kind: decision.ActionKindSSH,
|
||||
SSH: &tools.SSHCommandSpec{
|
||||
Target: "monitoring",
|
||||
Command: "systemctl status --no-pager",
|
||||
Timeout: "15s",
|
||||
},
|
||||
}},
|
||||
},
|
||||
{
|
||||
Name: "deploy-staging",
|
||||
Match: decision.And(decision.MatchCommand("deploy"), func(ctx decision.MessageContext) bool {
|
||||
return len(ctx.Args) > 0 && ctx.Args[0] == "staging"
|
||||
}),
|
||||
Actions: []decision.Action{{
|
||||
Kind: decision.ActionKindSSH,
|
||||
SSH: &tools.SSHCommandSpec{
|
||||
Target: "staging",
|
||||
Command: "cd /app && git pull origin main && systemctl restart app",
|
||||
Timeout: "60s",
|
||||
},
|
||||
}},
|
||||
},
|
||||
{
|
||||
Name: "deploy-production",
|
||||
Match: decision.And(
|
||||
decision.MatchCommand("deploy"),
|
||||
decision.MatchMinPowerLevel(50),
|
||||
func(ctx decision.MessageContext) bool {
|
||||
return len(ctx.Args) > 0 && ctx.Args[0] == "production"
|
||||
},
|
||||
),
|
||||
Actions: []decision.Action{{
|
||||
Kind: decision.ActionKindSSH,
|
||||
SSH: &tools.SSHCommandSpec{
|
||||
Target: "production",
|
||||
Command: "cd /app && git pull origin main && systemctl restart app",
|
||||
Timeout: "120s",
|
||||
},
|
||||
}},
|
||||
},
|
||||
{
|
||||
Name: "logs",
|
||||
Match: decision.MatchCommand("logs"),
|
||||
Actions: []decision.Action{{
|
||||
Kind: decision.ActionKindSSH,
|
||||
SSH: &tools.SSHCommandSpec{
|
||||
Target: "production",
|
||||
Command: "journalctl -u app -n 50 --no-pager",
|
||||
Timeout: "15s",
|
||||
},
|
||||
}},
|
||||
},
|
||||
// Fallback: anything else goes to LLM
|
||||
{
|
||||
Name: "llm-fallback",
|
||||
Match: decision.And(
|
||||
decision.MatchAny(),
|
||||
func(ctx decision.MessageContext) bool {
|
||||
return ctx.Command == "" && (ctx.IsMention || ctx.IsDirectMsg)
|
||||
},
|
||||
),
|
||||
Actions: []decision.Action{{
|
||||
Kind: decision.ActionKindLLM,
|
||||
LLM: &decision.LLMAction{},
|
||||
}},
|
||||
},
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,334 @@
|
||||
# ============================================
|
||||
# IDENTIDAD
|
||||
# ============================================
|
||||
agent:
|
||||
id: devops-bot
|
||||
name: "DevOps Agent"
|
||||
version: "1.0.0"
|
||||
enabled: true
|
||||
description: "Gestiona deployments, monitoreo y salud de infraestructura"
|
||||
tags: [devops, infrastructure, deployment]
|
||||
|
||||
# ============================================
|
||||
# PERSONALIDAD Y COMPORTAMIENTO
|
||||
# ============================================
|
||||
personality:
|
||||
tone: direct # direct | friendly | formal | casual | technical
|
||||
verbosity: concise # minimal | concise | detailed | verbose
|
||||
language: es
|
||||
languages_supported: [es, en]
|
||||
emoji_style: moderate # none | minimal | moderate | heavy
|
||||
prefix: "🔧"
|
||||
error_style: helpful # terse | helpful | detailed
|
||||
|
||||
templates:
|
||||
greeting: "Listo para operar. ¿Qué necesitas?"
|
||||
unknown_command: "No reconozco eso. Usa `!help` para ver comandos."
|
||||
permission_denied: "No tienes permisos para eso."
|
||||
error: "Algo falló: {{.Error}}"
|
||||
success: "Hecho. {{.Summary}}"
|
||||
busy: "Estoy ejecutando otra tarea ahora. Espera o usa `!queue`."
|
||||
|
||||
behavior:
|
||||
proactive: false
|
||||
ask_confirmation: true
|
||||
show_reasoning: false
|
||||
thread_replies: true
|
||||
typing_indicator: true
|
||||
acknowledge_receipt: true
|
||||
|
||||
# ============================================
|
||||
# LLM — CONEXIÓN Y RAZONAMIENTO
|
||||
# ============================================
|
||||
llm:
|
||||
primary:
|
||||
provider: anthropic
|
||||
model: claude-sonnet-4-20250514
|
||||
api_key_env: ANTHROPIC_API_KEY
|
||||
base_url: ""
|
||||
max_tokens: 4096
|
||||
temperature: 0.3
|
||||
|
||||
fallback:
|
||||
provider: ollama
|
||||
model: llama3
|
||||
base_url: "http://localhost:11434/v1"
|
||||
max_tokens: 2048
|
||||
temperature: 0.5
|
||||
|
||||
reasoning:
|
||||
system_prompt_file: "prompts/devops-system.md"
|
||||
context_window: 8192
|
||||
memory_messages: 20
|
||||
|
||||
tool_use:
|
||||
enabled: true
|
||||
max_iterations: 5
|
||||
parallel_calls: false
|
||||
|
||||
rate_limit:
|
||||
requests_per_minute: 30
|
||||
tokens_per_minute: 100000
|
||||
concurrent_requests: 3
|
||||
|
||||
# ============================================
|
||||
# TOOLS — CAPACIDADES DISPONIBLES
|
||||
# ============================================
|
||||
tools:
|
||||
ssh:
|
||||
enabled: true
|
||||
allowed_targets: [production, staging, monitoring]
|
||||
forbidden_commands:
|
||||
- "rm -rf /"
|
||||
- "dd if="
|
||||
- "mkfs"
|
||||
timeout: 30s
|
||||
max_concurrent: 3
|
||||
require_confirmation:
|
||||
- production
|
||||
|
||||
http:
|
||||
enabled: true
|
||||
allowed_domains:
|
||||
- "api.github.com"
|
||||
- "api.gitea.internal"
|
||||
- "grafana.internal"
|
||||
timeout: 15s
|
||||
max_retries: 2
|
||||
|
||||
scripts:
|
||||
enabled: true
|
||||
scripts_dir: "./scripts/"
|
||||
allowed:
|
||||
- "deploy.sh"
|
||||
- "healthcheck.sh"
|
||||
- "rollback.sh"
|
||||
timeout: 120s
|
||||
sandbox: false
|
||||
|
||||
file_ops:
|
||||
enabled: false
|
||||
allowed_paths: ["/var/log/", "/tmp/reports/"]
|
||||
read_only: true
|
||||
|
||||
mcp:
|
||||
enabled: true
|
||||
servers:
|
||||
- name: github
|
||||
url: "stdio://mcp-github"
|
||||
tools: ["create_issue", "list_prs", "merge_pr"]
|
||||
- name: filesystem
|
||||
url: "stdio://mcp-filesystem"
|
||||
tools: ["read_file", "list_dir"]
|
||||
expose:
|
||||
port: 9100
|
||||
tools: ["deploy", "status", "rollback"]
|
||||
|
||||
# ============================================
|
||||
# MATRIX — CONEXIÓN Y ROOMS
|
||||
# ============================================
|
||||
matrix:
|
||||
homeserver: "${MATRIX_HOMESERVER}"
|
||||
user_id: "@devops-bot:${MATRIX_SERVER_NAME}"
|
||||
access_token_env: MATRIX_TOKEN_DEVOPS
|
||||
device_id: "DEVOPSBOT01"
|
||||
|
||||
encryption:
|
||||
enabled: false # habilitar cuando E2EE esté configurado
|
||||
store_path: "./data/crypto/"
|
||||
trust_mode: tofu
|
||||
|
||||
rooms:
|
||||
listen:
|
||||
- "${MATRIX_ROOM_DEVOPS}"
|
||||
- "${MATRIX_ROOM_ALERTS}"
|
||||
respond:
|
||||
- "${MATRIX_ROOM_DEVOPS}"
|
||||
- "${MATRIX_ROOM_LOGS}"
|
||||
admin:
|
||||
- "${MATRIX_ROOM_ADMIN}"
|
||||
|
||||
filters:
|
||||
command_prefix: "!"
|
||||
mention_respond: true
|
||||
dm_respond: true
|
||||
ignore_bots: true
|
||||
ignore_users: []
|
||||
min_power_level: 0
|
||||
|
||||
# ============================================
|
||||
# COMUNICACIÓN INTER-AGENTES
|
||||
# ============================================
|
||||
agents:
|
||||
peers:
|
||||
- id: monitor-bot
|
||||
capabilities: [alerts, metrics, healthcheck]
|
||||
room: "${MATRIX_ROOM_AGENTS_INTERNAL}"
|
||||
- id: assistant-bot
|
||||
capabilities: [search, summarize, translate]
|
||||
room: "${MATRIX_ROOM_AGENTS_INTERNAL}"
|
||||
|
||||
delegation:
|
||||
enabled: true
|
||||
can_delegate_to: [monitor-bot, assistant-bot]
|
||||
can_receive_from: [assistant-bot]
|
||||
max_delegation_depth: 2
|
||||
timeout: 60s
|
||||
|
||||
protocol:
|
||||
format: json
|
||||
channel: matrix
|
||||
heartbeat_interval: 30s
|
||||
|
||||
# ============================================
|
||||
# SSH — INVENTARIO DE SERVIDORES
|
||||
# ============================================
|
||||
ssh:
|
||||
defaults:
|
||||
user: deploy
|
||||
port: 22
|
||||
key_file_env: SSH_PRIVATE_KEY_PATH
|
||||
known_hosts: "./data/known_hosts"
|
||||
keepalive_interval: 15s
|
||||
timeout: 10s
|
||||
|
||||
targets:
|
||||
production:
|
||||
hosts: ["${PROD_HOST_1}", "${PROD_HOST_2}"]
|
||||
user: deploy
|
||||
jump_host: "${BASTION_HOST}"
|
||||
|
||||
staging:
|
||||
hosts: ["${STAGING_HOST}"]
|
||||
user: deploy
|
||||
|
||||
monitoring:
|
||||
hosts: ["${MONITORING_HOST}"]
|
||||
user: monitor
|
||||
key_file_env: SSH_MONITOR_KEY_PATH
|
||||
|
||||
# ============================================
|
||||
# PERMISOS Y SEGURIDAD
|
||||
# ============================================
|
||||
security:
|
||||
roles:
|
||||
admin:
|
||||
users: ["@admin:${MATRIX_SERVER_NAME}"]
|
||||
actions: ["*"]
|
||||
developer:
|
||||
users: ["@dev1:${MATRIX_SERVER_NAME}", "@dev2:${MATRIX_SERVER_NAME}"]
|
||||
actions: ["deploy:staging", "status:*", "logs:*"]
|
||||
viewer:
|
||||
users: ["*"]
|
||||
actions: ["status:*", "help"]
|
||||
|
||||
audit:
|
||||
enabled: true
|
||||
log_file: "./data/audit.log"
|
||||
log_to_room: "${MATRIX_ROOM_AUDIT}"
|
||||
include: [ssh, deploy, config_change]
|
||||
|
||||
secrets:
|
||||
provider: env
|
||||
|
||||
# ============================================
|
||||
# SCHEDULING Y TAREAS AUTOMÁTICAS
|
||||
# ============================================
|
||||
schedules:
|
||||
- name: healthcheck
|
||||
cron: "*/5 * * * *"
|
||||
action:
|
||||
kind: ssh
|
||||
target: production
|
||||
command: "/opt/scripts/healthcheck.sh"
|
||||
on_failure:
|
||||
notify_room: "${MATRIX_ROOM_ALERTS}"
|
||||
escalate_to: "@admin:${MATRIX_SERVER_NAME}"
|
||||
|
||||
- name: daily-report
|
||||
cron: "0 9 * * *"
|
||||
action:
|
||||
kind: script
|
||||
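script: "daily-report.sh" # NOTE(review): not listed in tools.scripts.allowed (deploy.sh, healthcheck.sh, rollback.sh); confirm this schedule is permitted to run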
|
||||
output_room: "${MATRIX_ROOM_DEVOPS}"
|
||||
|
||||
- name: backup-check
|
||||
cron: "0 */6 * * *"
|
||||
action:
|
||||
kind: ssh
|
||||
target: production
|
||||
command: "/opt/scripts/check-backups.sh"
|
||||
on_failure:
|
||||
notify_room: "${MATRIX_ROOM_ALERTS}"
|
||||
escalate_to: "@admin:${MATRIX_SERVER_NAME}"
|
||||
|
||||
# ============================================
|
||||
# OBSERVABILIDAD
|
||||
# ============================================
|
||||
observability:
|
||||
logging:
|
||||
level: info
|
||||
format: json
|
||||
output: stdout
|
||||
file: "./data/agent.log"
|
||||
|
||||
metrics:
|
||||
enabled: true
|
||||
port: 9090
|
||||
path: /metrics
|
||||
export: prometheus
|
||||
|
||||
health:
|
||||
enabled: true
|
||||
port: 8080
|
||||
path: /healthz
|
||||
|
||||
tracing:
|
||||
enabled: false
|
||||
provider: jaeger
|
||||
endpoint: "http://jaeger:14268/api/traces"
|
||||
|
||||
# ============================================
|
||||
# RESILIENCIA
|
||||
# ============================================
|
||||
resilience:
|
||||
circuit_breaker:
|
||||
failure_threshold: 5
|
||||
timeout: 30s
|
||||
half_open_max: 2
|
||||
|
||||
retry:
|
||||
max_attempts: 3
|
||||
backoff: exponential
|
||||
initial_delay: 1s
|
||||
max_delay: 30s
|
||||
|
||||
shutdown:
|
||||
timeout: 15s
|
||||
drain_messages: true
|
||||
save_state: true
|
||||
state_file: "./data/state.json"
|
||||
|
||||
queue:
|
||||
enabled: true
|
||||
max_size: 50
|
||||
priority_users: ["@admin:${MATRIX_SERVER_NAME}"]
|
||||
|
||||
# ============================================
|
||||
# ALMACENAMIENTO Y ESTADO
|
||||
# ============================================
|
||||
storage:
|
||||
state:
|
||||
backend: sqlite
|
||||
path: "./data/agent.db"
|
||||
|
||||
cache:
|
||||
enabled: true
|
||||
backend: memory
|
||||
ttl: 10m
|
||||
max_entries: 500
|
||||
|
||||
history:
|
||||
backend: sqlite
|
||||
path: "./data/history.db"
|
||||
retention: 720h # 30 days
|
||||
@@ -0,0 +1,24 @@
|
||||
# DevOps Agent — System Prompt
|
||||
|
||||
Eres un agente de DevOps especializado en gestión de infraestructura y deployments.
|
||||
|
||||
## Rol y responsabilidades
|
||||
- Ejecutar deployments en staging y producción con confirmación cuando sea necesario
|
||||
- Monitorear el estado de los servicios y reportar anomalías
|
||||
- Ejecutar scripts de mantenimiento y salud del sistema
|
||||
- Coordinar con otros agentes (monitor-bot, assistant-bot) cuando la tarea lo requiera
|
||||
|
||||
## Estilo de comunicación
|
||||
- Respuestas directas y técnicas
|
||||
- Usar listas para pasos de procedimientos
|
||||
- Reportar siempre exit codes y stderr relevante
|
||||
- Confirmar antes de acciones destructivas en producción
|
||||
|
||||
## Restricciones
|
||||
- NUNCA ejecutar comandos que modifiquen datos de usuarios sin confirmación explícita
|
||||
- NUNCA ejecutar comandos que puedan causar downtime sin coordinación previa
|
||||
- Rechazar solicitudes de acceso a sistemas no listados en el inventario SSH
|
||||
- Reportar inmediatamente cualquier error inesperado
|
||||
|
||||
## Formato de respuesta
|
||||
Usa markdown cuando sea útil. Para output de comandos, usa bloques de código con el shell apropiado.
|
||||
@@ -0,0 +1,145 @@
|
||||
// Package agents defines the Agent runtime that ties core and shell together.
|
||||
package agents
|
||||
|
||||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
"log/slog"
|
||||
|
||||
"maunium.net/go/mautrix/event"
|
||||
|
||||
"github.com/enmanuel/agents/internal/config"
|
||||
"github.com/enmanuel/agents/pkg/decision"
|
||||
coretypes "github.com/enmanuel/agents/pkg/llm"
|
||||
"github.com/enmanuel/agents/pkg/personality"
|
||||
"github.com/enmanuel/agents/shell/effects"
|
||||
shelllm "github.com/enmanuel/agents/shell/llm"
|
||||
"github.com/enmanuel/agents/shell/matrix"
|
||||
"github.com/enmanuel/agents/shell/ssh"
|
||||
)
|
||||
|
||||
// Agent is the assembled runtime: pure core + impure shell.
|
||||
type Agent struct {
|
||||
cfg *config.AgentConfig
|
||||
personality personality.Personality
|
||||
rules []decision.Rule
|
||||
llm coretypes.CompleteFunc
|
||||
matrix *matrix.Client
|
||||
runner *effects.Runner
|
||||
listener *matrix.Listener
|
||||
logger *slog.Logger
|
||||
}
|
||||
|
||||
// New assembles an Agent from its config, rules, and logger.
|
||||
func New(cfg *config.AgentConfig, rules []decision.Rule, logger *slog.Logger) (*Agent, error) {
|
||||
// Matrix client
|
||||
matrixClient, err := matrix.New(cfg.Matrix)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("matrix client: %w", err)
|
||||
}
|
||||
|
||||
// SSH executor
|
||||
sshExec := ssh.NewExecutor(cfg.SSH)
|
||||
|
||||
// LLM client
|
||||
primaryLLM, err := shelllm.FromConfig(cfg.LLM.Primary)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("primary LLM: %w", err)
|
||||
}
|
||||
|
||||
var llmFunc coretypes.CompleteFunc = primaryLLM
|
||||
if cfg.LLM.Fallback.Provider != "" {
|
||||
fallbackLLM, err := shelllm.FromConfig(cfg.LLM.Fallback)
|
||||
if err != nil {
|
||||
logger.Warn("fallback LLM config error", "err", err)
|
||||
} else {
|
||||
llmFunc = shelllm.WithFallback(primaryLLM, fallbackLLM)
|
||||
}
|
||||
}
|
||||
|
||||
// Effects runner
|
||||
runner := effects.NewRunner(matrixClient, sshExec, logger)
|
||||
|
||||
a := &Agent{
|
||||
cfg: cfg,
|
||||
rules: rules,
|
||||
llm: llmFunc,
|
||||
matrix: matrixClient,
|
||||
runner: runner,
|
||||
logger: logger,
|
||||
}
|
||||
|
||||
// Matrix event listener
|
||||
a.listener = matrix.NewListener(matrixClient, cfg.Matrix, a.handleEvent, logger)
|
||||
|
||||
return a, nil
|
||||
}
|
||||
|
||||
// Run starts the agent sync loop. Blocks until ctx is cancelled.
|
||||
func (a *Agent) Run(ctx context.Context) error {
|
||||
a.logger.Info("agent starting", "id", a.cfg.Agent.ID, "name", a.cfg.Agent.Name)
|
||||
return a.listener.Run(ctx)
|
||||
}
|
||||
|
||||
// handleEvent is called by the matrix Listener for each filtered incoming event.
|
||||
func (a *Agent) handleEvent(ctx context.Context, msgCtx decision.MessageContext, evt *event.Event) {
|
||||
if a.cfg.Personality.Behavior.TypingIndicator {
|
||||
_ = a.matrix.SendTyping(ctx, evt.RoomID.String(), true)
|
||||
defer a.matrix.SendTyping(ctx, evt.RoomID.String(), false)
|
||||
}
|
||||
|
||||
actions := decision.Evaluate(msgCtx, a.rules)
|
||||
|
||||
// If no rules matched and the message mentions the bot or is a DM, use LLM.
|
||||
if len(actions) == 0 && (msgCtx.IsMention || msgCtx.IsDirectMsg) {
|
||||
actions = []decision.Action{{
|
||||
Kind: decision.ActionKindLLM,
|
||||
LLM: &decision.LLMAction{ContextKey: msgCtx.RoomID},
|
||||
}}
|
||||
}
|
||||
|
||||
if len(actions) == 0 {
|
||||
return
|
||||
}
|
||||
|
||||
// Expand LLM actions inline (simplified — real impl would maintain conversation state)
|
||||
expanded := make([]decision.Action, 0, len(actions))
|
||||
for _, act := range actions {
|
||||
if act.Kind == decision.ActionKindLLM {
|
||||
reply, err := a.runLLM(ctx, msgCtx)
|
||||
if err != nil {
|
||||
a.logger.Error("llm error", "err", err)
|
||||
expanded = append(expanded, decision.Action{
|
||||
Kind: decision.ActionKindReply,
|
||||
Reply: &decision.ReplyAction{Content: "Sorry, I encountered an error."},
|
||||
})
|
||||
} else {
|
||||
expanded = append(expanded, decision.Action{
|
||||
Kind: decision.ActionKindReply,
|
||||
Reply: &decision.ReplyAction{Content: reply},
|
||||
})
|
||||
}
|
||||
} else {
|
||||
expanded = append(expanded, act)
|
||||
}
|
||||
}
|
||||
|
||||
a.runner.Execute(ctx, evt.RoomID.String(), expanded)
|
||||
}
|
||||
|
||||
func (a *Agent) runLLM(ctx context.Context, msgCtx decision.MessageContext) (string, error) {
|
||||
req := coretypes.CompletionRequest{
|
||||
Model: a.cfg.LLM.Primary.Model,
|
||||
MaxTokens: a.cfg.LLM.Primary.MaxTokens,
|
||||
Temperature: a.cfg.LLM.Primary.Temperature,
|
||||
SystemPrompt: a.cfg.Agent.Description,
|
||||
Messages: []coretypes.Message{
|
||||
{Role: coretypes.RoleUser, Content: msgCtx.Content},
|
||||
},
|
||||
}
|
||||
resp, err := a.llm(ctx, req)
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
return resp.Content, nil
|
||||
}
|
||||
Reference in New Issue
Block a user