Repo iniciado

2026-03-03 23:19:23 +00:00
commit c126187c5a
32 changed files with 2719 additions and 0 deletions
@@ -0,0 +1,111 @@
+// Package devops defines the rules and composition for the devops agent.
+package devops
+
+import (
+	"github.com/enmanuel/agents/pkg/decision"
+	"github.com/enmanuel/agents/pkg/tools"
+)
+
+// Rules returns the decision rules for the devops agent.
+// These are pure data — no side effects.
+func Rules() []decision.Rule {
+	return []decision.Rule{
+		{
+			Name:  "help",
+			Match: decision.MatchCommand("help"),
+			Actions: []decision.Action{{
+				Kind: decision.ActionKindReply,
+				Reply: &decision.ReplyAction{
+					Content: "**DevOps Agent** — comandos disponibles:\n" +
+						"- `!status <target>` — estado del target\n" +
+						"- `!deploy <env>` — deployment en el environment\n" +
+						"- `!rollback <env>` — rollback del último deploy\n" +
+						"- `!logs <target>` — últimas líneas de log\n" +
+						"- `!healthcheck` — health check de producción",
+				},
+			}},
+		},
+		{
+			Name:  "healthcheck",
+			Match: decision.MatchCommand("healthcheck"),
+			Actions: []decision.Action{{
+				Kind: decision.ActionKindSSH,
+				SSH: &tools.SSHCommandSpec{
+					Target:  "production",
+					Command: "/opt/scripts/healthcheck.sh",
+					Timeout: "30s",
+				},
+			}},
+		},
+		{
+			Name:  "status",
+			Match: decision.MatchCommand("status"),
+			Actions: []decision.Action{{
+				Kind: decision.ActionKindSSH,
+				SSH: &tools.SSHCommandSpec{
+					Target:  "monitoring",
+					Command: "systemctl status --no-pager",
+					Timeout: "15s",
+				},
+			}},
+		},
+		{
+			Name:  "deploy-staging",
+			Match: decision.And(decision.MatchCommand("deploy"), func(ctx decision.MessageContext) bool {
+				return len(ctx.Args) > 0 && ctx.Args[0] == "staging"
+			}),
+			Actions: []decision.Action{{
+				Kind: decision.ActionKindSSH,
+				SSH: &tools.SSHCommandSpec{
+					Target:  "staging",
+					Command: "cd /app && git pull origin main && systemctl restart app",
+					Timeout: "60s",
+				},
+			}},
+		},
+		{
+			Name:  "deploy-production",
+			Match: decision.And(
+				decision.MatchCommand("deploy"),
+				decision.MatchMinPowerLevel(50),
+				func(ctx decision.MessageContext) bool {
+					return len(ctx.Args) > 0 && ctx.Args[0] == "production"
+				},
+			),
+			Actions: []decision.Action{{
+				Kind: decision.ActionKindSSH,
+				SSH: &tools.SSHCommandSpec{
+					Target:  "production",
+					Command: "cd /app && git pull origin main && systemctl restart app",
+					Timeout: "120s",
+				},
+			}},
+		},
+		{
+			Name:  "logs",
+			Match: decision.MatchCommand("logs"),
+			Actions: []decision.Action{{
+				Kind: decision.ActionKindSSH,
+				SSH: &tools.SSHCommandSpec{
+					Target:  "production",
+					Command: "journalctl -u app -n 50 --no-pager",
+					Timeout: "15s",
+				},
+			}},
+		},
+		// Fallback: anything else goes to LLM
+		{
+			Name:  "llm-fallback",
+			Match: decision.And(
+				decision.MatchAny(),
+				func(ctx decision.MessageContext) bool {
+					return ctx.Command == "" && (ctx.IsMention || ctx.IsDirectMsg)
+				},
+			),
+			Actions: []decision.Action{{
+				Kind: decision.ActionKindLLM,
+				LLM:  &decision.LLMAction{},
+			}},
+		},
+	}
+}
@@ -0,0 +1,334 @@
+# ============================================
+# IDENTIDAD
+# ============================================
+agent:
+  id: devops-bot
+  name: "DevOps Agent"
+  version: "1.0.0"
+  enabled: true
+  description: "Gestiona deployments, monitoreo y salud de infraestructura"
+  tags: [devops, infrastructure, deployment]
+
+# ============================================
+# PERSONALIDAD Y COMPORTAMIENTO
+# ============================================
+personality:
+  tone: direct              # direct | friendly | formal | casual | technical
+  verbosity: concise        # minimal | concise | detailed | verbose
+  language: es
+  languages_supported: [es, en]
+  emoji_style: moderate     # none | minimal | moderate | heavy
+  prefix: "🔧"
+  error_style: helpful      # terse | helpful | detailed
+
+  templates:
+    greeting: "Listo para operar. ¿Qué necesitas?"
+    unknown_command: "No reconozco eso. Usa `!help` para ver comandos."
+    permission_denied: "No tienes permisos para eso."
+    error: "Algo falló: {{.Error}}"
+    success: "Hecho. {{.Summary}}"
+    busy: "Estoy ejecutando otra tarea ahora. Espera o usa `!queue`."
+
+  behavior:
+    proactive: false
+    ask_confirmation: true
+    show_reasoning: false
+    thread_replies: true
+    typing_indicator: true
+    acknowledge_receipt: true
+
+# ============================================
+# LLM — CONEXIÓN Y RAZONAMIENTO
+# ============================================
+llm:
+  primary:
+    provider: anthropic
+    model: claude-sonnet-4-20250514
+    api_key_env: ANTHROPIC_API_KEY
+    base_url: ""
+    max_tokens: 4096
+    temperature: 0.3
+
+  fallback:
+    provider: ollama
+    model: llama3
+    base_url: "http://localhost:11434/v1"
+    max_tokens: 2048
+    temperature: 0.5
+
+  reasoning:
+    system_prompt_file: "prompts/devops-system.md"
+    context_window: 8192
+    memory_messages: 20
+
+  tool_use:
+    enabled: true
+    max_iterations: 5
+    parallel_calls: false
+
+  rate_limit:
+    requests_per_minute: 30
+    tokens_per_minute: 100000
+    concurrent_requests: 3
+
+# ============================================
+# TOOLS — CAPACIDADES DISPONIBLES
+# ============================================
+tools:
+  ssh:
+    enabled: true
+    allowed_targets: [production, staging, monitoring]
+    forbidden_commands:
+      - "rm -rf /"
+      - "dd if="
+      - "mkfs"
+    timeout: 30s
+    max_concurrent: 3
+    require_confirmation:
+      - production
+
+  http:
+    enabled: true
+    allowed_domains:
+      - "api.github.com"
+      - "api.gitea.internal"
+      - "grafana.internal"
+    timeout: 15s
+    max_retries: 2
+
+  scripts:
+    enabled: true
+    scripts_dir: "./scripts/"
+    allowed:
+      - "deploy.sh"
+      - "healthcheck.sh"
+      - "rollback.sh"
+    timeout: 120s
+    sandbox: false
+
+  file_ops:
+    enabled: false
+    allowed_paths: ["/var/log/", "/tmp/reports/"]
+    read_only: true
+
+  mcp:
+    enabled: true
+    servers:
+      - name: github
+        url: "stdio://mcp-github"
+        tools: ["create_issue", "list_prs", "merge_pr"]
+      - name: filesystem
+        url: "stdio://mcp-filesystem"
+        tools: ["read_file", "list_dir"]
+    expose:
+      port: 9100
+      tools: ["deploy", "status", "rollback"]
+
+# ============================================
+# MATRIX — CONEXIÓN Y ROOMS
+# ============================================
+matrix:
+  homeserver: "${MATRIX_HOMESERVER}"
+  user_id: "@devops-bot:${MATRIX_SERVER_NAME}"
+  access_token_env: MATRIX_TOKEN_DEVOPS
+  device_id: "DEVOPSBOT01"
+
+  encryption:
+    enabled: false           # habilitar cuando E2EE esté configurado
+    store_path: "./data/crypto/"
+    trust_mode: tofu
+
+  rooms:
+    listen:
+      - "${MATRIX_ROOM_DEVOPS}"
+      - "${MATRIX_ROOM_ALERTS}"
+    respond:
+      - "${MATRIX_ROOM_DEVOPS}"
+      - "${MATRIX_ROOM_LOGS}"
+    admin:
+      - "${MATRIX_ROOM_ADMIN}"
+
+  filters:
+    command_prefix: "!"
+    mention_respond: true
+    dm_respond: true
+    ignore_bots: true
+    ignore_users: []
+    min_power_level: 0
+
+# ============================================
+# COMUNICACIÓN INTER-AGENTES
+# ============================================
+agents:
+  peers:
+    - id: monitor-bot
+      capabilities: [alerts, metrics, healthcheck]
+      room: "${MATRIX_ROOM_AGENTS_INTERNAL}"
+    - id: assistant-bot
+      capabilities: [search, summarize, translate]
+      room: "${MATRIX_ROOM_AGENTS_INTERNAL}"
+
+  delegation:
+    enabled: true
+    can_delegate_to: [monitor-bot, assistant-bot]
+    can_receive_from: [assistant-bot]
+    max_delegation_depth: 2
+    timeout: 60s
+
+  protocol:
+    format: json
+    channel: matrix
+    heartbeat_interval: 30s
+
+# ============================================
+# SSH — INVENTARIO DE SERVIDORES
+# ============================================
+ssh:
+  defaults:
+    user: deploy
+    port: 22
+    key_file_env: SSH_PRIVATE_KEY_PATH
+    known_hosts: "./data/known_hosts"
+    keepalive_interval: 15s
+    timeout: 10s
+
+  targets:
+    production:
+      hosts: ["${PROD_HOST_1}", "${PROD_HOST_2}"]
+      user: deploy
+      jump_host: "${BASTION_HOST}"
+
+    staging:
+      hosts: ["${STAGING_HOST}"]
+      user: deploy
+
+    monitoring:
+      hosts: ["${MONITORING_HOST}"]
+      user: monitor
+      key_file_env: SSH_MONITOR_KEY_PATH
+
+# ============================================
+# PERMISOS Y SEGURIDAD
+# ============================================
+security:
+  roles:
+    admin:
+      users: ["@admin:${MATRIX_SERVER_NAME}"]
+      actions: ["*"]
+    developer:
+      users: ["@dev1:${MATRIX_SERVER_NAME}", "@dev2:${MATRIX_SERVER_NAME}"]
+      actions: ["deploy:staging", "status:*", "logs:*"]
+    viewer:
+      users: ["*"]
+      actions: ["status:*", "help"]
+
+  audit:
+    enabled: true
+    log_file: "./data/audit.log"
+    log_to_room: "${MATRIX_ROOM_AUDIT}"
+    include: [ssh, deploy, config_change]
+
+  secrets:
+    provider: env
+
+# ============================================
+# SCHEDULING Y TAREAS AUTOMÁTICAS
+# ============================================
+schedules:
+  - name: healthcheck
+    cron: "*/5 * * * *"
+    action:
+      kind: ssh
+      target: production
+      command: "/opt/scripts/healthcheck.sh"
+    on_failure:
+      notify_room: "${MATRIX_ROOM_ALERTS}"
+      escalate_to: "@admin:${MATRIX_SERVER_NAME}"
+
+  - name: daily-report
+    cron: "0 9 * * *"
+    action:
+      kind: script
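+      script: "daily-report.sh"  # NOTE(review): not listed in tools.scripts.allowed; confirm the allow-list does not block this job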
+    output_room: "${MATRIX_ROOM_DEVOPS}"
+
+  - name: backup-check
+    cron: "0 */6 * * *"
+    action:
+      kind: ssh
+      target: production
+      command: "/opt/scripts/check-backups.sh"
+    on_failure:
+      notify_room: "${MATRIX_ROOM_ALERTS}"
+      escalate_to: "@admin:${MATRIX_SERVER_NAME}"
+
+# ============================================
+# OBSERVABILIDAD
+# ============================================
+observability:
+  logging:
+    level: info
+    format: json
+    output: stdout
+    file: "./data/agent.log"
+
+  metrics:
+    enabled: true
+    port: 9090
+    path: /metrics
+    export: prometheus
+
+  health:
+    enabled: true
+    port: 8080
+    path: /healthz
+
+  tracing:
+    enabled: false
+    provider: jaeger
+    endpoint: "http://jaeger:14268/api/traces"
+
+# ============================================
+# RESILIENCIA
+# ============================================
+resilience:
+  circuit_breaker:
+    failure_threshold: 5
+    timeout: 30s
+    half_open_max: 2
+
+  retry:
+    max_attempts: 3
+    backoff: exponential
+    initial_delay: 1s
+    max_delay: 30s
+
+  shutdown:
+    timeout: 15s
+    drain_messages: true
+    save_state: true
+    state_file: "./data/state.json"
+
+  queue:
+    enabled: true
+    max_size: 50
+    priority_users: ["@admin:${MATRIX_SERVER_NAME}"]
+
+# ============================================
+# ALMACENAMIENTO Y ESTADO
+# ============================================
+storage:
+  state:
+    backend: sqlite
+    path: "./data/agent.db"
+
+  cache:
+    enabled: true
+    backend: memory
+    ttl: 10m
+    max_entries: 500
+
+  history:
+    backend: sqlite
+    path: "./data/history.db"
+    retention: 720h  # 30 days
@@ -0,0 +1,24 @@
+# DevOps Agent — System Prompt
+
+Eres un agente de DevOps especializado en gestión de infraestructura y deployments.
+
+## Rol y responsabilidades
+- Ejecutar deployments en staging y producción con confirmación cuando sea necesario
+- Monitorear el estado de los servicios y reportar anomalías
+- Ejecutar scripts de mantenimiento y salud del sistema
+- Coordinar con otros agentes (monitor-bot, assistant-bot) cuando la tarea lo requiera
+
+## Estilo de comunicación
+- Respuestas directas y técnicas
+- Usar listas para pasos de procedimientos
+- Reportar siempre exit codes y stderr relevante
+- Confirmar antes de acciones destructivas en producción
+
+## Restricciones
+- NUNCA ejecutar comandos que modifiquen datos de usuarios sin confirmación explícita
+- NUNCA ejecutar comandos que puedan causar downtime sin coordinación previa
+- Rechazar solicitudes de acceso a sistemas no listados en el inventario SSH
+- Reportar inmediatamente cualquier error inesperado
+
+## Formato de respuesta
+Usa markdown cuando sea útil. Para output de comandos, usa bloques de código con el shell apropiado.
@@ -0,0 +1,145 @@
+// Package agents defines the Agent runtime that ties core and shell together.
+package agents
+
+import (
+	"context"
+	"fmt"
+	"log/slog"
+
+	"maunium.net/go/mautrix/event"
+
+	"github.com/enmanuel/agents/internal/config"
+	"github.com/enmanuel/agents/pkg/decision"
+	coretypes "github.com/enmanuel/agents/pkg/llm"
+	"github.com/enmanuel/agents/pkg/personality"
+	"github.com/enmanuel/agents/shell/effects"
+	shelllm "github.com/enmanuel/agents/shell/llm"
+	"github.com/enmanuel/agents/shell/matrix"
+	"github.com/enmanuel/agents/shell/ssh"
+)
+
+// Agent is the assembled runtime: pure core + impure shell.
+//
+// It holds the loaded configuration, the decision rules, the LLM completion
+// function and the Matrix/effects plumbing that New wires together.
+type Agent struct {
+	cfg         *config.AgentConfig     // configuration supplied to New
+	personality personality.Personality // NOTE(review): not set by New in this file; confirm where it is populated
+	rules       []decision.Rule         // rules evaluated for each incoming message
+	llm         coretypes.CompleteFunc  // primary LLM, wrapped with the fallback when one is configured
+	matrix      *matrix.Client          // Matrix client; also used for typing notifications
+	runner      *effects.Runner         // executes the actions chosen for an event
+	listener    *matrix.Listener        // delivers incoming events to handleEvent
+	logger      *slog.Logger            // structured logger
+}
+
+// New assembles an Agent from its config, rules, and logger.
+//
+// It builds the Matrix client, the SSH executor, the LLM completion function
+// and the effects runner, then registers handleEvent with a Matrix listener.
+// A fallback LLM is used only when cfg.LLM.Fallback.Provider is set; if that
+// fallback cannot be built the problem is logged as a warning and the primary
+// LLM is used on its own.
+//
+// A nil cfg is reported as an error instead of panicking, and a nil logger is
+// replaced by slog.Default(). Errors from the Matrix client or the primary LLM
+// are returned wrapped with the name of the failing component.
+func New(cfg *config.AgentConfig, rules []decision.Rule, logger *slog.Logger) (*Agent, error) {
+	if cfg == nil {
+		return nil, fmt.Errorf("agent config is nil")
+	}
+	if logger == nil {
+		// Every later step logs through this value, so never leave it nil.
+		logger = slog.Default()
+	}
+
+	// Matrix client
+	matrixClient, err := matrix.New(cfg.Matrix)
+	if err != nil {
+		return nil, fmt.Errorf("matrix client: %w", err)
+	}
+
+	// SSH executor
+	sshExec := ssh.NewExecutor(cfg.SSH)
+
+	// LLM client
+	primaryLLM, err := shelllm.FromConfig(cfg.LLM.Primary)
+	if err != nil {
+		return nil, fmt.Errorf("primary LLM: %w", err)
+	}
+
+	var llmFunc coretypes.CompleteFunc = primaryLLM
+	if cfg.LLM.Fallback.Provider != "" {
+		// A broken fallback is not fatal: keep running on the primary alone.
+		fallbackLLM, err := shelllm.FromConfig(cfg.LLM.Fallback)
+		if err != nil {
+			logger.Warn("fallback LLM config error", "err", err)
+		} else {
+			llmFunc = shelllm.WithFallback(primaryLLM, fallbackLLM)
+		}
+	}
+
+	// Effects runner
+	runner := effects.NewRunner(matrixClient, sshExec, logger)
+
+	// NOTE(review): the personality field is left at its zero value here.
+	a := &Agent{
+		cfg:    cfg,
+		rules:  rules,
+		llm:    llmFunc,
+		matrix: matrixClient,
+		runner: runner,
+		logger: logger,
+	}
+
+	// Matrix event listener; created last because it needs a.handleEvent.
+	a.listener = matrix.NewListener(matrixClient, cfg.Matrix, a.handleEvent, logger)
+
+	return a, nil
+}
+
+// Run logs the agent's identity and then hands control to the Matrix
+// listener, returning whatever the listener's Run returns. It is documented
+// to block until ctx is cancelled.
+func (a *Agent) Run(ctx context.Context) error {
+	id, name := a.cfg.Agent.ID, a.cfg.Agent.Name
+	a.logger.Info("agent starting", "id", id, "name", name)
+
+	return a.listener.Run(ctx)
+}
+
+// handleEvent is called by the matrix Listener for each filtered incoming event.
+//
+// It evaluates the rules against msgCtx, falls back to a single LLM action for
+// mentions and direct messages that matched nothing, resolves every LLM action
+// into a plain reply, and passes the resulting actions to the effects runner.
+//
+// Events that produce no actions are dropped before anything is sent to the
+// room, so the typing indicator is only shown when the agent is going to act.
+func (a *Agent) handleEvent(ctx context.Context, msgCtx decision.MessageContext, evt *event.Event) {
+	actions := decision.Evaluate(msgCtx, a.rules)
+
+	// If no rules matched and the message mentions the bot or is a DM, use LLM.
+	if len(actions) == 0 && (msgCtx.IsMention || msgCtx.IsDirectMsg) {
+		actions = []decision.Action{{
+			Kind: decision.ActionKindLLM,
+			LLM:  &decision.LLMAction{ContextKey: msgCtx.RoomID},
+		}}
+	}
+
+	if len(actions) == 0 {
+		return
+	}
+
+	roomID := evt.RoomID.String()
+
+	if a.cfg.Personality.Behavior.TypingIndicator {
+		// Typing notifications are best-effort: a failure here must not stop
+		// the agent from answering, so the result is deliberately discarded.
+		_ = a.matrix.SendTyping(ctx, roomID, true)
+		defer func() { _ = a.matrix.SendTyping(ctx, roomID, false) }()
+	}
+
+	// Expand LLM actions inline (simplified — real impl would maintain conversation state)
+	expanded := make([]decision.Action, 0, len(actions))
+	for _, act := range actions {
+		if act.Kind != decision.ActionKindLLM {
+			expanded = append(expanded, act)
+			continue
+		}
+
+		content, err := a.runLLM(ctx, msgCtx)
+		if err != nil {
+			// Log the cause and send a generic apology in place of the answer.
+			a.logger.Error("llm error", "err", err)
+			content = "Sorry, I encountered an error."
+		}
+		expanded = append(expanded, decision.Action{
+			Kind:  decision.ActionKindReply,
+			Reply: &decision.ReplyAction{Content: content},
+		})
+	}
+
+	a.runner.Execute(ctx, roomID, expanded)
+}
+
+// runLLM sends the message content to the agent's completion function as a
+// single user turn and returns the content of the response.
+//
+// The request takes its model, token limit and temperature from the primary
+// LLM configuration and uses the agent description as the system prompt. No
+// earlier conversation messages are included. Any error from the completion
+// function is returned unchanged together with an empty string.
+func (a *Agent) runLLM(ctx context.Context, msgCtx decision.MessageContext) (string, error) {
+	primary := a.cfg.LLM.Primary
+	userTurn := coretypes.Message{Role: coretypes.RoleUser, Content: msgCtx.Content}
+
+	resp, err := a.llm(ctx, coretypes.CompletionRequest{
+		Model:        primary.Model,
+		MaxTokens:    primary.MaxTokens,
+		Temperature:  primary.Temperature,
+		SystemPrompt: a.cfg.Agent.Description,
+		Messages:     []coretypes.Message{userTurn},
+	})
+	if err != nil {
+		return "", err
+	}
+
+	return resp.Content, nil
+}