# File: agents_and_robots/agents/devops/config.yaml
# Web-viewer metadata at time of capture: 336 lines, 7.8 KiB, YAML

# ============================================
# IDENTITY
# ============================================
# Identifies this agent: id, display name, version, enable flag,
# free-text description and classification tags.
# NOTE(review): nesting was restored from a copy whose indentation had been
# stripped — confirm the layout against the loader's schema.
agent:
  id: devops-bot
  name: "DevOps Agent"
  version: "1.0.0"
  enabled: true
  description: "Gestiona deployments, monitoreo y salud de infraestructura"
  tags: [devops, infrastructure, deployment]
# ============================================
# PERSONALITY AND BEHAVIOR
# ============================================
# Conversational style, canned reply templates and interaction switches.
# NOTE(review): nesting was restored from a copy whose indentation had been
# stripped. `templates` and `behavior` are placed under `personality` because
# every other section uses a single top-level key — confirm against the
# loader's schema.
personality:
  tone: direct  # direct | friendly | formal | casual | technical
  verbosity: concise  # minimal | concise | detailed | verbose
  language: es
  languages_supported: [es, en]
  emoji_style: moderate  # none | minimal | moderate | heavy
  prefix: "🔧"
  error_style: helpful  # terse | helpful | detailed
  # Reply texts; `{{.Error}}` and `{{.Summary}}` are placeholders filled in
  # by the consumer (presumably Go text/template syntax — confirm).
  templates:
    greeting: "Listo para operar. ¿Qué necesitas?"
    unknown_command: "No reconozco eso. Usa `!help` para ver comandos."
    permission_denied: "No tienes permisos para eso."
    error: "Algo falló: {{.Error}}"
    success: "Hecho. {{.Summary}}"
    busy: "Estoy ejecutando otra tarea ahora. Espera o usa `!queue`."
  behavior:
    proactive: false
    ask_confirmation: true
    show_reasoning: false
    thread_replies: true
    typing_indicator: true
    acknowledge_receipt: true
# ============================================
# LLM — CONNECTION AND REASONING
# ============================================
# Primary and fallback model endpoints, prompt/context settings, tool-use
# limits and rate limits.
# NOTE(review): nesting was restored from a copy whose indentation had been
# stripped. `tool_use` and `rate_limit` are placed as siblings of `reasoning`;
# the source does not show whether `tool_use` belongs inside `reasoning` —
# confirm against the loader's schema.
llm:
  primary:
    provider: anthropic
    model: claude-sonnet-4-20250514
    # Name of the environment variable holding the key, not the key itself.
    api_key_env: ANTHROPIC_API_KEY
    base_url: ""
    max_tokens: 4096
    temperature: 0.3
  fallback:
    provider: ollama
    model: llama3
    base_url: "http://localhost:11434/v1"
    max_tokens: 2048
    temperature: 0.5
  reasoning:
    system_prompt_file: "prompts/devops-system.md"
    context_window: 8192
    memory_messages: 20
  tool_use:
    enabled: true
    max_iterations: 5
    parallel_calls: false
  rate_limit:
    requests_per_minute: 30
    tokens_per_minute: 100000
    concurrent_requests: 3
# ============================================
# TOOLS — AVAILABLE CAPABILITIES
# ============================================
# Per-tool enable flags and limits: SSH, HTTP, local scripts, file
# operations and MCP servers.
# NOTE(review): nesting was restored from a copy whose indentation had been
# stripped — confirm the layout against the loader's schema.
tools:
  ssh:
    enabled: true
    allowed_targets: [production, staging, monitoring]
    # NOTE(review): a deny-list of literal strings is presumably matched
    # textually and would miss variants such as `rm -rf /*` — confirm the
    # matching semantics before relying on it.
    forbidden_commands:
      - "rm -rf /"
      - "dd if="
      - "mkfs"
    timeout: 30s
    max_concurrent: 3
    require_confirmation:
      - production
  http:
    enabled: true
    allowed_domains:
      - "api.github.com"
      - "api.gitea.internal"
      - "grafana.internal"
    timeout: 15s
    max_retries: 2
  scripts:
    enabled: true
    scripts_dir: "./scripts/"
    # NOTE(review): the `daily-report` schedule references `daily-report.sh`,
    # which is not listed here — confirm whether scheduled scripts bypass
    # this allow-list.
    allowed:
      - "deploy.sh"
      - "healthcheck.sh"
      - "rollback.sh"
    timeout: 120s
    sandbox: false
  file_ops:
    enabled: false
    allowed_paths: ["/var/log/", "/tmp/reports/"]
    read_only: true
  mcp:
    enabled: true
    # MCP servers this agent connects to, with the tools used from each.
    servers:
      - name: github
        url: "stdio://mcp-github"
        tools: ["create_issue", "list_prs", "merge_pr"]
      - name: filesystem
        url: "stdio://mcp-filesystem"
        tools: ["read_file", "list_dir"]
    # Tools this agent itself exposes on the given port.
    expose:
      port: 9100
      tools: ["deploy", "status", "rollback"]
# ============================================
# MATRIX — CONNECTION AND ROOMS
# ============================================
# Homeserver credentials, encryption settings, room lists and message
# filters. `${...}` values are placeholders expanded by the consumer.
# NOTE(review): nesting was restored from a copy whose indentation had been
# stripped — confirm the layout against the loader's schema.
matrix:
  homeserver: "${MATRIX_HOMESERVER}"
  user_id: "@devops-bot:${MATRIX_SERVER_NAME}"
  # Name of the environment variable holding the token, not the token itself.
  access_token_env: MATRIX_TOKEN_DEVOPS
  device_id: "DEVOPSBOT01"
  encryption:
    enabled: false  # enable once E2EE is configured
    store_path: "./agents/devops/data/crypto/"
    pickle_key_env: PICKLE_KEY_DEVOPS_BOT
    trust_mode: tofu
  rooms:
    listen:
      - "${MATRIX_ROOM_DEVOPS}"
      - "${MATRIX_ROOM_ALERTS}"
    respond:
      - "${MATRIX_ROOM_DEVOPS}"
      - "${MATRIX_ROOM_LOGS}"
    admin:
      - "${MATRIX_ROOM_ADMIN}"
  filters:
    command_prefix: "!"
    mention_respond: true
    dm_respond: true
    ignore_bots: true
    ignore_users: []
    min_power_level: 0
# ============================================
# INTER-AGENT COMMUNICATION
# ============================================
# Known peer agents, delegation rules and the messaging protocol.
# NOTE(review): nesting was restored from a copy whose indentation had been
# stripped — confirm the layout against the loader's schema.
agents:
  peers:
    - id: monitor-bot
      capabilities: [alerts, metrics, healthcheck]
      room: "${MATRIX_ROOM_AGENTS_INTERNAL}"
    - id: assistant-bot
      capabilities: [search, summarize, translate]
      room: "${MATRIX_ROOM_AGENTS_INTERNAL}"
  delegation:
    enabled: true
    can_delegate_to: [monitor-bot, assistant-bot]
    can_receive_from: [assistant-bot]
    max_delegation_depth: 2
    timeout: 60s
  protocol:
    format: json
    channel: matrix
    heartbeat_interval: 30s
# ============================================
# SSH — SERVER INVENTORY
# ============================================
# Connection defaults plus named target groups; the target names match
# those listed in `tools.ssh.allowed_targets`.
# NOTE(review): nesting was restored from a copy whose indentation had been
# stripped — confirm the layout against the loader's schema.
ssh:
  defaults:
    user: deploy
    port: 22
    # Name of the environment variable holding the key path.
    key_file_env: SSH_PRIVATE_KEY_PATH
    known_hosts: "./data/known_hosts"
    keepalive_interval: 15s
    timeout: 10s
  targets:
    production:
      hosts: ["${PROD_HOST_1}", "${PROD_HOST_2}"]
      user: deploy
      jump_host: "${BASTION_HOST}"
    staging:
      hosts: ["${STAGING_HOST}"]
      user: deploy
    monitoring:
      hosts: ["${MONITORING_HOST}"]
      user: monitor
      # Presumably overrides `defaults.key_file_env` for this target —
      # confirm the merge behavior in the consumer.
      key_file_env: SSH_MONITOR_KEY_PATH
# ============================================
# PERMISSIONS AND SECURITY
# ============================================
# Role definitions (users and permitted actions), audit logging and the
# secrets provider.
# NOTE(review): nesting was restored from a copy whose indentation had been
# stripped — confirm the layout against the loader's schema.
security:
  roles:
    admin:
      users: ["@admin:${MATRIX_SERVER_NAME}"]
      actions: ["*"]
    developer:
      users:
        - "@dev1:${MATRIX_SERVER_NAME}"
        - "@dev2:${MATRIX_SERVER_NAME}"
      actions: ["deploy:staging", "status:*", "logs:*"]
    viewer:
      # NOTE(review): "*" presumably matches every user — confirm.
      users: ["*"]
      actions: ["status:*", "help"]
  audit:
    enabled: true
    log_file: "./data/audit.log"
    log_to_room: "${MATRIX_ROOM_AUDIT}"
    include: [ssh, deploy, config_change]
  secrets:
    provider: env
# ============================================
# SCHEDULING AND AUTOMATED TASKS
# ============================================
# List of cron-triggered jobs; each has a name, a cron expression and an
# action.
# NOTE(review): nesting was restored from a copy whose indentation had been
# stripped. `on_failure` and `output_room` are placed as siblings of
# `action`; the source does not show whether they belong inside it — confirm
# against the loader's schema.
# NOTE(review): `daily-report.sh` is not in the `tools.scripts.allowed` list
# (deploy.sh, healthcheck.sh, rollback.sh) — confirm scheduled scripts are
# permitted to run it.
# NOTE(review): two jobs target `production` over SSH, which
# `tools.ssh.require_confirmation` lists — confirm unattended runs are exempt.
schedules:
  - name: healthcheck
    cron: "*/5 * * * *"  # every 5 minutes
    action:
      kind: ssh
      target: production
      command: "/opt/scripts/healthcheck.sh"
    on_failure:
      notify_room: "${MATRIX_ROOM_ALERTS}"
      escalate_to: "@admin:${MATRIX_SERVER_NAME}"
  - name: daily-report
    cron: "0 9 * * *"  # daily at 09:00
    action:
      kind: script
      script: "daily-report.sh"
    output_room: "${MATRIX_ROOM_DEVOPS}"
  - name: backup-check
    cron: "0 */6 * * *"  # every 6 hours, on the hour
    action:
      kind: ssh
      target: production
      command: "/opt/scripts/check-backups.sh"
    on_failure:
      notify_room: "${MATRIX_ROOM_ALERTS}"
      escalate_to: "@admin:${MATRIX_SERVER_NAME}"
# ============================================
# OBSERVABILITY
# ============================================
# Logging, metrics endpoint, health endpoint and tracing settings.
# NOTE(review): nesting was restored from a copy whose indentation had been
# stripped — confirm the layout against the loader's schema.
observability:
  logging:
    level: info
    format: json
    # NOTE(review): `output` is stdout while `file` is also set — confirm
    # whether the file is used in addition to, or instead of, stdout.
    output: stdout
    file: "./data/agent.log"
  metrics:
    enabled: true
    port: 9090
    path: /metrics
    export: prometheus
  health:
    enabled: true
    port: 8080
    path: /healthz
  tracing:
    enabled: false
    provider: jaeger
    endpoint: "http://jaeger:14268/api/traces"
# ============================================
# RESILIENCE
# ============================================
# Circuit-breaker, retry, shutdown and request-queue settings.
# NOTE(review): nesting was restored from a copy whose indentation had been
# stripped — confirm the layout against the loader's schema.
resilience:
  circuit_breaker:
    failure_threshold: 5
    timeout: 30s
    half_open_max: 2
  retry:
    max_attempts: 3
    backoff: exponential
    initial_delay: 1s
    max_delay: 30s
  shutdown:
    timeout: 15s
    drain_messages: true
    save_state: true
    state_file: "./data/state.json"
  queue:
    enabled: true
    max_size: 50
    priority_users: ["@admin:${MATRIX_SERVER_NAME}"]
# ============================================
# STORAGE AND STATE
# ============================================
# Backends for agent state, the cache and message history.
# NOTE(review): nesting was restored from a copy whose indentation had been
# stripped — confirm the layout against the loader's schema.
storage:
  state:
    backend: sqlite
    path: "./data/agent.db"
  cache:
    enabled: true
    backend: memory
    ttl: 10m
    max_entries: 500
  history:
    backend: sqlite
    path: "./data/history.db"
    retention: 720h  # 30 days