feat: implement server-wide management actions and enhance TUI dashboard

This commit is contained in:
2026-03-04 20:51:02 +00:00
parent 150f9d2990
commit ddec55871b
13 changed files with 621 additions and 52 deletions
+37
View File
@@ -4,6 +4,43 @@ Plataforma en Go para gestionar bots Matrix autónomos. Cada bot combina un **co
--- ---
## Inicio rápido
```bash
# 1. Compilar todo
./build.sh
# 2. Cargar variables de entorno
source .env
# 3. Lanzar la TUI interactiva (dashboard)
./bin/dashboard
```
### Dashboard TUI
El dashboard es una interfaz de terminal interactiva (bubbletea) para gestionar los bots del servidor:
```
./bin/dashboard
```
Desde la TUI puedes:
- **Agents** — ver estado de cada agente, iniciar/detener/reiniciar/kill individual, ver logs
- **Server** — operaciones masivas: start all, stop all, restart all, kill all con resumen de estado
### Otros binarios
| Binario | Uso |
|---------|-----|
| `./bin/launcher` | Inicia uno o varios agentes como procesos |
| `./bin/agentctl` | CLI: `list`, `start`, `stop`, `remove` |
| `./bin/register` | Registra bots en Synapse vía admin API |
| `./bin/dashboard` | TUI interactiva para gestión de bots |
---
## Principio de diseño ## Principio de diseño
El proyecto usa el patrón **pure core / impure shell**: El proyecto usa el patrón **pure core / impure shell**:
Executable
+30
View File
@@ -0,0 +1,30 @@
#!/usr/bin/env bash
# Builds every command binary of the project into the $BIN directory.
# Aborts on the first failing command, unset variable or failed pipeline stage.
set -euo pipefail
# Put the Go toolchain location first on PATH so `go` resolves from there.
export PATH="/usr/local/go/bin:$PATH"
# Output directory for the compiled binaries.
BIN="bin"
# Expanded unquoted in the build command, so it splits into two arguments.
TAGS="-tags goolm"
# Expanded quoted in the build command, so "-ldflags=-s -w" stays one argument.
LDFLAGS="-ldflags=-s -w"
mkdir -p "$BIN"
echo "==> Compilando todos los binarios en $BIN/ ..."
# Each entry is "<binary name>:<package path>".
targets=(
"launcher:./cmd/launcher"
"agentctl:./cmd/agentctl"
"register:./cmd/register"
"dashboard:./cmd/dashboard"
)
for entry in "${targets[@]}"; do
# Text before the first ':' is the binary name, text after the last ':' the package.
name="${entry%%:*}"
pkg="${entry##*:}"
echo " $name"
go build $TAGS "$LDFLAGS" -o "$BIN/$name" "$pkg"
done
echo ""
echo "==> Listo. Binarios disponibles:"
# List what was produced, with human-readable sizes.
ls -lh "$BIN"/
+57 -2
View File
@@ -50,9 +50,64 @@ read_pid() {
[[ -f "$f" ]] && cat "$f" || echo 0 [[ -f "$f" ]] && cat "$f" || echo 0
} }
# Map an agent ID to its config path by scanning agents/*/config.yaml.
#
# Arguments: $1 - agent ID to look up.
# Output:    prints the path of the first config whose id matches; prints
#            nothing when no config matches.
config_path_for() {
local target_id="$1"
for cfg in agents/*/config.yaml; do
# An unmatched glob stays literal; skip anything that is not a real file.
[[ -f "$cfg" ]] || continue
local id
# Take the second whitespace-separated field of the first "id:" line.
id=$(grep -m1 '^ id:' "$cfg" | awk '{print $2}')
if [[ "$id" == "$target_id" ]]; then
echo "$cfg"
return
fi
done
}
# Find all PIDs of launcher processes for a given agent ID.
# Looks the agent's config path up with config_path_for and matches it against
# full process command lines via `pgrep -f`.
#
# Arguments: $1 - agent ID.
# Output:    newline-separated PIDs (may be empty). Always returns success.
find_agent_pids() {
local id="$1"
local cfg; cfg="$(config_path_for "$id")"
# Unknown agent: nothing to search for.
if [[ -z "$cfg" ]]; then
return
fi
# pgrep exits non-zero when nothing matches; `|| true` keeps that from
# propagating as a failure to the caller.
# NOTE(review): $cfg is interpolated into the pgrep regular expression without
# escaping, so characters such as '.' in the path act as regex metacharacters.
pgrep -f "launcher.*-c.*${cfg}" 2>/dev/null || true
}
is_running() { is_running() {
local pid; pid="$(read_pid "$1")" local id="$1"
[[ "$pid" -gt 0 ]] && kill -0 "$pid" 2>/dev/null
# First check PID file
local pid; pid="$(read_pid "$id")"
if [[ "$pid" -gt 0 ]] && kill -0 "$pid" 2>/dev/null; then
return 0
fi
# PID file is stale or missing — search for actual processes
local pids; pids="$(find_agent_pids "$id")"
if [[ -n "$pids" ]]; then
# Update PID file with the first found process
local first_pid; first_pid="$(echo "$pids" | head -1)"
echo "$first_pid" > "$(pid_file "$id")"
return 0
fi
# Truly not running — clean up stale PID file
[[ "$pid" -gt 0 ]] && rm -f "$(pid_file "$id")"
return 1
}
# Count how many instances of an agent are running.
count_instances() {
local id="$1"
local pids; pids="$(find_agent_pids "$id")"
if [[ -z "$pids" ]]; then
echo 0
else
echo "$pids" | wc -l
fi
} }
agent_status() { agent_status() {
+7
View File
@@ -26,6 +26,7 @@ while IFS='|' read -r id _version _enabled _desc _cfg; do
fi fi
pid="$(read_pid "$id")" pid="$(read_pid "$id")"
instance_count="$(count_instances "$id")"
((found++)) || true ((found++)) || true
# Uptime: calcular desde el inicio del proceso # Uptime: calcular desde el inicio del proceso
@@ -78,6 +79,12 @@ while IFS='|' read -r id _version _enabled _desc _cfg; do
printf "%-22s ${GRN}%-8s${RST} %-12s %-10s %-8s %s\n" \ printf "%-22s ${GRN}%-8s${RST} %-12s %-10s %-8s %s\n" \
"$id" "$pid" "$uptime" "$mem" "${cpu_pct}%" "$log_size" "$id" "$pid" "$uptime" "$mem" "${cpu_pct}%" "$log_size"
# Warn about duplicate instances
if [[ "$instance_count" -gt 1 ]]; then
printf " ${RED}⚠ WARNING: %d instances running!${RST} PIDs: %s\n" \
"$instance_count" "$(find_agent_pids "$id" | tr '\n' ' ')"
fi
done < <(list_agents_raw) done < <(list_agents_raw)
if [[ "$found" -eq 0 ]]; then if [[ "$found" -eq 0 ]]; then
+7 -4
View File
@@ -55,11 +55,14 @@ case "$CMD" in
killed=0 killed=0
for id in "${agents[@]}"; do for id in "${agents[@]}"; do
pid="$(read_pid "$id")" all_pids="$(find_agent_pids "$id")"
if [[ "$pid" -gt 0 ]] && kill -0 "$pid" 2>/dev/null; then if [[ -n "$all_pids" ]]; then
kill -9 "$pid" 2>/dev/null || true cnt="$(echo "$all_pids" | wc -l)"
for p in $all_pids; do
kill -9 "$p" 2>/dev/null || true
done
rm -f "$(pid_file "$id")" rm -f "$(pid_file "$id")"
ok "$id killed (PID $pid)" ok "$id killed ($cnt instance(s), PIDs: $(echo $all_pids | tr '\n' ' '))"
((killed++)) || true ((killed++)) || true
else else
dim " $id (no estaba corriendo)" dim " $id (no estaba corriendo)"
+21 -2
View File
@@ -14,11 +14,30 @@ start_agent() {
local id="$1" cfg="$2" local id="$1" cfg="$2"
local log; log="$(log_file "$id")" local log; log="$(log_file "$id")"
local pid_f; pid_f="$(pid_file "$id")" local pid_f; pid_f="$(pid_file "$id")"
local bin="$REPO_ROOT/bin/launcher"
# Check for duplicate instances already running
local existing; existing="$(count_instances "$id")"
if [[ "$existing" -gt 0 ]]; then
warn "$id already has $existing instance(s) running (orphan processes?)"
warn " Run ./dev-scripts/stop.sh $id first to clean up"
return 1
fi
info "Iniciando $id..." info "Iniciando $id..."
# Lanza el launcher en background, desacoplado del terminal # Build the binary first to avoid go run wrapper PID issues
nohup "$GO" run -tags goolm ./cmd/launcher -c "$cfg" --log-level "${LOG_LEVEL:-info}" \ if [[ ! -x "$bin" ]] || [[ "$(find ./cmd/launcher -newer "$bin" 2>/dev/null | head -1)" ]]; then
info "Compilando launcher..."
mkdir -p "$(dirname "$bin")"
"$GO" build -tags goolm -o "$bin" ./cmd/launcher || {
fail "$id error de compilación — revisa el código"
return 1
}
fi
# Launch the compiled binary directly (no go run wrapper)
nohup "$bin" -c "$cfg" --log-level "${LOG_LEVEL:-info}" \
>> "$log" 2>&1 & >> "$log" 2>&1 &
local pid=$! local pid=$!
+22 -8
View File
@@ -18,23 +18,37 @@ while IFS='|' read -r id _version _enabled _desc _cfg; do
continue continue
fi fi
local_pid="$(read_pid "$id")" # Kill ALL instances, not just the one in the PID file
kill -TERM "$local_pid" 2>/dev/null || true all_pids="$(find_agent_pids "$id")"
# Count the non-empty PID lines. grep -c already prints "0" when nothing matches
# but exits with status 1, so an `|| echo 0` fallback would append a second "0"
# and produce the two-line value "0\n0", which breaks the numeric comparison
# that follows. `|| true` only neutralizes the exit status.
instance_count="$(echo "$all_pids" | grep -c . 2>/dev/null || true)"
# Espera hasta 5s a que muera limpiamente if [[ "$instance_count" -gt 1 ]]; then
warn "$id has $instance_count instances running — stopping all"
fi
# Send SIGTERM to all instances
for p in $all_pids; do
kill -TERM "$p" 2>/dev/null || true
done
# Wait up to 5s for graceful shutdown
for _ in {1..10}; do for _ in {1..10}; do
kill -0 "$local_pid" 2>/dev/null || break remaining="$(find_agent_pids "$id")"
[[ -z "$remaining" ]] && break
sleep 0.5 sleep 0.5
done done
# SIGKILL si todavía sigue vivo # SIGKILL any survivors
if kill -0 "$local_pid" 2>/dev/null; then survivors="$(find_agent_pids "$id")"
if [[ -n "$survivors" ]]; then
warn "$id no respondió a SIGTERM, enviando SIGKILL..." warn "$id no respondió a SIGTERM, enviando SIGKILL..."
kill -9 "$local_pid" 2>/dev/null || true for p in $survivors; do
kill -9 "$p" 2>/dev/null || true
done
fi fi
rm -f "$(pid_file "$id")" rm -f "$(pid_file "$id")"
ok "$id detenido (PID $local_pid)" ok "$id detenido ($instance_count instance(s) stopped)"
((stopped++)) || true ((stopped++)) || true
done < <(list_agents_raw) done < <(list_agents_raw)
+8
View File
@@ -16,5 +16,13 @@ type MsgActionDone struct {
// MsgLogsLoaded carries log lines for the selected agent. // MsgLogsLoaded carries log lines for the selected agent.
type MsgLogsLoaded struct{ Lines []string } type MsgLogsLoaded struct{ Lines []string }
// MsgServerActionDone reports the result of a server-wide bulk action.
type MsgServerActionDone struct {
// Action is the human-readable action name (e.g. "Start All"); it prefixes
// the status message shown after the action completes.
Action string
// Total is the number of agents the action was attempted on.
Total int
// Failed is the number of failures; zero means the whole action succeeded.
Failed int
// Errors holds one description per failure.
Errors []string
}
// MsgTick triggers a periodic refresh. // MsgTick triggers a periodic refresh.
type MsgTick struct{} type MsgTick struct{}
+17 -4
View File
@@ -10,6 +10,7 @@ const (
ScreenAgentList // list all agents with status ScreenAgentList // list all agents with status
ScreenAgentActions // actions for a selected agent ScreenAgentActions // actions for a selected agent
ScreenLogs // tail log output ScreenLogs // tail log output
ScreenServer // server-wide process management
) )
// Model is the complete TUI state — pure data. // Model is the complete TUI state — pure data.
@@ -34,10 +35,11 @@ type AgentView struct {
Enabled bool Enabled bool
Running bool Running bool
PID int PID int
Uptime string // formatted: "2h 15m" Instances int // number of running instances (>1 means duplicates)
Memory string // formatted: "42 MB" Uptime string // formatted: "2h 15m"
CPU string // formatted: "1.2%" Memory string // formatted: "42 MB"
LogSize string // formatted: "350 KB" CPU string // formatted: "1.2%"
LogSize string // formatted: "350 KB"
} }
// MenuOption represents a selectable menu item. // MenuOption represents a selectable menu item.
@@ -50,10 +52,21 @@ type MenuOption struct {
func MainMenuOptions() []MenuOption { func MainMenuOptions() []MenuOption {
return []MenuOption{ return []MenuOption{
{Label: "Agents", Desc: "Gestionar agentes"}, {Label: "Agents", Desc: "Gestionar agentes"},
{Label: "Server", Desc: "Gestionar servidor"},
{Label: "Quit", Desc: "Salir"}, {Label: "Quit", Desc: "Salir"},
} }
} }
// ServerMenuOptions lists the bulk actions offered on the server screen, in
// display order. The Label of the selected entry is what gets passed on to
// executeServerAction, so labels act as dispatch keys as well as display text.
func ServerMenuOptions() []MenuOption {
	var (
		startAll   = MenuOption{Label: "Start All", Desc: "Iniciar todos los agentes habilitados"}
		stopAll    = MenuOption{Label: "Stop All", Desc: "Detener todos los agentes"}
		restartAll = MenuOption{Label: "Restart All", Desc: "Reiniciar todos los agentes"}
		killAll    = MenuOption{Label: "Kill All", Desc: "SIGKILL forzado a todos"}
	)
	return []MenuOption{startAll, stopAll, restartAll, killAll}
}
// AgentActionOptions returns the available actions based on agent state. // AgentActionOptions returns the available actions based on agent state.
func AgentActionOptions(running bool) []MenuOption { func AgentActionOptions(running bool) []MenuOption {
if running { if running {
+64 -3
View File
@@ -14,6 +14,12 @@ const (
IntentLoadLogs IntentKind = "load_logs" IntentLoadLogs IntentKind = "load_logs"
IntentTick IntentKind = "tick" IntentTick IntentKind = "tick"
IntentQuit IntentKind = "quit" IntentQuit IntentKind = "quit"
// Server-wide bulk operations
IntentStartAll IntentKind = "start_all"
IntentStopAll IntentKind = "stop_all"
IntentRestartAll IntentKind = "restart_all"
IntentKillAll IntentKind = "kill_all"
) )
// Intent is pure data describing a side effect to execute. // Intent is pure data describing a side effect to execute.
@@ -45,9 +51,11 @@ func Update(model Model, msg interface{}) (Model, []Intent) {
case MsgAgentsLoaded: case MsgAgentsLoaded:
model.Agents = m.Agents model.Agents = m.Agents
// Clamp cursor // Clamp cursor only on screens that use the agent list
if model.Cursor >= len(model.Agents) && len(model.Agents) > 0 { if model.Screen == ScreenAgentList {
model.Cursor = len(model.Agents) - 1 if model.Cursor >= len(model.Agents) && len(model.Agents) > 0 {
model.Cursor = len(model.Agents) - 1
}
} }
return model, []Intent{{Kind: IntentTick}} return model, []Intent{{Kind: IntentTick}}
@@ -59,6 +67,14 @@ func Update(model Model, msg interface{}) (Model, []Intent) {
} }
return model, []Intent{{Kind: IntentLoadAgents}} return model, []Intent{{Kind: IntentLoadAgents}}
case MsgServerActionDone:
if m.Failed == 0 {
model.StatusMsg = fmt.Sprintf("%s: %d agents OK", m.Action, m.Total)
} else {
model.StatusMsg = fmt.Sprintf("%s: %d/%d failed", m.Action, m.Failed, m.Total)
}
return model, []Intent{{Kind: IntentLoadAgents}}
case MsgLogsLoaded: case MsgLogsLoaded:
model.LogLines = m.Lines model.LogLines = m.Lines
model.LogScroll = max(0, len(m.Lines)-visibleLogLines(model)) model.LogScroll = max(0, len(m.Lines)-visibleLogLines(model))
@@ -92,6 +108,8 @@ func updateKey(model Model, key KeyMsg) (Model, []Intent) {
return updateAgentActions(model, key) return updateAgentActions(model, key)
case ScreenLogs: case ScreenLogs:
return updateLogs(model, key) return updateLogs(model, key)
case ScreenServer:
return updateServerScreen(model, key)
} }
return model, nil return model, nil
} }
@@ -109,6 +127,11 @@ func updateMainScreen(model Model, key KeyMsg) (Model, []Intent) {
model.Screen = ScreenAgentList model.Screen = ScreenAgentList
model.Cursor = 0 model.Cursor = 0
return model, []Intent{{Kind: IntentLoadAgents}} return model, []Intent{{Kind: IntentLoadAgents}}
case "Server":
model.Screen = ScreenServer
model.Cursor = 0
model.StatusMsg = ""
return model, []Intent{{Kind: IntentLoadAgents}}
case "Quit": case "Quit":
return model, []Intent{{Kind: IntentQuit}} return model, []Intent{{Kind: IntentQuit}}
} }
@@ -210,6 +233,44 @@ func updateLogs(model Model, key KeyMsg) (Model, []Intent) {
return model, nil return model, nil
} }
// updateServerScreen handles a key press on the server management screen.
//
// "0" returns to the main screen, up/k and down/j move the cursor within the
// server menu, and enter runs the highlighted action. Any other key leaves the
// model untouched. Only enter can produce intents.
func updateServerScreen(model Model, key KeyMsg) (Model, []Intent) {
	actions := ServerMenuOptions()

	if key.Str == "enter" {
		// Guard against a cursor that points past the menu.
		if model.Cursor >= len(actions) {
			return model, nil
		}
		return executeServerAction(model, actions[model.Cursor].Label)
	}

	last := len(actions) - 1
	switch key.Str {
	case "up", "k":
		model.Cursor = clamp(model.Cursor-1, 0, last)
	case "down", "j":
		model.Cursor = clamp(model.Cursor+1, 0, last)
	case "0":
		// Back to the main menu with a clean slate.
		model.StatusMsg = ""
		model.Cursor = 0
		model.Screen = ScreenMain
	}
	return model, nil
}
// executeServerAction maps a server menu label to its bulk intent.
//
// For a known label it sets a progress message on the model and returns a
// single intent of the matching kind. An unknown label returns the model
// unchanged with no intents.
func executeServerAction(model Model, action string) (Model, []Intent) {
	var (
		progress string
		kind     IntentKind
	)
	switch action {
	case "Start All":
		progress, kind = "Starting all agents...", IntentStartAll
	case "Stop All":
		progress, kind = "Stopping all agents...", IntentStopAll
	case "Restart All":
		progress, kind = "Restarting all agents...", IntentRestartAll
	case "Kill All":
		progress, kind = "Killing all agents...", IntentKillAll
	default:
		return model, nil
	}

	model.StatusMsg = progress
	return model, []Intent{{Kind: kind}}
}
// ── pure helpers ───────────────────────────────────────────────────────── // ── pure helpers ─────────────────────────────────────────────────────────
func visibleLogLines(m Model) int { func visibleLogLines(m Model) int {
+54
View File
@@ -16,6 +16,8 @@ func View(model Model) string {
return viewAgentActions(model) return viewAgentActions(model)
case ScreenLogs: case ScreenLogs:
return viewLogs(model) return viewLogs(model)
case ScreenServer:
return viewServer(model)
default: default:
return "" return ""
} }
@@ -78,6 +80,10 @@ func viewAgentList(m Model) string {
b.WriteString(fmt.Sprintf(" %s%s %-20s %-8s %s\n", b.WriteString(fmt.Sprintf(" %s%s %-20s %-8s %s\n",
cursor, icon, a.ID, a.Version, status)) cursor, icon, a.ID, a.Version, status))
if a.Instances > 1 {
b.WriteString(fmt.Sprintf(" ⚠ WARNING: %d instances running!\n", a.Instances))
}
} }
if m.StatusMsg != "" { if m.StatusMsg != "" {
@@ -177,6 +183,54 @@ func viewLogs(m Model) string {
return b.String() return b.String()
} }
// viewServer renders the server management screen: a title, a summary line
// with per-status counts, a compact per-agent status list, the bulk-action
// menu with the cursor marker, the optional status message and a key-hint
// footer. While the model holds no agents it shows a loading line instead of
// the summary and list.
func viewServer(m Model) string {
	var out strings.Builder

	out.WriteString("\n Server Management\n")
	out.WriteString(" " + strings.Repeat("─", 44) + "\n")

	running, stopped, disabled := countStatuses(m.Agents)
	if total := len(m.Agents); total > 0 {
		// Summary line followed by one compact row per agent.
		fmt.Fprintf(&out, " %d agents: %d running, %d stopped, %d disabled\n", total, running, stopped, disabled)
		out.WriteString("\n")
		for _, agent := range m.Agents {
			var icon string
			switch {
			case !agent.Enabled:
				icon = " "
			case agent.Running:
				icon = "●"
			default:
				icon = "○"
			}
			fmt.Fprintf(&out, " %s %s\n", icon, agent.ID)
		}
	} else {
		out.WriteString(" Loading...\n")
	}
	out.WriteString("\n")

	// Action menu; the entry under the cursor gets the "> " marker.
	for idx, option := range ServerMenuOptions() {
		marker := " "
		if idx == m.Cursor {
			marker = "> "
		}
		fmt.Fprintf(&out, " %s%-16s %s\n", marker, option.Label, option.Desc)
	}

	if m.StatusMsg != "" {
		out.WriteString("\n " + m.StatusMsg + "\n")
	}

	out.WriteString("\n ↑↓ navegar enter ejecutar 0 volver\n")
	return out.String()
}
func countStatuses(agents []AgentView) (running, stopped, disabled int) { func countStatuses(agents []AgentView) (running, stopped, disabled int) {
for _, a := range agents { for _, a := range agents {
switch { switch {
+170 -22
View File
@@ -47,11 +47,12 @@ type Manager struct {
runDir string runDir string
agentsGlob string agentsGlob string
binPath string binPath string
envFile string // path to .env file for child processes
} }
// NewManager creates a Manager. binPath can be empty for auto-detection. // NewManager creates a Manager. binPath can be empty for auto-detection.
func NewManager(runDir, agentsGlob, binPath string) *Manager { func NewManager(runDir, agentsGlob, binPath string) *Manager {
return &Manager{runDir: runDir, agentsGlob: agentsGlob, binPath: binPath} return &Manager{runDir: runDir, agentsGlob: agentsGlob, binPath: binPath, envFile: ".env"}
} }
// Scan discovers all agents from config files. // Scan discovers all agents from config files.
@@ -81,8 +82,8 @@ func (m *Manager) Scan() ([]AgentInfo, error) {
// Status returns the runtime status for a single agent. // Status returns the runtime status for a single agent.
func (m *Manager) Status(info AgentInfo) AgentStatus { func (m *Manager) Status(info AgentInfo) AgentStatus {
pid := m.readPID(info.ID) pid := m.resolveRunningPID(info.ID)
running := pid > 0 && m.isAlive(pid) running := pid > 0
return AgentStatus{AgentInfo: info, Running: running, PID: pid} return AgentStatus{AgentInfo: info, Running: running, PID: pid}
} }
@@ -101,6 +102,12 @@ func (m *Manager) StatusAll() ([]AgentStatus, error) {
// Start launches an agent process in the background. // Start launches an agent process in the background.
func (m *Manager) Start(info AgentInfo) error { func (m *Manager) Start(info AgentInfo) error {
// Check for orphan instances
if existing := m.findProcessPIDs(info.ID); len(existing) > 0 {
return fmt.Errorf("agent %q already has %d running instance(s) (PIDs: %v) — stop them first",
info.ID, len(existing), existing)
}
if err := os.MkdirAll(m.runDir, 0o755); err != nil { if err := os.MkdirAll(m.runDir, 0o755); err != nil {
return fmt.Errorf("create run dir: %w", err) return fmt.Errorf("create run dir: %w", err)
} }
@@ -113,11 +120,12 @@ func (m *Manager) Start(info AgentInfo) error {
bin := m.resolvedBin() bin := m.resolvedBin()
var cmd *exec.Cmd var cmd *exec.Cmd
if strings.HasPrefix(bin, "go run") { if strings.HasPrefix(bin, "go run") {
cmd = exec.Command("go", "run", "./cmd/launcher", "-c", info.ConfigPath) cmd = exec.Command("go", "run", "-tags", "goolm", "./cmd/launcher", "-c", info.ConfigPath)
} else { } else {
cmd = exec.Command(bin, "-c", info.ConfigPath) cmd = exec.Command(bin, "-c", info.ConfigPath)
} }
cmd.Env = m.buildEnv()
cmd.Stdout = logFile cmd.Stdout = logFile
cmd.Stderr = logFile cmd.Stderr = logFile
cmd.SysProcAttr = &syscall.SysProcAttr{Setsid: true} cmd.SysProcAttr = &syscall.SysProcAttr{Setsid: true}
@@ -135,49 +143,94 @@ func (m *Manager) Start(info AgentInfo) error {
return nil return nil
} }
// Stop sends SIGTERM, waits up to 5s, then SIGKILL if needed. // Stop sends SIGTERM to all instances, waits up to 5s, then SIGKILL if needed.
func (m *Manager) Stop(id string) error { func (m *Manager) Stop(id string) error {
pid := m.readPID(id) pids := m.findProcessPIDs(id)
if pid == 0 || !m.isAlive(pid) { // Also include PID file PID if alive and not already in the list
filePID := m.readPID(id)
if filePID > 0 && m.isAlive(filePID) {
found := false
for _, p := range pids {
if p == filePID {
found = true
break
}
}
if !found {
pids = append(pids, filePID)
}
}
if len(pids) == 0 {
return fmt.Errorf("agent %q is not running", id) return fmt.Errorf("agent %q is not running", id)
} }
if err := syscall.Kill(pid, syscall.SIGTERM); err != nil { // SIGTERM all instances
return fmt.Errorf("SIGTERM: %w", err) for _, pid := range pids {
_ = syscall.Kill(pid, syscall.SIGTERM)
} }
// Wait up to 5 seconds for graceful shutdown. // Wait up to 5 seconds for graceful shutdown.
for i := 0; i < 10; i++ { for i := 0; i < 10; i++ {
if !m.isAlive(pid) { allDead := true
for _, pid := range pids {
if m.isAlive(pid) {
allDead = false
break
}
}
if allDead {
m.removePID(id) m.removePID(id)
return nil return nil
} }
time.Sleep(500 * time.Millisecond) time.Sleep(500 * time.Millisecond)
} }
// Force kill. // Force kill survivors.
if m.isAlive(pid) { for _, pid := range pids {
_ = syscall.Kill(pid, syscall.SIGKILL) if m.isAlive(pid) {
_ = syscall.Kill(pid, syscall.SIGKILL)
}
} }
m.removePID(id) m.removePID(id)
return nil return nil
} }
// Kill sends SIGKILL immediately. // Kill sends SIGKILL to all instances immediately.
func (m *Manager) Kill(id string) error { func (m *Manager) Kill(id string) error {
pid := m.readPID(id) pids := m.findProcessPIDs(id)
if pid == 0 || !m.isAlive(pid) { filePID := m.readPID(id)
if filePID > 0 && m.isAlive(filePID) {
found := false
for _, p := range pids {
if p == filePID {
found = true
break
}
}
if !found {
pids = append(pids, filePID)
}
}
if len(pids) == 0 {
return fmt.Errorf("agent %q is not running", id) return fmt.Errorf("agent %q is not running", id)
} }
err := syscall.Kill(pid, syscall.SIGKILL)
var lastErr error
for _, pid := range pids {
if err := syscall.Kill(pid, syscall.SIGKILL); err != nil {
lastErr = err
}
}
m.removePID(id) m.removePID(id)
return err return lastErr
} }
// Stats gathers resource usage for a running agent from /proc. // Stats gathers resource usage for a running agent from /proc.
func (m *Manager) Stats(id string) (ProcessStats, error) { func (m *Manager) Stats(id string) (ProcessStats, error) {
pid := m.readPID(id) pid := m.resolveRunningPID(id)
if pid == 0 || !m.isAlive(pid) { if pid == 0 {
return ProcessStats{}, fmt.Errorf("agent %q is not running", id) return ProcessStats{}, fmt.Errorf("agent %q is not running", id)
} }
@@ -256,8 +309,12 @@ func (m *Manager) LogTail(id string, lines int) ([]string, error) {
// IsRunning checks if an agent process is alive. // IsRunning checks if an agent process is alive.
func (m *Manager) IsRunning(id string) bool { func (m *Manager) IsRunning(id string) bool {
pid := m.readPID(id) return m.resolveRunningPID(id) > 0
return pid > 0 && m.isAlive(pid) }
// InstanceCount returns how many launcher processes are running for an agent.
func (m *Manager) InstanceCount(id string) int {
return len(m.findProcessPIDs(id))
} }
// ReadPID returns the PID from the PID file, or 0. // ReadPID returns the PID from the PID file, or 0.
@@ -285,6 +342,70 @@ func (m *Manager) readPID(id string) int {
return pid return pid
} }
// findProcessPIDs returns the PIDs of all launcher processes whose command
// line references the config file of the given agent, as reported by
// `pgrep -f`.
//
// It returns nil when the agent has no known config path or when pgrep fails
// for any reason (which includes its non-zero exit when nothing matches).
//
// NOTE(review): the config path is interpolated into the pgrep regular
// expression without escaping, so characters such as '.' in the path are
// treated as regex metacharacters.
func (m *Manager) findProcessPIDs(id string) []int {
	cfgPath := m.configPathFor(id)
	if cfgPath == "" {
		return nil
	}

	out, err := exec.Command("pgrep", "-f", "launcher.*-c.*"+cfgPath).Output()
	if err != nil {
		return nil
	}

	// pgrep prints one PID per line; keep only lines that parse as positive ints.
	var found []int
	for _, field := range strings.Split(strings.TrimSpace(string(out)), "\n") {
		n, convErr := strconv.Atoi(strings.TrimSpace(field))
		if convErr != nil || n <= 0 {
			continue
		}
		found = append(found, n)
	}
	return found
}
// configPathFor returns the path of the first config file matched by the
// manager's agents glob whose agent ID equals id. Files that fail to load are
// skipped. It returns "" when the glob is invalid or no config matches.
func (m *Manager) configPathFor(id string) string {
	paths, globErr := filepath.Glob(m.agentsGlob)
	if globErr != nil {
		return ""
	}
	for i := range paths {
		meta, loadErr := config.LoadMeta(paths[i])
		if loadErr == nil && meta.Agent.ID == id {
			return paths[i]
		}
	}
	return ""
}
// resolveRunningPID returns the PID of a live process for the agent, or 0 when
// none is running.
//
// The PID file is trusted when it names a live process. Otherwise the process
// table is searched; if a launcher is found, the PID file is rewritten with
// the first match (best effort) and that PID is returned. If nothing is found,
// a stale PID file is removed.
func (m *Manager) resolveRunningPID(id string) int {
	recorded := m.readPID(id)
	if recorded > 0 && m.isAlive(recorded) {
		return recorded
	}

	// The PID file is missing or stale; fall back to process discovery.
	if live := m.findProcessPIDs(id); len(live) > 0 {
		first := live[0]
		// Repairing the PID file is best effort; a write failure is ignored.
		_ = os.WriteFile(m.pidPath(id), []byte(strconv.Itoa(first)), 0o644)
		return first
	}

	if recorded > 0 {
		m.removePID(id)
	}
	return 0
}
func (m *Manager) isAlive(pid int) bool { func (m *Manager) isAlive(pid int) bool {
return syscall.Kill(pid, 0) == nil return syscall.Kill(pid, 0) == nil
} }
@@ -293,6 +414,33 @@ func (m *Manager) removePID(id string) {
_ = os.Remove(m.pidPath(id)) _ = os.Remove(m.pidPath(id))
} }
// buildEnv returns the environment for child processes: the current process
// environment followed by the KEY=VALUE pairs read from m.envFile.
//
// Entries from the file are appended last; os/exec uses the last value for a
// duplicated key, so file values override inherited ones.
//
// The file is parsed line by line. Blank lines, lines starting with '#' and
// lines without a non-empty key before '=' are skipped. Because the same file
// is also meant to be sourced from a shell, a leading "export " is dropped and
// one pair of matching single or double quotes around the value is removed, so
// the child sees the same value a shell would.
//
// A missing or unreadable file is not an error: the inherited environment is
// returned unchanged.
func (m *Manager) buildEnv() []string {
	env := os.Environ()
	if m.envFile == "" {
		return env
	}
	data, err := os.ReadFile(m.envFile)
	if err != nil {
		// Best effort: the env file is optional.
		return env
	}

	for _, line := range strings.Split(string(data), "\n") {
		line = strings.TrimSpace(line)
		if line == "" || strings.HasPrefix(line, "#") {
			continue
		}
		// Accept shell-style "export KEY=VALUE" lines.
		line = strings.TrimSpace(strings.TrimPrefix(line, "export "))

		idx := strings.Index(line, "=")
		if idx <= 0 {
			continue
		}
		key, value := line[:idx], line[idx+1:]

		// Strip one pair of matching surrounding quotes, as a shell would.
		if n := len(value); n >= 2 && (value[0] == '"' || value[0] == '\'') && value[n-1] == value[0] {
			value = value[1 : n-1]
		}
		env = append(env, key+"="+value)
	}
	return env
}
func (m *Manager) resolvedBin() string { func (m *Manager) resolvedBin() string {
if m.binPath != "" { if m.binPath != "" {
return m.binPath return m.binPath
+127 -7
View File
@@ -44,6 +44,18 @@ func (a *Adapter) RunIntent(intent puretui.Intent) tea.Cmd {
case puretui.IntentLoadLogs: case puretui.IntentLoadLogs:
return a.loadLogs(intent.AgentID) return a.loadLogs(intent.AgentID)
case puretui.IntentStartAll:
return a.startAll()
case puretui.IntentStopAll:
return a.stopAll()
case puretui.IntentRestartAll:
return a.restartAll()
case puretui.IntentKillAll:
return a.killAll()
case puretui.IntentTick: case puretui.IntentTick:
return a.tick() return a.tick()
@@ -65,13 +77,14 @@ func (a *Adapter) loadAgents() tea.Cmd {
views := make([]puretui.AgentView, len(statuses)) views := make([]puretui.AgentView, len(statuses))
for i, s := range statuses { for i, s := range statuses {
v := puretui.AgentView{ v := puretui.AgentView{
ID: s.ID, ID: s.ID,
Name: s.Name, Name: s.Name,
Version: s.Version, Version: s.Version,
Desc: s.Desc, Desc: s.Desc,
Enabled: s.Enabled, Enabled: s.Enabled,
Running: s.Running, Running: s.Running,
PID: s.PID, PID: s.PID,
Instances: a.mgr.InstanceCount(s.ID),
} }
if s.Running { if s.Running {
@@ -147,6 +160,113 @@ func (a *Adapter) restartAgent(id string) tea.Cmd {
} }
} }
// startAll returns a command that starts every enabled agent that is not
// already running and reports the outcome as a MsgServerActionDone.
//
// Total counts the agents a start was attempted on; Failed and Errors describe
// the attempts that returned an error. If the agent scan itself fails, the
// message carries that single error with Failed set to 1.
func (a *Adapter) startAll() tea.Cmd {
	const action = "Start All"
	return func() tea.Msg {
		agents, scanErr := a.mgr.Scan()
		if scanErr != nil {
			return puretui.MsgServerActionDone{Action: action, Failed: 1, Errors: []string{scanErr.Error()}}
		}

		var (
			attempted int
			problems  []string
		)
		for _, ag := range agents {
			if !ag.Enabled || a.mgr.IsRunning(ag.ID) {
				continue
			}
			attempted++
			if startErr := a.mgr.Start(ag); startErr != nil {
				problems = append(problems, fmt.Sprintf("%s: %v", ag.ID, startErr))
			}
		}

		// Presumably gives freshly started processes a moment to come up before
		// the follow-up status refresh. TODO confirm.
		if attempted > 0 {
			time.Sleep(500 * time.Millisecond)
		}
		return puretui.MsgServerActionDone{
			Action: action,
			Total:  attempted,
			Failed: len(problems),
			Errors: problems,
		}
	}
}
// stopAll returns a command that stops every agent currently reported as
// running and reports the outcome as a MsgServerActionDone.
//
// Total counts the agents a stop was attempted on; Failed and Errors describe
// the attempts that returned an error. If collecting the statuses fails, the
// message carries that single error with Failed set to 1.
func (a *Adapter) stopAll() tea.Cmd {
	const action = "Stop All"
	return func() tea.Msg {
		statuses, statusErr := a.mgr.StatusAll()
		if statusErr != nil {
			return puretui.MsgServerActionDone{Action: action, Failed: 1, Errors: []string{statusErr.Error()}}
		}

		var (
			attempted int
			problems  []string
		)
		for _, st := range statuses {
			if !st.Running {
				continue
			}
			attempted++
			if stopErr := a.mgr.Stop(st.ID); stopErr != nil {
				problems = append(problems, fmt.Sprintf("%s: %v", st.ID, stopErr))
			}
		}
		return puretui.MsgServerActionDone{
			Action: action,
			Total:  attempted,
			Failed: len(problems),
			Errors: problems,
		}
	}
}
// restartAll returns a command that stops every enabled agent that is running
// and then starts every enabled agent, reporting the outcome as a
// MsgServerActionDone.
//
// Errors from the stop phase are deliberately ignored; only start failures are
// counted in Failed and Errors. Total counts every enabled agent. If the agent
// scan itself fails, the message carries that single error with Failed set
// to 1.
func (a *Adapter) restartAll() tea.Cmd {
	const action = "Restart All"
	return func() tea.Msg {
		agents, scanErr := a.mgr.Scan()
		if scanErr != nil {
			return puretui.MsgServerActionDone{Action: action, Failed: 1, Errors: []string{scanErr.Error()}}
		}

		// Phase 1: bring down whatever is currently up.
		for _, ag := range agents {
			if !ag.Enabled {
				continue
			}
			if a.mgr.IsRunning(ag.ID) {
				_ = a.mgr.Stop(ag.ID)
			}
		}
		time.Sleep(300 * time.Millisecond)

		// Phase 2: start every enabled agent.
		var (
			attempted int
			problems  []string
		)
		for _, ag := range agents {
			if !ag.Enabled {
				continue
			}
			attempted++
			if startErr := a.mgr.Start(ag); startErr != nil {
				problems = append(problems, fmt.Sprintf("%s: %v", ag.ID, startErr))
			}
		}

		if attempted > 0 {
			time.Sleep(500 * time.Millisecond)
		}
		return puretui.MsgServerActionDone{
			Action: action,
			Total:  attempted,
			Failed: len(problems),
			Errors: problems,
		}
	}
}
// killAll returns a command that force-kills every agent currently reported as
// running and reports the outcome as a MsgServerActionDone.
//
// Total counts the agents a kill was attempted on; Failed and Errors describe
// the attempts that returned an error. If collecting the statuses fails, the
// message carries that single error with Failed set to 1.
func (a *Adapter) killAll() tea.Cmd {
	const action = "Kill All"
	return func() tea.Msg {
		statuses, statusErr := a.mgr.StatusAll()
		if statusErr != nil {
			return puretui.MsgServerActionDone{Action: action, Failed: 1, Errors: []string{statusErr.Error()}}
		}

		var (
			attempted int
			problems  []string
		)
		for _, st := range statuses {
			if !st.Running {
				continue
			}
			attempted++
			if killErr := a.mgr.Kill(st.ID); killErr != nil {
				problems = append(problems, fmt.Sprintf("%s: %v", st.ID, killErr))
			}
		}
		return puretui.MsgServerActionDone{
			Action: action,
			Total:  attempted,
			Failed: len(problems),
			Errors: problems,
		}
	}
}
func (a *Adapter) loadLogs(id string) tea.Cmd { func (a *Adapter) loadLogs(id string) tea.Cmd {
return func() tea.Msg { return func() tea.Msg {
lines, err := a.mgr.LogTail(id, 100) lines, err := a.mgr.LogTail(id, 100)