feat: implement server-wide management actions and enhance TUI dashboard
This commit is contained in:
@@ -4,6 +4,43 @@ Plataforma en Go para gestionar bots Matrix autónomos. Cada bot combina un **co
|
||||
|
||||
---
|
||||
|
||||
## Inicio rápido
|
||||
|
||||
```bash
|
||||
# 1. Compilar todo
|
||||
./build.sh
|
||||
|
||||
# 2. Cargar variables de entorno
|
||||
source .env
|
||||
|
||||
# 3. Lanzar la TUI interactiva (dashboard)
|
||||
./bin/dashboard
|
||||
```
|
||||
|
||||
### Dashboard TUI
|
||||
|
||||
El dashboard es una interfaz de terminal interactiva (bubbletea) para gestionar los bots del servidor:
|
||||
|
||||
```
|
||||
./bin/dashboard
|
||||
```
|
||||
|
||||
Desde la TUI puedes:
|
||||
|
||||
- **Agents** — ver el estado de cada agente; iniciar, detener, reiniciar o matar (kill) un agente individual; ver sus logs
- **Server** — operaciones masivas: start all, stop all, restart all y kill all, con un resumen de estado
|
||||
|
||||
### Otros binarios
|
||||
|
||||
| Binario | Uso |
|---------|-----|
| `./bin/launcher` | Inicia uno o varios agentes como procesos |
| `./bin/agentctl` | CLI: `list`, `start`, `stop`, `remove` |
| `./bin/register` | Registra bots en Synapse vía admin API |
| `./bin/dashboard` | TUI interactiva para gestión de bots |
|
||||
|
||||
---
|
||||
|
||||
## Principio de diseño
|
||||
|
||||
El proyecto usa el patrón **pure core / impure shell**:
|
||||
|
||||
@@ -0,0 +1,30 @@
|
||||
#!/usr/bin/env bash
# Build every project binary into ./bin.
#
# Usage: ./build.sh   (can be invoked from any working directory)
set -euo pipefail

# The package paths below (./cmd/...) are relative, so build from the directory
# that holds this script instead of whatever directory the caller is in.
# NOTE(review): assumes this script sits next to cmd/ at the repository root.
cd "$(dirname "${BASH_SOURCE[0]}")"

export PATH="/usr/local/go/bin:$PATH"

BIN="bin"
# Flags live in arrays so every element reaches `go build` as exactly one
# argument, without depending on word splitting of an unquoted string.
TAGS=(-tags goolm)
LDFLAGS=(-ldflags="-s -w")

mkdir -p "$BIN"

echo "==> Compilando todos los binarios en $BIN/ ..."

# Each entry is "<binary name>:<package path>".
targets=(
  "launcher:./cmd/launcher"
  "agentctl:./cmd/agentctl"
  "register:./cmd/register"
  "dashboard:./cmd/dashboard"
)

for entry in "${targets[@]}"; do
  name="${entry%%:*}"   # text before the first ':'
  pkg="${entry##*:}"    # text after the last ':'
  echo " $name"
  go build "${TAGS[@]}" "${LDFLAGS[@]}" -o "$BIN/$name" "$pkg"
done

echo ""
echo "==> Listo. Binarios disponibles:"
ls -lh "$BIN"/
|
||||
+57
-2
@@ -50,9 +50,64 @@ read_pid() {
|
||||
[[ -f "$f" ]] && cat "$f" || echo 0
|
||||
}
|
||||
|
||||
# Map an agent ID to its config path by scanning agents/*/config.yaml.
# Prints the path of the first config whose `id:` value equals $1, or nothing
# when no config matches.
config_path_for() {
  local target_id="$1"
  local cfg id
  for cfg in agents/*/config.yaml; do
    # An unmatched glob stays literal; skip anything that is not a real file.
    [[ -f "$cfg" ]] || continue
    # Take the first `id:` line only. YAML allows the scalar to be quoted, so
    # drop quote characters before comparing. `|| true` keeps a config with no
    # id line from failing the assignment under pipefail.
    id=$(grep -m1 '^ id:' "$cfg" | awk '{print $2}' | tr -d "\"'") || true
    if [[ "$id" == "$target_id" ]]; then
      echo "$cfg"
      return 0
    fi
  done
}
|
||||
|
||||
# Find the PIDs of all launcher processes running a given agent ID.
# Resolves the agent's config path and looks for it in process command lines
# with `pgrep -f`. Prints newline-separated PIDs (possibly nothing) and always
# returns 0.
find_agent_pids() {
  local id="$1"
  local cfg; cfg="$(config_path_for "$id")"
  [[ -n "$cfg" ]] || return 0

  # pgrep -f treats its argument as an extended regular expression, so escape
  # the metacharacters in the path; an unescaped "." would match any character.
  local cfg_re
  cfg_re="$(printf '%s' "$cfg" | sed 's/[][\.*^$+?(){}|]/\\&/g')"

  # pgrep exits non-zero when nothing matches; that is not an error here.
  pgrep -f "launcher.*-c.*${cfg_re}" 2>/dev/null || true
}
|
||||
|
||||
# Report through the exit status whether agent $1 has a live process.
# Returns 0 when the PID file points at a live process, or when a matching
# launcher process is found by find_agent_pids (the PID file is then rewritten
# with the first PID found). Returns 1 otherwise, removing a stale PID file.
is_running() {
  local id="$1"

  # First check the PID file.
  local pid; pid="$(read_pid "$id")"
  if [[ "$pid" -gt 0 ]] && kill -0 "$pid" 2>/dev/null; then
    return 0
  fi

  # PID file is stale or missing — search for actual processes.
  local pids; pids="$(find_agent_pids "$id")"
  if [[ -n "$pids" ]]; then
    # Update the PID file with the first process found.
    local first_pid; first_pid="$(echo "$pids" | head -1)"
    echo "$first_pid" > "$(pid_file "$id")"
    return 0
  fi

  # Truly not running — clean up the stale PID file.
  if [[ "$pid" -gt 0 ]]; then
    rm -f "$(pid_file "$id")"
  fi
  return 1
}
|
||||
|
||||
# Print how many launcher processes are currently running for agent $1
# (0 when find_agent_pids reports none).
count_instances() {
  local agent_id="$1"
  local found; found="$(find_agent_pids "$agent_id")"
  if [[ -n "$found" ]]; then
    # One PID per line, so the line count is the instance count.
    wc -l <<< "$found"
  else
    echo 0
  fi
}
|
||||
|
||||
agent_status() {
|
||||
|
||||
@@ -26,6 +26,7 @@ while IFS='|' read -r id _version _enabled _desc _cfg; do
|
||||
fi
|
||||
|
||||
pid="$(read_pid "$id")"
|
||||
instance_count="$(count_instances "$id")"
|
||||
((found++)) || true
|
||||
|
||||
# Uptime: calcular desde el inicio del proceso
|
||||
@@ -78,6 +79,12 @@ while IFS='|' read -r id _version _enabled _desc _cfg; do
|
||||
printf "%-22s ${GRN}%-8s${RST} %-12s %-10s %-8s %s\n" \
|
||||
"$id" "$pid" "$uptime" "$mem" "${cpu_pct}%" "$log_size"
|
||||
|
||||
# Warn about duplicate instances
|
||||
if [[ "$instance_count" -gt 1 ]]; then
|
||||
printf " ${RED}⚠ WARNING: %d instances running!${RST} PIDs: %s\n" \
|
||||
"$instance_count" "$(find_agent_pids "$id" | tr '\n' ' ')"
|
||||
fi
|
||||
|
||||
done < <(list_agents_raw)
|
||||
|
||||
if [[ "$found" -eq 0 ]]; then
|
||||
|
||||
@@ -55,11 +55,14 @@ case "$CMD" in
|
||||
|
||||
killed=0
|
||||
for id in "${agents[@]}"; do
|
||||
pid="$(read_pid "$id")"
|
||||
if [[ "$pid" -gt 0 ]] && kill -0 "$pid" 2>/dev/null; then
|
||||
kill -9 "$pid" 2>/dev/null || true
|
||||
all_pids="$(find_agent_pids "$id")"
|
||||
if [[ -n "$all_pids" ]]; then
|
||||
cnt="$(echo "$all_pids" | wc -l)"
|
||||
for p in $all_pids; do
|
||||
kill -9 "$p" 2>/dev/null || true
|
||||
done
|
||||
rm -f "$(pid_file "$id")"
|
||||
ok "$id killed (PID $pid)"
|
||||
ok "$id killed ($cnt instance(s), PIDs: $(echo $all_pids | tr '\n' ' '))"
|
||||
((killed++)) || true
|
||||
else
|
||||
dim " $id (no estaba corriendo)"
|
||||
|
||||
+21
-2
@@ -14,11 +14,30 @@ start_agent() {
|
||||
local id="$1" cfg="$2"
|
||||
local log; log="$(log_file "$id")"
|
||||
local pid_f; pid_f="$(pid_file "$id")"
|
||||
local bin="$REPO_ROOT/bin/launcher"
|
||||
|
||||
# Check for duplicate instances already running
|
||||
local existing; existing="$(count_instances "$id")"
|
||||
if [[ "$existing" -gt 0 ]]; then
|
||||
warn "$id already has $existing instance(s) running (orphan processes?)"
|
||||
warn " Run ./dev-scripts/stop.sh $id first to clean up"
|
||||
return 1
|
||||
fi
|
||||
|
||||
info "Iniciando $id..."
|
||||
|
||||
# Lanza el launcher en background, desacoplado del terminal
|
||||
nohup "$GO" run -tags goolm ./cmd/launcher -c "$cfg" --log-level "${LOG_LEVEL:-info}" \
|
||||
# Build the binary first to avoid go run wrapper PID issues
|
||||
if [[ ! -x "$bin" ]] || [[ "$(find ./cmd/launcher -newer "$bin" 2>/dev/null | head -1)" ]]; then
|
||||
info "Compilando launcher..."
|
||||
mkdir -p "$(dirname "$bin")"
|
||||
"$GO" build -tags goolm -o "$bin" ./cmd/launcher || {
|
||||
fail "$id error de compilación — revisa el código"
|
||||
return 1
|
||||
}
|
||||
fi
|
||||
|
||||
# Launch the compiled binary directly (no go run wrapper)
|
||||
nohup "$bin" -c "$cfg" --log-level "${LOG_LEVEL:-info}" \
|
||||
>> "$log" 2>&1 &
|
||||
|
||||
local pid=$!
|
||||
|
||||
+22
-8
@@ -18,23 +18,37 @@ while IFS='|' read -r id _version _enabled _desc _cfg; do
|
||||
continue
|
||||
fi
|
||||
|
||||
local_pid="$(read_pid "$id")"
|
||||
kill -TERM "$local_pid" 2>/dev/null || true
|
||||
# Kill ALL instances, not just the one in the PID file
|
||||
all_pids="$(find_agent_pids "$id")"
|
||||
instance_count="$(echo "$all_pids" | grep -c . 2>/dev/null || echo 0)"
|
||||
|
||||
# Espera hasta 5s a que muera limpiamente
|
||||
if [[ "$instance_count" -gt 1 ]]; then
|
||||
warn "$id has $instance_count instances running — stopping all"
|
||||
fi
|
||||
|
||||
# Send SIGTERM to all instances
|
||||
for p in $all_pids; do
|
||||
kill -TERM "$p" 2>/dev/null || true
|
||||
done
|
||||
|
||||
# Wait up to 5s for graceful shutdown
|
||||
for _ in {1..10}; do
|
||||
kill -0 "$local_pid" 2>/dev/null || break
|
||||
remaining="$(find_agent_pids "$id")"
|
||||
[[ -z "$remaining" ]] && break
|
||||
sleep 0.5
|
||||
done
|
||||
|
||||
# SIGKILL si todavía sigue vivo
|
||||
if kill -0 "$local_pid" 2>/dev/null; then
|
||||
# SIGKILL any survivors
|
||||
survivors="$(find_agent_pids "$id")"
|
||||
if [[ -n "$survivors" ]]; then
|
||||
warn "$id no respondió a SIGTERM, enviando SIGKILL..."
|
||||
kill -9 "$local_pid" 2>/dev/null || true
|
||||
for p in $survivors; do
|
||||
kill -9 "$p" 2>/dev/null || true
|
||||
done
|
||||
fi
|
||||
|
||||
rm -f "$(pid_file "$id")"
|
||||
ok "$id detenido (PID $local_pid)"
|
||||
ok "$id detenido ($instance_count instance(s) stopped)"
|
||||
((stopped++)) || true
|
||||
|
||||
done < <(list_agents_raw)
|
||||
|
||||
@@ -16,5 +16,13 @@ type MsgActionDone struct {
|
||||
// MsgLogsLoaded carries log lines for the selected agent.
|
||||
type MsgLogsLoaded struct{ Lines []string }
|
||||
|
||||
// MsgServerActionDone is the outcome of a server-wide bulk action.
type MsgServerActionDone struct {
	Action        string   // label of the bulk action, e.g. "Start All"
	Total, Failed int      // how many agents were acted on, and how many of those failed
	Errors        []string // one message per failure
}
|
||||
|
||||
// MsgTick triggers a periodic refresh.
|
||||
type MsgTick struct{}
|
||||
|
||||
+17
-4
@@ -10,6 +10,7 @@ const (
|
||||
ScreenAgentList // list all agents with status
|
||||
ScreenAgentActions // actions for a selected agent
|
||||
ScreenLogs // tail log output
|
||||
ScreenServer // server-wide process management
|
||||
)
|
||||
|
||||
// Model is the complete TUI state — pure data.
|
||||
@@ -34,10 +35,11 @@ type AgentView struct {
|
||||
Enabled bool
|
||||
Running bool
|
||||
PID int
|
||||
Uptime string // formatted: "2h 15m"
|
||||
Memory string // formatted: "42 MB"
|
||||
CPU string // formatted: "1.2%"
|
||||
LogSize string // formatted: "350 KB"
|
||||
Instances int // number of running instances (>1 means duplicates)
|
||||
Uptime string // formatted: "2h 15m"
|
||||
Memory string // formatted: "42 MB"
|
||||
CPU string // formatted: "1.2%"
|
||||
LogSize string // formatted: "350 KB"
|
||||
}
|
||||
|
||||
// MenuOption represents a selectable menu item.
|
||||
@@ -50,10 +52,21 @@ type MenuOption struct {
|
||||
func MainMenuOptions() []MenuOption {
|
||||
return []MenuOption{
|
||||
{Label: "Agents", Desc: "Gestionar agentes"},
|
||||
{Label: "Server", Desc: "Gestionar servidor"},
|
||||
{Label: "Quit", Desc: "Salir"},
|
||||
}
|
||||
}
|
||||
|
||||
// ServerMenuOptions returns the available server-wide actions.
|
||||
func ServerMenuOptions() []MenuOption {
|
||||
return []MenuOption{
|
||||
{Label: "Start All", Desc: "Iniciar todos los agentes habilitados"},
|
||||
{Label: "Stop All", Desc: "Detener todos los agentes"},
|
||||
{Label: "Restart All", Desc: "Reiniciar todos los agentes"},
|
||||
{Label: "Kill All", Desc: "SIGKILL forzado a todos"},
|
||||
}
|
||||
}
|
||||
|
||||
// AgentActionOptions returns the available actions based on agent state.
|
||||
func AgentActionOptions(running bool) []MenuOption {
|
||||
if running {
|
||||
|
||||
+64
-3
@@ -14,6 +14,12 @@ const (
|
||||
IntentLoadLogs IntentKind = "load_logs"
|
||||
IntentTick IntentKind = "tick"
|
||||
IntentQuit IntentKind = "quit"
|
||||
|
||||
// Server-wide bulk operations
|
||||
IntentStartAll IntentKind = "start_all"
|
||||
IntentStopAll IntentKind = "stop_all"
|
||||
IntentRestartAll IntentKind = "restart_all"
|
||||
IntentKillAll IntentKind = "kill_all"
|
||||
)
|
||||
|
||||
// Intent is pure data describing a side effect to execute.
|
||||
@@ -45,9 +51,11 @@ func Update(model Model, msg interface{}) (Model, []Intent) {
|
||||
|
||||
case MsgAgentsLoaded:
|
||||
model.Agents = m.Agents
|
||||
// Clamp cursor
|
||||
if model.Cursor >= len(model.Agents) && len(model.Agents) > 0 {
|
||||
model.Cursor = len(model.Agents) - 1
|
||||
// Clamp cursor only on screens that use the agent list
|
||||
if model.Screen == ScreenAgentList {
|
||||
if model.Cursor >= len(model.Agents) && len(model.Agents) > 0 {
|
||||
model.Cursor = len(model.Agents) - 1
|
||||
}
|
||||
}
|
||||
return model, []Intent{{Kind: IntentTick}}
|
||||
|
||||
@@ -59,6 +67,14 @@ func Update(model Model, msg interface{}) (Model, []Intent) {
|
||||
}
|
||||
return model, []Intent{{Kind: IntentLoadAgents}}
|
||||
|
||||
case MsgServerActionDone:
|
||||
if m.Failed == 0 {
|
||||
model.StatusMsg = fmt.Sprintf("%s: %d agents OK", m.Action, m.Total)
|
||||
} else {
|
||||
model.StatusMsg = fmt.Sprintf("%s: %d/%d failed", m.Action, m.Failed, m.Total)
|
||||
}
|
||||
return model, []Intent{{Kind: IntentLoadAgents}}
|
||||
|
||||
case MsgLogsLoaded:
|
||||
model.LogLines = m.Lines
|
||||
model.LogScroll = max(0, len(m.Lines)-visibleLogLines(model))
|
||||
@@ -92,6 +108,8 @@ func updateKey(model Model, key KeyMsg) (Model, []Intent) {
|
||||
return updateAgentActions(model, key)
|
||||
case ScreenLogs:
|
||||
return updateLogs(model, key)
|
||||
case ScreenServer:
|
||||
return updateServerScreen(model, key)
|
||||
}
|
||||
return model, nil
|
||||
}
|
||||
@@ -109,6 +127,11 @@ func updateMainScreen(model Model, key KeyMsg) (Model, []Intent) {
|
||||
model.Screen = ScreenAgentList
|
||||
model.Cursor = 0
|
||||
return model, []Intent{{Kind: IntentLoadAgents}}
|
||||
case "Server":
|
||||
model.Screen = ScreenServer
|
||||
model.Cursor = 0
|
||||
model.StatusMsg = ""
|
||||
return model, []Intent{{Kind: IntentLoadAgents}}
|
||||
case "Quit":
|
||||
return model, []Intent{{Kind: IntentQuit}}
|
||||
}
|
||||
@@ -210,6 +233,44 @@ func updateLogs(model Model, key KeyMsg) (Model, []Intent) {
|
||||
return model, nil
|
||||
}
|
||||
|
||||
func updateServerScreen(model Model, key KeyMsg) (Model, []Intent) {
|
||||
opts := ServerMenuOptions()
|
||||
|
||||
switch key.Str {
|
||||
case "0":
|
||||
model.Screen = ScreenMain
|
||||
model.Cursor = 0
|
||||
model.StatusMsg = ""
|
||||
case "up", "k":
|
||||
model.Cursor = clamp(model.Cursor-1, 0, len(opts)-1)
|
||||
case "down", "j":
|
||||
model.Cursor = clamp(model.Cursor+1, 0, len(opts)-1)
|
||||
case "enter":
|
||||
if model.Cursor < len(opts) {
|
||||
return executeServerAction(model, opts[model.Cursor].Label)
|
||||
}
|
||||
}
|
||||
return model, nil
|
||||
}
|
||||
|
||||
func executeServerAction(model Model, action string) (Model, []Intent) {
|
||||
switch action {
|
||||
case "Start All":
|
||||
model.StatusMsg = "Starting all agents..."
|
||||
return model, []Intent{{Kind: IntentStartAll}}
|
||||
case "Stop All":
|
||||
model.StatusMsg = "Stopping all agents..."
|
||||
return model, []Intent{{Kind: IntentStopAll}}
|
||||
case "Restart All":
|
||||
model.StatusMsg = "Restarting all agents..."
|
||||
return model, []Intent{{Kind: IntentRestartAll}}
|
||||
case "Kill All":
|
||||
model.StatusMsg = "Killing all agents..."
|
||||
return model, []Intent{{Kind: IntentKillAll}}
|
||||
}
|
||||
return model, nil
|
||||
}
|
||||
|
||||
// ── pure helpers ─────────────────────────────────────────────────────────
|
||||
|
||||
func visibleLogLines(m Model) int {
|
||||
|
||||
@@ -16,6 +16,8 @@ func View(model Model) string {
|
||||
return viewAgentActions(model)
|
||||
case ScreenLogs:
|
||||
return viewLogs(model)
|
||||
case ScreenServer:
|
||||
return viewServer(model)
|
||||
default:
|
||||
return ""
|
||||
}
|
||||
@@ -78,6 +80,10 @@ func viewAgentList(m Model) string {
|
||||
|
||||
b.WriteString(fmt.Sprintf(" %s%s %-20s %-8s %s\n",
|
||||
cursor, icon, a.ID, a.Version, status))
|
||||
|
||||
if a.Instances > 1 {
|
||||
b.WriteString(fmt.Sprintf(" ⚠ WARNING: %d instances running!\n", a.Instances))
|
||||
}
|
||||
}
|
||||
|
||||
if m.StatusMsg != "" {
|
||||
@@ -177,6 +183,54 @@ func viewLogs(m Model) string {
|
||||
return b.String()
|
||||
}
|
||||
|
||||
func viewServer(m Model) string {
|
||||
var b strings.Builder
|
||||
|
||||
b.WriteString("\n Server Management\n")
|
||||
b.WriteString(" " + strings.Repeat("─", 44) + "\n")
|
||||
|
||||
// Summary
|
||||
running, stopped, disabled := countStatuses(m.Agents)
|
||||
total := len(m.Agents)
|
||||
if total > 0 {
|
||||
b.WriteString(fmt.Sprintf(" %d agents: %d running, %d stopped, %d disabled\n", total, running, stopped, disabled))
|
||||
} else {
|
||||
b.WriteString(" Loading...\n")
|
||||
}
|
||||
|
||||
// Agent status list (compact)
|
||||
if total > 0 {
|
||||
b.WriteString("\n")
|
||||
for _, a := range m.Agents {
|
||||
icon := "○"
|
||||
if !a.Enabled {
|
||||
icon = " "
|
||||
} else if a.Running {
|
||||
icon = "●"
|
||||
}
|
||||
b.WriteString(fmt.Sprintf(" %s %s\n", icon, a.ID))
|
||||
}
|
||||
}
|
||||
|
||||
b.WriteString("\n")
|
||||
|
||||
// Action menu
|
||||
for i, opt := range ServerMenuOptions() {
|
||||
cursor := " "
|
||||
if i == m.Cursor {
|
||||
cursor = "> "
|
||||
}
|
||||
b.WriteString(fmt.Sprintf(" %s%-16s %s\n", cursor, opt.Label, opt.Desc))
|
||||
}
|
||||
|
||||
if m.StatusMsg != "" {
|
||||
b.WriteString("\n " + m.StatusMsg + "\n")
|
||||
}
|
||||
|
||||
b.WriteString("\n ↑↓ navegar enter ejecutar 0 volver\n")
|
||||
return b.String()
|
||||
}
|
||||
|
||||
func countStatuses(agents []AgentView) (running, stopped, disabled int) {
|
||||
for _, a := range agents {
|
||||
switch {
|
||||
|
||||
+170
-22
@@ -47,11 +47,12 @@ type Manager struct {
|
||||
runDir string
|
||||
agentsGlob string
|
||||
binPath string
|
||||
envFile string // path to .env file for child processes
|
||||
}
|
||||
|
||||
// NewManager creates a Manager. binPath can be empty for auto-detection.
|
||||
func NewManager(runDir, agentsGlob, binPath string) *Manager {
|
||||
return &Manager{runDir: runDir, agentsGlob: agentsGlob, binPath: binPath}
|
||||
return &Manager{runDir: runDir, agentsGlob: agentsGlob, binPath: binPath, envFile: ".env"}
|
||||
}
|
||||
|
||||
// Scan discovers all agents from config files.
|
||||
@@ -81,8 +82,8 @@ func (m *Manager) Scan() ([]AgentInfo, error) {
|
||||
|
||||
// Status returns the runtime status for a single agent.
|
||||
func (m *Manager) Status(info AgentInfo) AgentStatus {
|
||||
pid := m.readPID(info.ID)
|
||||
running := pid > 0 && m.isAlive(pid)
|
||||
pid := m.resolveRunningPID(info.ID)
|
||||
running := pid > 0
|
||||
return AgentStatus{AgentInfo: info, Running: running, PID: pid}
|
||||
}
|
||||
|
||||
@@ -101,6 +102,12 @@ func (m *Manager) StatusAll() ([]AgentStatus, error) {
|
||||
|
||||
// Start launches an agent process in the background.
|
||||
func (m *Manager) Start(info AgentInfo) error {
|
||||
// Check for orphan instances
|
||||
if existing := m.findProcessPIDs(info.ID); len(existing) > 0 {
|
||||
return fmt.Errorf("agent %q already has %d running instance(s) (PIDs: %v) — stop them first",
|
||||
info.ID, len(existing), existing)
|
||||
}
|
||||
|
||||
if err := os.MkdirAll(m.runDir, 0o755); err != nil {
|
||||
return fmt.Errorf("create run dir: %w", err)
|
||||
}
|
||||
@@ -113,11 +120,12 @@ func (m *Manager) Start(info AgentInfo) error {
|
||||
bin := m.resolvedBin()
|
||||
var cmd *exec.Cmd
|
||||
if strings.HasPrefix(bin, "go run") {
|
||||
cmd = exec.Command("go", "run", "./cmd/launcher", "-c", info.ConfigPath)
|
||||
cmd = exec.Command("go", "run", "-tags", "goolm", "./cmd/launcher", "-c", info.ConfigPath)
|
||||
} else {
|
||||
cmd = exec.Command(bin, "-c", info.ConfigPath)
|
||||
}
|
||||
|
||||
cmd.Env = m.buildEnv()
|
||||
cmd.Stdout = logFile
|
||||
cmd.Stderr = logFile
|
||||
cmd.SysProcAttr = &syscall.SysProcAttr{Setsid: true}
|
||||
@@ -135,49 +143,94 @@ func (m *Manager) Start(info AgentInfo) error {
|
||||
return nil
|
||||
}
|
||||
|
||||
// Stop sends SIGTERM, waits up to 5s, then SIGKILL if needed.
|
||||
// Stop sends SIGTERM to all instances, waits up to 5s, then SIGKILL if needed.
|
||||
func (m *Manager) Stop(id string) error {
|
||||
pid := m.readPID(id)
|
||||
if pid == 0 || !m.isAlive(pid) {
|
||||
pids := m.findProcessPIDs(id)
|
||||
// Also include PID file PID if alive and not already in the list
|
||||
filePID := m.readPID(id)
|
||||
if filePID > 0 && m.isAlive(filePID) {
|
||||
found := false
|
||||
for _, p := range pids {
|
||||
if p == filePID {
|
||||
found = true
|
||||
break
|
||||
}
|
||||
}
|
||||
if !found {
|
||||
pids = append(pids, filePID)
|
||||
}
|
||||
}
|
||||
|
||||
if len(pids) == 0 {
|
||||
return fmt.Errorf("agent %q is not running", id)
|
||||
}
|
||||
|
||||
if err := syscall.Kill(pid, syscall.SIGTERM); err != nil {
|
||||
return fmt.Errorf("SIGTERM: %w", err)
|
||||
// SIGTERM all instances
|
||||
for _, pid := range pids {
|
||||
_ = syscall.Kill(pid, syscall.SIGTERM)
|
||||
}
|
||||
|
||||
// Wait up to 5 seconds for graceful shutdown.
|
||||
for i := 0; i < 10; i++ {
|
||||
if !m.isAlive(pid) {
|
||||
allDead := true
|
||||
for _, pid := range pids {
|
||||
if m.isAlive(pid) {
|
||||
allDead = false
|
||||
break
|
||||
}
|
||||
}
|
||||
if allDead {
|
||||
m.removePID(id)
|
||||
return nil
|
||||
}
|
||||
time.Sleep(500 * time.Millisecond)
|
||||
}
|
||||
|
||||
// Force kill.
|
||||
if m.isAlive(pid) {
|
||||
_ = syscall.Kill(pid, syscall.SIGKILL)
|
||||
// Force kill survivors.
|
||||
for _, pid := range pids {
|
||||
if m.isAlive(pid) {
|
||||
_ = syscall.Kill(pid, syscall.SIGKILL)
|
||||
}
|
||||
}
|
||||
m.removePID(id)
|
||||
return nil
|
||||
}
|
||||
|
||||
// Kill sends SIGKILL immediately.
|
||||
// Kill sends SIGKILL to all instances immediately.
|
||||
func (m *Manager) Kill(id string) error {
|
||||
pid := m.readPID(id)
|
||||
if pid == 0 || !m.isAlive(pid) {
|
||||
pids := m.findProcessPIDs(id)
|
||||
filePID := m.readPID(id)
|
||||
if filePID > 0 && m.isAlive(filePID) {
|
||||
found := false
|
||||
for _, p := range pids {
|
||||
if p == filePID {
|
||||
found = true
|
||||
break
|
||||
}
|
||||
}
|
||||
if !found {
|
||||
pids = append(pids, filePID)
|
||||
}
|
||||
}
|
||||
|
||||
if len(pids) == 0 {
|
||||
return fmt.Errorf("agent %q is not running", id)
|
||||
}
|
||||
err := syscall.Kill(pid, syscall.SIGKILL)
|
||||
|
||||
var lastErr error
|
||||
for _, pid := range pids {
|
||||
if err := syscall.Kill(pid, syscall.SIGKILL); err != nil {
|
||||
lastErr = err
|
||||
}
|
||||
}
|
||||
m.removePID(id)
|
||||
return err
|
||||
return lastErr
|
||||
}
|
||||
|
||||
// Stats gathers resource usage for a running agent from /proc.
|
||||
func (m *Manager) Stats(id string) (ProcessStats, error) {
|
||||
pid := m.readPID(id)
|
||||
if pid == 0 || !m.isAlive(pid) {
|
||||
pid := m.resolveRunningPID(id)
|
||||
if pid == 0 {
|
||||
return ProcessStats{}, fmt.Errorf("agent %q is not running", id)
|
||||
}
|
||||
|
||||
@@ -256,8 +309,12 @@ func (m *Manager) LogTail(id string, lines int) ([]string, error) {
|
||||
|
||||
// IsRunning checks if an agent process is alive.
|
||||
func (m *Manager) IsRunning(id string) bool {
|
||||
pid := m.readPID(id)
|
||||
return pid > 0 && m.isAlive(pid)
|
||||
return m.resolveRunningPID(id) > 0
|
||||
}
|
||||
|
||||
// InstanceCount returns how many launcher processes are running for an agent.
|
||||
func (m *Manager) InstanceCount(id string) int {
|
||||
return len(m.findProcessPIDs(id))
|
||||
}
|
||||
|
||||
// ReadPID returns the PID from the PID file, or 0.
|
||||
@@ -285,6 +342,70 @@ func (m *Manager) readPID(id string) int {
|
||||
return pid
|
||||
}
|
||||
|
||||
// findProcessPIDs searches for running launcher processes for a given agent ID
|
||||
// using pgrep. Returns all matching PIDs.
|
||||
func (m *Manager) findProcessPIDs(id string) []int {
|
||||
// First try to find the config path for this agent
|
||||
configPath := m.configPathFor(id)
|
||||
if configPath == "" {
|
||||
return nil
|
||||
}
|
||||
pattern := fmt.Sprintf("launcher.*-c.*%s", configPath)
|
||||
out, err := exec.Command("pgrep", "-f", pattern).Output()
|
||||
if err != nil {
|
||||
return nil
|
||||
}
|
||||
var pids []int
|
||||
for _, line := range strings.Split(strings.TrimSpace(string(out)), "\n") {
|
||||
if p, err := strconv.Atoi(strings.TrimSpace(line)); err == nil && p > 0 {
|
||||
pids = append(pids, p)
|
||||
}
|
||||
}
|
||||
return pids
|
||||
}
|
||||
|
||||
// configPathFor returns the config file path for the given agent ID.
|
||||
func (m *Manager) configPathFor(id string) string {
|
||||
matches, err := filepath.Glob(m.agentsGlob)
|
||||
if err != nil {
|
||||
return ""
|
||||
}
|
||||
for _, path := range matches {
|
||||
cfg, err := config.LoadMeta(path)
|
||||
if err != nil {
|
||||
continue
|
||||
}
|
||||
if cfg.Agent.ID == id {
|
||||
return path
|
||||
}
|
||||
}
|
||||
return ""
|
||||
}
|
||||
|
||||
// resolveRunningPID returns the PID of the running agent, checking the PID file
|
||||
// first and falling back to process discovery. It also repairs stale PID files.
|
||||
func (m *Manager) resolveRunningPID(id string) int {
|
||||
// Check PID file first
|
||||
pid := m.readPID(id)
|
||||
if pid > 0 && m.isAlive(pid) {
|
||||
return pid
|
||||
}
|
||||
|
||||
// PID file is stale or missing — search for actual processes
|
||||
pids := m.findProcessPIDs(id)
|
||||
if len(pids) > 0 {
|
||||
// Repair the PID file with the first found process
|
||||
_ = os.WriteFile(m.pidPath(id), []byte(strconv.Itoa(pids[0])), 0o644)
|
||||
return pids[0]
|
||||
}
|
||||
|
||||
// Clean up stale PID file
|
||||
if pid > 0 {
|
||||
m.removePID(id)
|
||||
}
|
||||
return 0
|
||||
}
|
||||
|
||||
func (m *Manager) isAlive(pid int) bool {
|
||||
return syscall.Kill(pid, 0) == nil
|
||||
}
|
||||
@@ -293,6 +414,33 @@ func (m *Manager) removePID(id string) {
|
||||
_ = os.Remove(m.pidPath(id))
|
||||
}
|
||||
|
||||
// buildEnv returns the environment for child processes: current env + .env file vars.
|
||||
func (m *Manager) buildEnv() []string {
|
||||
env := os.Environ()
|
||||
if m.envFile == "" {
|
||||
return env
|
||||
}
|
||||
data, err := os.ReadFile(m.envFile)
|
||||
if err != nil {
|
||||
return env
|
||||
}
|
||||
// Parse KEY=VALUE lines, skip comments and blanks.
|
||||
seen := make(map[string]bool)
|
||||
for _, line := range strings.Split(string(data), "\n") {
|
||||
line = strings.TrimSpace(line)
|
||||
if line == "" || strings.HasPrefix(line, "#") {
|
||||
continue
|
||||
}
|
||||
if idx := strings.Index(line, "="); idx > 0 {
|
||||
key := line[:idx]
|
||||
seen[key] = true
|
||||
env = append(env, line)
|
||||
}
|
||||
}
|
||||
_ = seen // .env values appended last, so they override earlier entries
|
||||
return env
|
||||
}
|
||||
|
||||
func (m *Manager) resolvedBin() string {
|
||||
if m.binPath != "" {
|
||||
return m.binPath
|
||||
|
||||
+127
-7
@@ -44,6 +44,18 @@ func (a *Adapter) RunIntent(intent puretui.Intent) tea.Cmd {
|
||||
case puretui.IntentLoadLogs:
|
||||
return a.loadLogs(intent.AgentID)
|
||||
|
||||
case puretui.IntentStartAll:
|
||||
return a.startAll()
|
||||
|
||||
case puretui.IntentStopAll:
|
||||
return a.stopAll()
|
||||
|
||||
case puretui.IntentRestartAll:
|
||||
return a.restartAll()
|
||||
|
||||
case puretui.IntentKillAll:
|
||||
return a.killAll()
|
||||
|
||||
case puretui.IntentTick:
|
||||
return a.tick()
|
||||
|
||||
@@ -65,13 +77,14 @@ func (a *Adapter) loadAgents() tea.Cmd {
|
||||
views := make([]puretui.AgentView, len(statuses))
|
||||
for i, s := range statuses {
|
||||
v := puretui.AgentView{
|
||||
ID: s.ID,
|
||||
Name: s.Name,
|
||||
Version: s.Version,
|
||||
Desc: s.Desc,
|
||||
Enabled: s.Enabled,
|
||||
Running: s.Running,
|
||||
PID: s.PID,
|
||||
ID: s.ID,
|
||||
Name: s.Name,
|
||||
Version: s.Version,
|
||||
Desc: s.Desc,
|
||||
Enabled: s.Enabled,
|
||||
Running: s.Running,
|
||||
PID: s.PID,
|
||||
Instances: a.mgr.InstanceCount(s.ID),
|
||||
}
|
||||
|
||||
if s.Running {
|
||||
@@ -147,6 +160,113 @@ func (a *Adapter) restartAgent(id string) tea.Cmd {
|
||||
}
|
||||
}
|
||||
|
||||
func (a *Adapter) startAll() tea.Cmd {
|
||||
return func() tea.Msg {
|
||||
agents, err := a.mgr.Scan()
|
||||
if err != nil {
|
||||
return puretui.MsgServerActionDone{Action: "Start All", Errors: []string{err.Error()}, Failed: 1}
|
||||
}
|
||||
var total, failed int
|
||||
var errs []string
|
||||
for _, agent := range agents {
|
||||
if !agent.Enabled {
|
||||
continue
|
||||
}
|
||||
if a.mgr.IsRunning(agent.ID) {
|
||||
continue
|
||||
}
|
||||
total++
|
||||
if err := a.mgr.Start(agent); err != nil {
|
||||
failed++
|
||||
errs = append(errs, fmt.Sprintf("%s: %v", agent.ID, err))
|
||||
}
|
||||
}
|
||||
if total > 0 {
|
||||
time.Sleep(500 * time.Millisecond)
|
||||
}
|
||||
return puretui.MsgServerActionDone{Action: "Start All", Total: total, Failed: failed, Errors: errs}
|
||||
}
|
||||
}
|
||||
|
||||
func (a *Adapter) stopAll() tea.Cmd {
|
||||
return func() tea.Msg {
|
||||
statuses, err := a.mgr.StatusAll()
|
||||
if err != nil {
|
||||
return puretui.MsgServerActionDone{Action: "Stop All", Errors: []string{err.Error()}, Failed: 1}
|
||||
}
|
||||
var total, failed int
|
||||
var errs []string
|
||||
for _, s := range statuses {
|
||||
if !s.Running {
|
||||
continue
|
||||
}
|
||||
total++
|
||||
if err := a.mgr.Stop(s.ID); err != nil {
|
||||
failed++
|
||||
errs = append(errs, fmt.Sprintf("%s: %v", s.ID, err))
|
||||
}
|
||||
}
|
||||
return puretui.MsgServerActionDone{Action: "Stop All", Total: total, Failed: failed, Errors: errs}
|
||||
}
|
||||
}
|
||||
|
||||
func (a *Adapter) restartAll() tea.Cmd {
|
||||
return func() tea.Msg {
|
||||
agents, err := a.mgr.Scan()
|
||||
if err != nil {
|
||||
return puretui.MsgServerActionDone{Action: "Restart All", Errors: []string{err.Error()}, Failed: 1}
|
||||
}
|
||||
|
||||
// Stop all running first
|
||||
for _, agent := range agents {
|
||||
if agent.Enabled && a.mgr.IsRunning(agent.ID) {
|
||||
_ = a.mgr.Stop(agent.ID)
|
||||
}
|
||||
}
|
||||
time.Sleep(300 * time.Millisecond)
|
||||
|
||||
// Start all enabled
|
||||
var total, failed int
|
||||
var errs []string
|
||||
for _, agent := range agents {
|
||||
if !agent.Enabled {
|
||||
continue
|
||||
}
|
||||
total++
|
||||
if err := a.mgr.Start(agent); err != nil {
|
||||
failed++
|
||||
errs = append(errs, fmt.Sprintf("%s: %v", agent.ID, err))
|
||||
}
|
||||
}
|
||||
if total > 0 {
|
||||
time.Sleep(500 * time.Millisecond)
|
||||
}
|
||||
return puretui.MsgServerActionDone{Action: "Restart All", Total: total, Failed: failed, Errors: errs}
|
||||
}
|
||||
}
|
||||
|
||||
func (a *Adapter) killAll() tea.Cmd {
|
||||
return func() tea.Msg {
|
||||
statuses, err := a.mgr.StatusAll()
|
||||
if err != nil {
|
||||
return puretui.MsgServerActionDone{Action: "Kill All", Errors: []string{err.Error()}, Failed: 1}
|
||||
}
|
||||
var total, failed int
|
||||
var errs []string
|
||||
for _, s := range statuses {
|
||||
if !s.Running {
|
||||
continue
|
||||
}
|
||||
total++
|
||||
if err := a.mgr.Kill(s.ID); err != nil {
|
||||
failed++
|
||||
errs = append(errs, fmt.Sprintf("%s: %v", s.ID, err))
|
||||
}
|
||||
}
|
||||
return puretui.MsgServerActionDone{Action: "Kill All", Total: total, Failed: failed, Errors: errs}
|
||||
}
|
||||
}
|
||||
|
||||
func (a *Adapter) loadLogs(id string) tea.Cmd {
|
||||
return func() tea.Msg {
|
||||
lines, err := a.mgr.LogTail(id, 100)
|
||||
|
||||
Reference in New Issue
Block a user