diff --git a/README.md b/README.md
index d2d08f6..4535f84 100644
--- a/README.md
+++ b/README.md
@@ -4,6 +4,43 @@ Plataforma en Go para gestionar bots Matrix autónomos. Cada bot combina un **co
 
 ---
 
+## Inicio rápido
+
+```bash
+# 1. Compilar todo
+./build.sh
+
+# 2. Cargar variables de entorno
+source .env
+
+# 3. Lanzar la TUI interactiva (dashboard)
+./bin/dashboard
+```
+
+### Dashboard TUI
+
+El dashboard es una interfaz de terminal interactiva (bubbletea) para gestionar los bots del servidor:
+
+```
+./bin/dashboard
+```
+
+Desde la TUI puedes:
+
+- **Agents** — ver estado de cada agente, iniciar/detener/reiniciar/kill individual, ver logs
+- **Server** — operaciones masivas: start all, stop all, restart all, kill all con resumen de estado
+
+### Otros binarios
+
+| Binario | Uso |
+|---------|-----|
+| `./bin/launcher` | Inicia uno o varios agentes como procesos |
+| `./bin/agentctl` | CLI: `list`, `start`, `stop`, `remove` |
+| `./bin/register` | Registra bots en Synapse via admin API |
+| `./bin/dashboard` | TUI interactiva para gestión de bots |
+
+---
+
 ## Principio de diseño
 
 El proyecto usa el patrón **pure core / impure shell**:
diff --git a/build.sh b/build.sh
new file mode 100755
index 0000000..6d723a9
--- /dev/null
+++ b/build.sh
@@ -0,0 +1,30 @@
+#!/usr/bin/env bash
+set -euo pipefail
+
+export PATH="/usr/local/go/bin:$PATH"
+
+BIN="bin"
+TAGS="-tags goolm"
+LDFLAGS="-ldflags=-s -w"
+
+mkdir -p "$BIN"
+
+echo "==> Compilando todos los binarios en $BIN/ ..."
+
+targets=(
+  "launcher:./cmd/launcher"
+  "agentctl:./cmd/agentctl"
+  "register:./cmd/register"
+  "dashboard:./cmd/dashboard"
+)
+
+for entry in "${targets[@]}"; do
+  name="${entry%%:*}"
+  pkg="${entry##*:}"
+  echo " $name"
+  go build $TAGS "$LDFLAGS" -o "$BIN/$name" "$pkg"
+done
+
+echo ""
+echo "==> Listo. Binarios disponibles:"
+ls -lh "$BIN"/
diff --git a/dev-scripts/_common.sh b/dev-scripts/_common.sh
index b4a4363..eb75b46 100755
--- a/dev-scripts/_common.sh
+++ b/dev-scripts/_common.sh
@@ -50,9 +50,64 @@ read_pid() {
   [[ -f "$f" ]] && cat "$f" || echo 0
 }
 
+# Map agent ID to its config path by scanning agent directories.
+config_path_for() {
+  local target_id="$1"
+  for cfg in agents/*/config.yaml; do
+    [[ -f "$cfg" ]] || continue
+    local id
+    id=$(grep -m1 '^ id:' "$cfg" | awk '{print $2}')
+    if [[ "$id" == "$target_id" ]]; then
+      echo "$cfg"
+      return
+    fi
+  done
+}
+
+# Find all PIDs of launcher processes for a given agent ID.
+# Searches for the actual config path in the process command line.
+# Returns newline-separated PIDs (may be empty).
+find_agent_pids() {
+  local id="$1"
+  local cfg; cfg="$(config_path_for "$id")"
+  if [[ -z "$cfg" ]]; then
+    return
+  fi
+  pgrep -f "launcher.*-c.*${cfg}" 2>/dev/null || true
+}
+
 is_running() {
-  local pid; pid="$(read_pid "$1")"
-  [[ "$pid" -gt 0 ]] && kill -0 "$pid" 2>/dev/null
+  local id="$1"
+
+  # First check PID file
+  local pid; pid="$(read_pid "$id")"
+  if [[ "$pid" -gt 0 ]] && kill -0 "$pid" 2>/dev/null; then
+    return 0
+  fi
+
+  # PID file is stale or missing — search for actual processes
+  local pids; pids="$(find_agent_pids "$id")"
+  if [[ -n "$pids" ]]; then
+    # Update PID file with the first found process
+    local first_pid; first_pid="$(echo "$pids" | head -1)"
+    echo "$first_pid" > "$(pid_file "$id")"
+    return 0
+  fi
+
+  # Truly not running — clean up stale PID file
+  [[ "$pid" -gt 0 ]] && rm -f "$(pid_file "$id")"
+  return 1
+}
+
+# Count how many instances of an agent are running.
+count_instances() {
+  local id="$1"
+  local pids; pids="$(find_agent_pids "$id")"
+  if [[ -z "$pids" ]]; then
+    echo 0
+  else
+    echo "$pids" | wc -l
+  fi
 }
 
 agent_status() {
diff --git a/dev-scripts/ps.sh b/dev-scripts/ps.sh
index af3423a..82f932f 100755
--- a/dev-scripts/ps.sh
+++ b/dev-scripts/ps.sh
@@ -26,6 +26,7 @@ while IFS='|' read -r id _version _enabled _desc _cfg; do
   fi
 
   pid="$(read_pid "$id")"
+  instance_count="$(count_instances "$id")"
   ((found++)) || true
 
   # Uptime: calcular desde el inicio del proceso
@@ -78,6 +79,12 @@ while IFS='|' read -r id _version _enabled _desc _cfg; do
   printf "%-22s ${GRN}%-8s${RST} %-12s %-10s %-8s %s\n" \
     "$id" "$pid" "$uptime" "$mem" "${cpu_pct}%" "$log_size"
 
+  # Warn about duplicate instances
+  if [[ "$instance_count" -gt 1 ]]; then
+    printf " ${RED}⚠ WARNING: %d instances running!${RST} PIDs: %s\n" \
+      "$instance_count" "$(find_agent_pids "$id" | tr '\n' ' ')"
+  fi
+
 done < <(list_agents_raw)
 
 if [[ "$found" -eq 0 ]]; then
diff --git a/dev-scripts/server.sh b/dev-scripts/server.sh
index bbb6c41..9fde507 100755
--- a/dev-scripts/server.sh
+++ b/dev-scripts/server.sh
@@ -55,11 +55,14 @@ case "$CMD" in
 
     killed=0
     for id in "${agents[@]}"; do
-      pid="$(read_pid "$id")"
-      if [[ "$pid" -gt 0 ]] && kill -0 "$pid" 2>/dev/null; then
-        kill -9 "$pid" 2>/dev/null || true
+      all_pids="$(find_agent_pids "$id")"
+      if [[ -n "$all_pids" ]]; then
+        cnt="$(echo "$all_pids" | wc -l)"
+        for p in $all_pids; do
+          kill -9 "$p" 2>/dev/null || true
+        done
         rm -f "$(pid_file "$id")"
-        ok "$id killed (PID $pid)"
+        ok "$id killed ($cnt instance(s), PIDs: $(echo $all_pids | tr '\n' ' '))"
         ((killed++)) || true
       else
         dim " $id (no estaba corriendo)"
diff --git a/dev-scripts/start.sh b/dev-scripts/start.sh
index f8cac5b..0d7d7b9 100755
--- a/dev-scripts/start.sh
+++ b/dev-scripts/start.sh
@@ -14,11 +14,31 @@ start_agent() {
   local id="$1" cfg="$2"
   local log; log="$(log_file "$id")"
   local pid_f; pid_f="$(pid_file "$id")"
+  local bin="$REPO_ROOT/bin/launcher"
+
+  # Check for duplicate instances already running
+  local existing; existing="$(count_instances "$id")"
+  if [[ "$existing" -gt 0 ]]; then
+    warn "$id already has $existing instance(s) running (orphan processes?)"
+    warn " Run ./dev-scripts/stop.sh $id first to clean up"
+    return 1
+  fi
 
   info "Iniciando $id..."
 
-  # Lanza el launcher en background, desacoplado del terminal
-  nohup "$GO" run -tags goolm ./cmd/launcher -c "$cfg" --log-level "${LOG_LEVEL:-info}" \
+  # Build the binary first to avoid go run wrapper PID issues
+  # Rebuild when ANY Go source or module file is newer, not only ./cmd/launcher: the launcher imports other packages
+  if [[ ! -x "$bin" ]] || [[ -n "$(find . \( -name '*.go' -o -name 'go.mod' -o -name 'go.sum' \) -newer "$bin" 2>/dev/null | head -1)" ]]; then
+    info "Compilando launcher..."
+    mkdir -p "$(dirname "$bin")"
+    "$GO" build -tags goolm -o "$bin" ./cmd/launcher || {
+      fail "$id error de compilación — revisa el código"
+      return 1
+    }
+  fi
+
+  # Launch the compiled binary directly (no go run wrapper)
+  nohup "$bin" -c "$cfg" --log-level "${LOG_LEVEL:-info}" \
     >> "$log" 2>&1 &
   local pid=$!
 
diff --git a/dev-scripts/stop.sh b/dev-scripts/stop.sh
index 3325e1e..36907de 100755
--- a/dev-scripts/stop.sh
+++ b/dev-scripts/stop.sh
@@ -18,23 +18,38 @@ while IFS='|' read -r id _version _enabled _desc _cfg; do
     continue
   fi
 
-  local_pid="$(read_pid "$id")"
-  kill -TERM "$local_pid" 2>/dev/null || true
+  # Kill ALL instances, not just the one in the PID file
+  all_pids="$(find_agent_pids "$id")"
+  # grep -c prints 0 AND exits 1 when nothing matches, so "|| echo 0" would emit a second 0
+  instance_count="$(grep -c . <<<"$all_pids" || true)"
 
-  # Espera hasta 5s a que muera limpiamente
+  if [[ "$instance_count" -gt 1 ]]; then
+    warn "$id has $instance_count instances running — stopping all"
+  fi
+
+  # Send SIGTERM to all instances
+  for p in $all_pids; do
+    kill -TERM "$p" 2>/dev/null || true
+  done
+
+  # Wait up to 5s for graceful shutdown
   for _ in {1..10}; do
-    kill -0 "$local_pid" 2>/dev/null || break
+    remaining="$(find_agent_pids "$id")"
+    [[ -z "$remaining" ]] && break
     sleep 0.5
   done
 
-  # SIGKILL si todavía sigue vivo
-  if kill -0 "$local_pid" 2>/dev/null; then
+  # SIGKILL any survivors
+  survivors="$(find_agent_pids "$id")"
+  if [[ -n "$survivors" ]]; then
     warn "$id no respondió a SIGTERM, enviando SIGKILL..."
-    kill -9 "$local_pid" 2>/dev/null || true
+    for p in $survivors; do
+      kill -9 "$p" 2>/dev/null || true
+    done
   fi
 
   rm -f "$(pid_file "$id")"
-  ok "$id detenido (PID $local_pid)"
+  ok "$id detenido ($instance_count instance(s) stopped)"
   ((stopped++)) || true
 
 done < <(list_agents_raw)
diff --git a/pkg/tui/messages.go b/pkg/tui/messages.go
index 9539122..0156cd5 100644
--- a/pkg/tui/messages.go
+++ b/pkg/tui/messages.go
@@ -16,5 +16,13 @@ type MsgActionDone struct {
 // MsgLogsLoaded carries log lines for the selected agent.
 type MsgLogsLoaded struct{ Lines []string }
 
+// MsgServerActionDone reports the result of a server-wide bulk action.
+type MsgServerActionDone struct {
+	Action string   // menu label of the bulk action, e.g. "Start All"
+	Total  int      // number of agents the action was attempted on
+	Failed int      // how many attempts returned an error
+	Errors []string // error messages, usually "<agent id>: <error>"
+}
+
 // MsgTick triggers a periodic refresh.
 type MsgTick struct{}
diff --git a/pkg/tui/model.go b/pkg/tui/model.go
index 7b2f91c..cc083e4 100644
--- a/pkg/tui/model.go
+++ b/pkg/tui/model.go
@@ -10,6 +10,7 @@ const (
 	ScreenAgentList    // list all agents with status
 	ScreenAgentActions // actions for a selected agent
 	ScreenLogs         // tail log output
+	ScreenServer       // server-wide process management
 )
 
 // Model is the complete TUI state — pure data.
@@ -34,10 +35,11 @@ type AgentView struct {
 	Enabled bool
 	Running bool
 	PID     int
-	Uptime  string // formatted: "2h 15m"
-	Memory  string // formatted: "42 MB"
-	CPU     string // formatted: "1.2%"
-	LogSize string // formatted: "350 KB"
+	Instances int    // number of running instances (>1 means duplicates)
+	Uptime    string // formatted: "2h 15m"
+	Memory    string // formatted: "42 MB"
+	CPU       string // formatted: "1.2%"
+	LogSize   string // formatted: "350 KB"
 }
 
 // MenuOption represents a selectable menu item.
@@ -50,10 +52,21 @@ type MenuOption struct {
 func MainMenuOptions() []MenuOption {
 	return []MenuOption{
 		{Label: "Agents", Desc: "Gestionar agentes"},
+		{Label: "Server", Desc: "Gestionar servidor"},
 		{Label: "Quit", Desc: "Salir"},
 	}
 }
 
+// ServerMenuOptions returns the available server-wide actions.
+func ServerMenuOptions() []MenuOption {
+	return []MenuOption{
+		{Label: "Start All", Desc: "Iniciar todos los agentes habilitados"},
+		{Label: "Stop All", Desc: "Detener todos los agentes"},
+		{Label: "Restart All", Desc: "Reiniciar todos los agentes"},
+		{Label: "Kill All", Desc: "SIGKILL forzado a todos"},
+	}
+}
+
 // AgentActionOptions returns the available actions based on agent state.
 func AgentActionOptions(running bool) []MenuOption {
 	if running {
diff --git a/pkg/tui/update.go b/pkg/tui/update.go
index 36fef68..39d60e3 100644
--- a/pkg/tui/update.go
+++ b/pkg/tui/update.go
@@ -14,6 +14,12 @@ const (
 	IntentLoadLogs IntentKind = "load_logs"
 	IntentTick     IntentKind = "tick"
 	IntentQuit     IntentKind = "quit"
+
+	// Server-wide bulk operations
+	IntentStartAll   IntentKind = "start_all"
+	IntentStopAll    IntentKind = "stop_all"
+	IntentRestartAll IntentKind = "restart_all"
+	IntentKillAll    IntentKind = "kill_all"
 )
 
 // Intent is pure data describing a side effect to execute.
@@ -45,9 +51,11 @@ func Update(model Model, msg interface{}) (Model, []Intent) {
 
 	case MsgAgentsLoaded:
 		model.Agents = m.Agents
-		// Clamp cursor
-		if model.Cursor >= len(model.Agents) && len(model.Agents) > 0 {
-			model.Cursor = len(model.Agents) - 1
+		// Clamp cursor only on screens that use the agent list
+		if model.Screen == ScreenAgentList {
+			if model.Cursor >= len(model.Agents) && len(model.Agents) > 0 {
+				model.Cursor = len(model.Agents) - 1
+			}
 		}
 		return model, []Intent{{Kind: IntentTick}}
 
@@ -59,6 +67,19 @@ func Update(model Model, msg interface{}) (Model, []Intent) {
 		}
 		return model, []Intent{{Kind: IntentLoadAgents}}
 
+	case MsgServerActionDone:
+		switch {
+		case m.Failed == 0:
+			model.StatusMsg = fmt.Sprintf("%s: %d agents OK", m.Action, m.Total)
+		case len(m.Errors) > 0:
+			// Show the first error: a bare count hides the cause, and a
+			// failure reported with Total == 0 would read as just "1/0 failed".
+			model.StatusMsg = fmt.Sprintf("%s: %d/%d failed (%s)", m.Action, m.Failed, m.Total, m.Errors[0])
+		default:
+			model.StatusMsg = fmt.Sprintf("%s: %d/%d failed", m.Action, m.Failed, m.Total)
+		}
+		return model, []Intent{{Kind: IntentLoadAgents}}
+
 	case MsgLogsLoaded:
 		model.LogLines = m.Lines
 		model.LogScroll = max(0, len(m.Lines)-visibleLogLines(model))
@@ -92,6 +113,8 @@ func updateKey(model Model, key KeyMsg) (Model, []Intent) {
 		return updateAgentActions(model, key)
 	case ScreenLogs:
 		return updateLogs(model, key)
+	case ScreenServer:
+		return updateServerScreen(model, key)
 	}
 	return model, nil
 }
@@ -109,6 +132,11 @@ func updateMainScreen(model Model, key KeyMsg) (Model, []Intent) {
 			model.Screen = ScreenAgentList
 			model.Cursor = 0
 			return model, []Intent{{Kind: IntentLoadAgents}}
+		case "Server":
+			model.Screen = ScreenServer
+			model.Cursor = 0
+			model.StatusMsg = ""
+			return model, []Intent{{Kind: IntentLoadAgents}}
 		case "Quit":
 			return model, []Intent{{Kind: IntentQuit}}
 		}
@@ -210,6 +238,46 @@ func updateLogs(model Model, key KeyMsg) (Model, []Intent) {
 	return model, nil
 }
 
+// updateServerScreen handles key input on the server screen: "0" goes back to the main menu, up/down move the cursor, enter runs the selected bulk action.
+func updateServerScreen(model Model, key KeyMsg) (Model, []Intent) {
+	opts := ServerMenuOptions()
+
+	switch key.Str {
+	case "0":
+		model.Screen = ScreenMain
+		model.Cursor = 0
+		model.StatusMsg = ""
+	case "up", "k":
+		model.Cursor = clamp(model.Cursor-1, 0, len(opts)-1)
+	case "down", "j":
+		model.Cursor = clamp(model.Cursor+1, 0, len(opts)-1)
+	case "enter":
+		if model.Cursor < len(opts) {
+			return executeServerAction(model, opts[model.Cursor].Label)
+		}
+	}
+	return model, nil
+}
+
+// executeServerAction maps a server menu label to its bulk intent and sets a progress status message; unknown labels are a no-op.
+func executeServerAction(model Model, action string) (Model, []Intent) {
+	switch action {
+	case "Start All":
+		model.StatusMsg = "Starting all agents..."
+		return model, []Intent{{Kind: IntentStartAll}}
+	case "Stop All":
+		model.StatusMsg = "Stopping all agents..."
+		return model, []Intent{{Kind: IntentStopAll}}
+	case "Restart All":
+		model.StatusMsg = "Restarting all agents..."
+		return model, []Intent{{Kind: IntentRestartAll}}
+	case "Kill All":
+		model.StatusMsg = "Killing all agents..."
+		return model, []Intent{{Kind: IntentKillAll}}
+	}
+	return model, nil
+}
+
 // ── pure helpers ─────────────────────────────────────────────────────────
 
 func visibleLogLines(m Model) int {
diff --git a/pkg/tui/view.go b/pkg/tui/view.go
index 55f7696..354c7ab 100644
--- a/pkg/tui/view.go
+++ b/pkg/tui/view.go
@@ -16,6 +16,8 @@ func View(model Model) string {
 		return viewAgentActions(model)
 	case ScreenLogs:
 		return viewLogs(model)
+	case ScreenServer:
+		return viewServer(model)
 	default:
 		return ""
 	}
@@ -78,6 +80,10 @@ func viewAgentList(m Model) string {
 
 		b.WriteString(fmt.Sprintf(" %s%s %-20s %-8s %s\n",
 			cursor, icon, a.ID, a.Version, status))
+
+		if a.Instances > 1 {
+			b.WriteString(fmt.Sprintf(" ⚠ WARNING: %d instances running!\n", a.Instances))
+		}
 	}
 
 	if m.StatusMsg != "" {
@@ -177,6 +183,55 @@ func viewLogs(m Model) string {
 	return b.String()
 }
 
+// viewServer renders the server screen: status summary, compact per-agent list, bulk-action menu and the last status message.
+func viewServer(m Model) string {
+	var b strings.Builder
+
+	b.WriteString("\n Server Management\n")
+	b.WriteString(" " + strings.Repeat("─", 44) + "\n")
+
+	// Summary
+	running, stopped, disabled := countStatuses(m.Agents)
+	total := len(m.Agents)
+	if total > 0 {
+		b.WriteString(fmt.Sprintf(" %d agents: %d running, %d stopped, %d disabled\n", total, running, stopped, disabled))
+	} else {
+		b.WriteString(" Loading...\n")
+	}
+
+	// Agent status list (compact)
+	if total > 0 {
+		b.WriteString("\n")
+		for _, a := range m.Agents {
+			icon := "○"
+			if !a.Enabled {
+				icon = " "
+			} else if a.Running {
+				icon = "●"
+			}
+			b.WriteString(fmt.Sprintf(" %s %s\n", icon, a.ID))
+		}
+	}
+
+	b.WriteString("\n")
+
+	// Action menu
+	for i, opt := range ServerMenuOptions() {
+		cursor := " "
+		if i == m.Cursor {
+			cursor = "> "
+		}
+		b.WriteString(fmt.Sprintf(" %s%-16s %s\n", cursor, opt.Label, opt.Desc))
+	}
+
+	if m.StatusMsg != "" {
+		b.WriteString("\n " + m.StatusMsg + "\n")
+	}
+
+	b.WriteString("\n ↑↓ navegar enter ejecutar 0 volver\n")
+	return b.String()
+}
+
 func countStatuses(agents []AgentView) (running, stopped, disabled int) {
 	for _, a := range agents {
 		switch {
diff --git a/shell/process/manager.go b/shell/process/manager.go
index c69fa0a..8beccdd 100644
--- a/shell/process/manager.go
+++ b/shell/process/manager.go
@@ -47,11 +47,12 @@ type Manager struct {
 	runDir     string
 	agentsGlob string
 	binPath    string
+	envFile    string // path to .env file for child processes
 }
 
 // NewManager creates a Manager. binPath can be empty for auto-detection.
 func NewManager(runDir, agentsGlob, binPath string) *Manager {
-	return &Manager{runDir: runDir, agentsGlob: agentsGlob, binPath: binPath}
+	return &Manager{runDir: runDir, agentsGlob: agentsGlob, binPath: binPath, envFile: ".env"}
 }
 
 // Scan discovers all agents from config files.
@@ -81,8 +82,8 @@ func (m *Manager) Scan() ([]AgentInfo, error) {
 
 // Status returns the runtime status for a single agent.
 func (m *Manager) Status(info AgentInfo) AgentStatus {
-	pid := m.readPID(info.ID)
-	running := pid > 0 && m.isAlive(pid)
+	pid := m.resolveRunningPID(info.ID)
+	running := pid > 0
 	return AgentStatus{AgentInfo: info, Running: running, PID: pid}
 }
 
@@ -101,6 +102,12 @@ func (m *Manager) StatusAll() ([]AgentStatus, error) {
 
 // Start launches an agent process in the background.
 func (m *Manager) Start(info AgentInfo) error {
+	// Check for orphan instances
+	if existing := m.findProcessPIDs(info.ID); len(existing) > 0 {
+		return fmt.Errorf("agent %q already has %d running instance(s) (PIDs: %v) — stop them first",
+			info.ID, len(existing), existing)
+	}
+
 	if err := os.MkdirAll(m.runDir, 0o755); err != nil {
 		return fmt.Errorf("create run dir: %w", err)
 	}
@@ -113,11 +120,12 @@ func (m *Manager) Start(info AgentInfo) error {
 	bin := m.resolvedBin()
 	var cmd *exec.Cmd
 	if strings.HasPrefix(bin, "go run") {
-		cmd = exec.Command("go", "run", "./cmd/launcher", "-c", info.ConfigPath)
+		cmd = exec.Command("go", "run", "-tags", "goolm", "./cmd/launcher", "-c", info.ConfigPath)
 	} else {
 		cmd = exec.Command(bin, "-c", info.ConfigPath)
 	}
 
+	cmd.Env = m.buildEnv()
 	cmd.Stdout = logFile
 	cmd.Stderr = logFile
 	cmd.SysProcAttr = &syscall.SysProcAttr{Setsid: true}
@@ -135,49 +143,83 @@ func (m *Manager) Start(info AgentInfo) error {
 	return nil
 }
 
-// Stop sends SIGTERM, waits up to 5s, then SIGKILL if needed.
+// runningPIDs returns every live PID that belongs to the agent: the launcher
+// processes found by findProcessPIDs plus the PID-file process when it is
+// alive and was not already in that list.
+func (m *Manager) runningPIDs(id string) []int {
+	pids := m.findProcessPIDs(id)
+	filePID := m.readPID(id)
+	if filePID <= 0 || !m.isAlive(filePID) {
+		return pids
+	}
+	for _, p := range pids {
+		if p == filePID {
+			return pids
+		}
+	}
+	return append(pids, filePID)
+}
+
+// Stop sends SIGTERM to all instances, waits up to 5s, then SIGKILL if needed.
 func (m *Manager) Stop(id string) error {
-	pid := m.readPID(id)
-	if pid == 0 || !m.isAlive(pid) {
+	pids := m.runningPIDs(id)
+	if len(pids) == 0 {
 		return fmt.Errorf("agent %q is not running", id)
 	}
 
-	if err := syscall.Kill(pid, syscall.SIGTERM); err != nil {
-		return fmt.Errorf("SIGTERM: %w", err)
+	// SIGTERM all instances. Errors are ignored on purpose: a process may
+	// have exited already, and any survivor is force-killed below.
+	for _, pid := range pids {
+		_ = syscall.Kill(pid, syscall.SIGTERM)
 	}
 
 	// Wait up to 5 seconds for graceful shutdown.
 	for i := 0; i < 10; i++ {
-		if !m.isAlive(pid) {
+		allDead := true
+		for _, pid := range pids {
+			if m.isAlive(pid) {
+				allDead = false
+				break
+			}
+		}
+		if allDead {
 			m.removePID(id)
 			return nil
 		}
 		time.Sleep(500 * time.Millisecond)
 	}
 
-	// Force kill.
-	if m.isAlive(pid) {
-		_ = syscall.Kill(pid, syscall.SIGKILL)
+	// Force kill survivors.
+	for _, pid := range pids {
+		if m.isAlive(pid) {
+			_ = syscall.Kill(pid, syscall.SIGKILL)
+		}
 	}
 	m.removePID(id)
 	return nil
 }
 
-// Kill sends SIGKILL immediately.
+// Kill sends SIGKILL to all instances immediately.
 func (m *Manager) Kill(id string) error {
-	pid := m.readPID(id)
-	if pid == 0 || !m.isAlive(pid) {
+	pids := m.runningPIDs(id)
+	if len(pids) == 0 {
 		return fmt.Errorf("agent %q is not running", id)
 	}
-	err := syscall.Kill(pid, syscall.SIGKILL)
+
+	var lastErr error
+	for _, pid := range pids {
+		if err := syscall.Kill(pid, syscall.SIGKILL); err != nil {
+			lastErr = err
+		}
+	}
 	m.removePID(id)
-	return err
+	return lastErr
 }
 
 // Stats gathers resource usage for a running agent from /proc.
 func (m *Manager) Stats(id string) (ProcessStats, error) {
-	pid := m.readPID(id)
-	if pid == 0 || !m.isAlive(pid) {
+	pid := m.resolveRunningPID(id)
+	if pid == 0 {
 		return ProcessStats{}, fmt.Errorf("agent %q is not running", id)
 	}
 
@@ -256,8 +298,12 @@ func (m *Manager) LogTail(id string, lines int) ([]string, error) {
 
 // IsRunning checks if an agent process is alive.
 func (m *Manager) IsRunning(id string) bool {
-	pid := m.readPID(id)
-	return pid > 0 && m.isAlive(pid)
+	return m.resolveRunningPID(id) > 0
+}
+
+// InstanceCount returns how many launcher processes are running for an agent.
+func (m *Manager) InstanceCount(id string) int {
+	return len(m.findProcessPIDs(id))
 }
 
 // ReadPID returns the PID from the PID file, or 0.
@@ -285,6 +342,70 @@ func (m *Manager) readPID(id string) int {
 	return pid
 }
 
+// findProcessPIDs searches for running launcher processes for a given agent ID
+// using pgrep. Returns all matching PIDs.
+func (m *Manager) findProcessPIDs(id string) []int {
+	// First try to find the config path for this agent
+	configPath := m.configPathFor(id)
+	if configPath == "" {
+		return nil
+	}
+	pattern := fmt.Sprintf("launcher.*-c.*%s", configPath)
+	out, err := exec.Command("pgrep", "-f", pattern).Output()
+	if err != nil {
+		return nil
+	}
+	var pids []int
+	for _, line := range strings.Split(strings.TrimSpace(string(out)), "\n") {
+		if p, err := strconv.Atoi(strings.TrimSpace(line)); err == nil && p > 0 {
+			pids = append(pids, p)
+		}
+	}
+	return pids
+}
+
+// configPathFor returns the config file path for the given agent ID.
+func (m *Manager) configPathFor(id string) string {
+	matches, err := filepath.Glob(m.agentsGlob)
+	if err != nil {
+		return ""
+	}
+	for _, path := range matches {
+		cfg, err := config.LoadMeta(path)
+		if err != nil {
+			continue
+		}
+		if cfg.Agent.ID == id {
+			return path
+		}
+	}
+	return ""
+}
+
+// resolveRunningPID returns the PID of the running agent, checking the PID file
+// first and falling back to process discovery. It also repairs stale PID files.
+func (m *Manager) resolveRunningPID(id string) int {
+	// Check PID file first
+	pid := m.readPID(id)
+	if pid > 0 && m.isAlive(pid) {
+		return pid
+	}
+
+	// PID file is stale or missing — search for actual processes
+	pids := m.findProcessPIDs(id)
+	if len(pids) > 0 {
+		// Repair the PID file with the first found process
+		_ = os.WriteFile(m.pidPath(id), []byte(strconv.Itoa(pids[0])), 0o644)
+		return pids[0]
+	}
+
+	// Clean up stale PID file
+	if pid > 0 {
+		m.removePID(id)
+	}
+	return 0
+}
+
 func (m *Manager) isAlive(pid int) bool {
 	return syscall.Kill(pid, 0) == nil
 }
@@ -293,6 +414,40 @@ func (m *Manager) removePID(id string) {
 	_ = os.Remove(m.pidPath(id))
 }
 
+// buildEnv returns the environment for child processes: the current
+// environment followed by the KEY=VALUE pairs from the .env file. exec.Cmd
+// keeps the last value of a duplicated key, so .env entries take precedence.
+func (m *Manager) buildEnv() []string {
+	env := os.Environ()
+	if m.envFile == "" {
+		return env
+	}
+	data, err := os.ReadFile(m.envFile)
+	if err != nil {
+		// Best effort: a missing or unreadable .env just means no extra vars.
+		return env
+	}
+	for _, line := range strings.Split(string(data), "\n") {
+		line = strings.TrimSpace(line)
+		if line == "" || strings.HasPrefix(line, "#") {
+			continue
+		}
+		// The same file is meant to be sourced by a shell, so accept the
+		// "export KEY=VALUE" form and strip one pair of surrounding quotes.
+		line = strings.TrimSpace(strings.TrimPrefix(line, "export "))
+		idx := strings.Index(line, "=")
+		if idx <= 0 {
+			continue
+		}
+		key, val := line[:idx], line[idx+1:]
+		if n := len(val); n >= 2 && (val[0] == '"' || val[0] == '\'') && val[n-1] == val[0] {
+			val = val[1 : n-1]
+		}
+		env = append(env, key+"="+val)
+	}
+	return env
+}
+
 func (m *Manager) resolvedBin() string {
 	if m.binPath != "" {
 		return m.binPath
diff --git a/shell/tui/adapter.go b/shell/tui/adapter.go
index 4ba0c13..500e481 100644
--- a/shell/tui/adapter.go
+++ b/shell/tui/adapter.go
@@ -44,6 +44,18 @@ func (a *Adapter) RunIntent(intent puretui.Intent) tea.Cmd {
 	case puretui.IntentLoadLogs:
 		return a.loadLogs(intent.AgentID)
 
+	case puretui.IntentStartAll:
+		return a.startAll()
+
+	case puretui.IntentStopAll:
+		return a.stopAll()
+
+	case puretui.IntentRestartAll:
+		return a.restartAll()
+
+	case puretui.IntentKillAll:
+		return a.killAll()
+
 	case puretui.IntentTick:
 		return a.tick()
 
@@ -65,13 +77,14 @@ func (a *Adapter) loadAgents() tea.Cmd {
 		views := make([]puretui.AgentView, len(statuses))
 		for i, s := range statuses {
 			v := puretui.AgentView{
-				ID:      s.ID,
-				Name:    s.Name,
-				Version: s.Version,
-				Desc:    s.Desc,
-				Enabled: s.Enabled,
-				Running: s.Running,
-				PID:     s.PID,
+				ID:        s.ID,
+				Name:      s.Name,
+				Version:   s.Version,
+				Desc:      s.Desc,
+				Enabled:   s.Enabled,
+				Running:   s.Running,
+				PID:       s.PID,
+				Instances: a.mgr.InstanceCount(s.ID),
 			}
 
 			if s.Running {
@@ -147,6 +160,117 @@ func (a *Adapter) restartAgent(id string) tea.Cmd {
 	}
 }
 
+// startAll starts every enabled agent that is not already running.
+func (a *Adapter) startAll() tea.Cmd {
+	return func() tea.Msg {
+		agents, err := a.mgr.Scan()
+		if err != nil {
+			return puretui.MsgServerActionDone{Action: "Start All", Errors: []string{err.Error()}, Failed: 1}
+		}
+		var total, failed int
+		var errs []string
+		for _, agent := range agents {
+			if !agent.Enabled {
+				continue
+			}
+			if a.mgr.IsRunning(agent.ID) {
+				continue
+			}
+			total++
+			if err := a.mgr.Start(agent); err != nil {
+				failed++
+				errs = append(errs, fmt.Sprintf("%s: %v", agent.ID, err))
+			}
+		}
+		if total > 0 {
+			time.Sleep(500 * time.Millisecond)
+		}
+		return puretui.MsgServerActionDone{Action: "Start All", Total: total, Failed: failed, Errors: errs}
+	}
+}
+
+// stopAll calls Manager.Stop on every agent reported as running.
+func (a *Adapter) stopAll() tea.Cmd {
+	return func() tea.Msg {
+		statuses, err := a.mgr.StatusAll()
+		if err != nil {
+			return puretui.MsgServerActionDone{Action: "Stop All", Errors: []string{err.Error()}, Failed: 1}
+		}
+		var total, failed int
+		var errs []string
+		for _, s := range statuses {
+			if !s.Running {
+				continue
+			}
+			total++
+			if err := a.mgr.Stop(s.ID); err != nil {
+				failed++
+				errs = append(errs, fmt.Sprintf("%s: %v", s.ID, err))
+			}
+		}
+		return puretui.MsgServerActionDone{Action: "Stop All", Total: total, Failed: failed, Errors: errs}
+	}
+}
+
+// restartAll stops the enabled agents that are running, then starts every enabled agent.
+func (a *Adapter) restartAll() tea.Cmd {
+	return func() tea.Msg {
+		agents, err := a.mgr.Scan()
+		if err != nil {
+			return puretui.MsgServerActionDone{Action: "Restart All", Errors: []string{err.Error()}, Failed: 1}
+		}
+
+		// Stop all running first
+		for _, agent := range agents {
+			if agent.Enabled && a.mgr.IsRunning(agent.ID) {
+				_ = a.mgr.Stop(agent.ID)
+			}
+		}
+		time.Sleep(300 * time.Millisecond)
+
+		// Start all enabled
+		var total, failed int
+		var errs []string
+		for _, agent := range agents {
+			if !agent.Enabled {
+				continue
+			}
+			total++
+			if err := a.mgr.Start(agent); err != nil {
+				failed++
+				errs = append(errs, fmt.Sprintf("%s: %v", agent.ID, err))
+			}
+		}
+		if total > 0 {
+			time.Sleep(500 * time.Millisecond)
+		}
+		return puretui.MsgServerActionDone{Action: "Restart All", Total: total, Failed: failed, Errors: errs}
+	}
+}
+
+// killAll calls Manager.Kill (immediate SIGKILL) on every agent reported as running.
+func (a *Adapter) killAll() tea.Cmd {
+	return func() tea.Msg {
+		statuses, err := a.mgr.StatusAll()
+		if err != nil {
+			return puretui.MsgServerActionDone{Action: "Kill All", Errors: []string{err.Error()}, Failed: 1}
+		}
+		var total, failed int
+		var errs []string
+		for _, s := range statuses {
+			if !s.Running {
+				continue
+			}
+			total++
+			if err := a.mgr.Kill(s.ID); err != nil {
+				failed++
+				errs = append(errs, fmt.Sprintf("%s: %v", s.ID, err))
+			}
+		}
+		return puretui.MsgServerActionDone{Action: "Kill All", Total: total, Failed: failed, Errors: errs}
+	}
+}
+
 func (a *Adapter) loadLogs(id string) tea.Cmd {
 	return func() tea.Msg {
 		lines, err := a.mgr.LogTail(id, 100)