// Package process manages agent processes: discovery, start, stop, kill, stats. // This is the impure shell layer — all I/O happens here. package process import ( "bufio" "fmt" "os" "os/exec" "path/filepath" "strconv" "strings" "syscall" "time" "github.com/enmanuel/agents/internal/config" ) // AgentInfo holds metadata about an agent parsed from its config. type AgentInfo struct { ID string Name string Version string Desc string ConfigPath string Enabled bool } // AgentStatus combines agent metadata with runtime state. type AgentStatus struct { AgentInfo Running bool PID int } // ProcessStats holds resource usage for a running process. type ProcessStats struct { PID int UptimeSecs int64 MemRSSKB int64 CPUPct float64 LogBytes int64 } // Manager handles agent process lifecycle. type Manager struct { runDir string agentsGlob string binPath string } // NewManager creates a Manager. binPath can be empty for auto-detection. func NewManager(runDir, agentsGlob, binPath string) *Manager { return &Manager{runDir: runDir, agentsGlob: agentsGlob, binPath: binPath} } // Scan discovers all agents from config files. func (m *Manager) Scan() ([]AgentInfo, error) { matches, err := filepath.Glob(m.agentsGlob) if err != nil { return nil, err } var agents []AgentInfo for _, path := range matches { cfg, err := config.LoadMeta(path) if err != nil { continue } agents = append(agents, AgentInfo{ ID: cfg.Agent.ID, Name: cfg.Agent.Name, Version: cfg.Agent.Version, Desc: cfg.Agent.Description, ConfigPath: path, Enabled: cfg.Agent.Enabled, }) } return agents, nil } // Status returns the runtime status for a single agent. func (m *Manager) Status(info AgentInfo) AgentStatus { pid := m.readPID(info.ID) running := pid > 0 && m.isAlive(pid) return AgentStatus{AgentInfo: info, Running: running, PID: pid} } // StatusAll returns status for every discovered agent. func (m *Manager) StatusAll() ([]AgentStatus, error) { agents, err := m.Scan() if err != nil { return nil, err } statuses := make([]AgentStatus, len(agents)) for i, a := range agents { statuses[i] = m.Status(a) } return statuses, nil } // Start launches an agent process in the background. func (m *Manager) Start(info AgentInfo) error { if err := os.MkdirAll(m.runDir, 0o755); err != nil { return fmt.Errorf("create run dir: %w", err) } logFile, err := os.OpenFile(m.logPath(info.ID), os.O_CREATE|os.O_APPEND|os.O_WRONLY, 0o644) if err != nil { return fmt.Errorf("open log: %w", err) } bin := m.resolvedBin() var cmd *exec.Cmd if strings.HasPrefix(bin, "go run") { cmd = exec.Command("go", "run", "./cmd/launcher", "-c", info.ConfigPath) } else { cmd = exec.Command(bin, "-c", info.ConfigPath) } cmd.Stdout = logFile cmd.Stderr = logFile cmd.SysProcAttr = &syscall.SysProcAttr{Setsid: true} if err := cmd.Start(); err != nil { logFile.Close() return fmt.Errorf("exec: %w", err) } if err := os.WriteFile(m.pidPath(info.ID), []byte(strconv.Itoa(cmd.Process.Pid)), 0o644); err != nil { return fmt.Errorf("write PID: %w", err) } go func() { _ = cmd.Wait() }() return nil } // Stop sends SIGTERM, waits up to 5s, then SIGKILL if needed. func (m *Manager) Stop(id string) error { pid := m.readPID(id) if pid == 0 || !m.isAlive(pid) { return fmt.Errorf("agent %q is not running", id) } if err := syscall.Kill(pid, syscall.SIGTERM); err != nil { return fmt.Errorf("SIGTERM: %w", err) } // Wait up to 5 seconds for graceful shutdown. for i := 0; i < 10; i++ { if !m.isAlive(pid) { m.removePID(id) return nil } time.Sleep(500 * time.Millisecond) } // Force kill. if m.isAlive(pid) { _ = syscall.Kill(pid, syscall.SIGKILL) } m.removePID(id) return nil } // Kill sends SIGKILL immediately. func (m *Manager) Kill(id string) error { pid := m.readPID(id) if pid == 0 || !m.isAlive(pid) { return fmt.Errorf("agent %q is not running", id) } err := syscall.Kill(pid, syscall.SIGKILL) m.removePID(id) return err } // Stats gathers resource usage for a running agent from /proc. func (m *Manager) Stats(id string) (ProcessStats, error) { pid := m.readPID(id) if pid == 0 || !m.isAlive(pid) { return ProcessStats{}, fmt.Errorf("agent %q is not running", id) } s := ProcessStats{PID: pid} // Uptime from /proc//stat if data, err := os.ReadFile(fmt.Sprintf("/proc/%d/stat", pid)); err == nil { fields := strings.Fields(string(data)) if len(fields) > 21 { startTicks, _ := strconv.ParseInt(fields[21], 10, 64) clkTck := int64(100) // sysconf(_SC_CLK_TCK) is 100 on Linux if raw, err := os.ReadFile("/proc/stat"); err == nil { for _, line := range strings.Split(string(raw), "\n") { if strings.HasPrefix(line, "btime ") { btime, _ := strconv.ParseInt(strings.Fields(line)[1], 10, 64) procStart := btime + startTicks/clkTck s.UptimeSecs = time.Now().Unix() - procStart break } } } } } // RSS from /proc//status if data, err := os.ReadFile(fmt.Sprintf("/proc/%d/status", pid)); err == nil { for _, line := range strings.Split(string(data), "\n") { if strings.HasPrefix(line, "VmRSS:") { fields := strings.Fields(line) if len(fields) >= 2 { s.MemRSSKB, _ = strconv.ParseInt(fields[1], 10, 64) } break } } } // CPU% from ps (simpler than calculating from /proc/stat deltas) if out, err := exec.Command("ps", "-p", strconv.Itoa(pid), "-o", "pcpu=").Output(); err == nil { s.CPUPct, _ = strconv.ParseFloat(strings.TrimSpace(string(out)), 64) } // Log file size if info, err := os.Stat(m.logPath(id)); err == nil { s.LogBytes = info.Size() } return s, nil } // LogTail returns the last N lines of an agent's log. func (m *Manager) LogTail(id string, lines int) ([]string, error) { f, err := os.Open(m.logPath(id)) if err != nil { return nil, fmt.Errorf("open log: %w", err) } defer f.Close() // Read all lines and keep last N. For large files a reverse scanner // would be better, but agent logs are typically small. var all []string scanner := bufio.NewScanner(f) scanner.Buffer(make([]byte, 0, 64*1024), 1024*1024) for scanner.Scan() { all = append(all, scanner.Text()) } if err := scanner.Err(); err != nil { return nil, err } if len(all) > lines { all = all[len(all)-lines:] } return all, nil } // IsRunning checks if an agent process is alive. func (m *Manager) IsRunning(id string) bool { pid := m.readPID(id) return pid > 0 && m.isAlive(pid) } // ReadPID returns the PID from the PID file, or 0. func (m *Manager) ReadPID(id string) int { return m.readPID(id) } // PidPath returns the path to the PID file for an agent. func (m *Manager) PidPath(id string) string { return m.pidPath(id) } // LogPath returns the path to the log file for an agent. func (m *Manager) LogPath(id string) string { return m.logPath(id) } // ── internal helpers ───────────────────────────────────────────────────── func (m *Manager) pidPath(id string) string { return filepath.Join(m.runDir, id+".pid") } func (m *Manager) logPath(id string) string { return filepath.Join(m.runDir, id+".log") } func (m *Manager) readPID(id string) int { raw, err := os.ReadFile(m.pidPath(id)) if err != nil { return 0 } pid, _ := strconv.Atoi(strings.TrimSpace(string(raw))) return pid } func (m *Manager) isAlive(pid int) bool { return syscall.Kill(pid, 0) == nil } func (m *Manager) removePID(id string) { _ = os.Remove(m.pidPath(id)) } func (m *Manager) resolvedBin() string { if m.binPath != "" { return m.binPath } if _, err := os.Stat("bin/launcher"); err == nil { return "bin/launcher" } return "go run ./cmd/launcher" }