refactor: extract process management to shell/process package
Extrae toda la lógica de gestión de procesos (scan, start, stop, kill, stats, log tail) de cmd/agentctl/main.go a shell/process/manager.go como paquete reutilizable siguiendo el patrón impure shell del proyecto. agentctl queda como thin wrapper sobre process.Manager — misma funcionalidad, mismo comportamiento. El nuevo paquete será compartido por el TUI dashboard. Añade funcionalidad nueva al Manager: Stats() para métricas de /proc, LogTail() para últimas N líneas, Stop() con espera graceful + SIGKILL fallback. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -0,0 +1,304 @@
|
||||
// Package process manages agent processes: discovery, start, stop, kill, stats.
|
||||
// This is the impure shell layer — all I/O happens here.
|
||||
package process
|
||||
|
||||
import (
|
||||
"bufio"
|
||||
"fmt"
|
||||
"os"
|
||||
"os/exec"
|
||||
"path/filepath"
|
||||
"strconv"
|
||||
"strings"
|
||||
"syscall"
|
||||
"time"
|
||||
|
||||
"github.com/enmanuel/agents/internal/config"
|
||||
)
|
||||
|
||||
// AgentInfo describes one agent as declared in its config file.
type AgentInfo struct {
	// Identity and descriptive fields taken from the agent section of the config.
	ID, Name, Version, Desc string

	// ConfigPath is the path of the config file this entry was loaded from.
	ConfigPath string

	// Enabled mirrors the enabled flag found in the config.
	Enabled bool
}
|
||||
|
||||
// AgentStatus combines agent metadata with runtime state.
|
||||
type AgentStatus struct {
|
||||
AgentInfo
|
||||
Running bool
|
||||
PID int
|
||||
}
|
||||
|
||||
// ProcessStats is a snapshot of the resource usage of one running agent.
type ProcessStats struct {
	PID int // process the figures below belong to

	UptimeSecs, MemRSSKB int64 // seconds since process start; VmRSS figure from /proc/<pid>/status

	CPUPct   float64 // pcpu value reported by ps
	LogBytes int64   // current size of the agent's log file
}
|
||||
|
||||
// Manager drives the lifecycle of agent processes: discovery, start, stop,
// kill and inspection.
type Manager struct {
	// runDir holds the per-agent PID and log files.
	// agentsGlob is the glob pattern used to discover agent config files.
	// binPath is the launcher binary; empty means it is chosen automatically.
	runDir, agentsGlob, binPath string
}
|
||||
|
||||
// NewManager creates a Manager. binPath can be empty for auto-detection.
|
||||
func NewManager(runDir, agentsGlob, binPath string) *Manager {
|
||||
return &Manager{runDir: runDir, agentsGlob: agentsGlob, binPath: binPath}
|
||||
}
|
||||
|
||||
// Scan discovers all agents from config files.
|
||||
func (m *Manager) Scan() ([]AgentInfo, error) {
|
||||
matches, err := filepath.Glob(m.agentsGlob)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
var agents []AgentInfo
|
||||
for _, path := range matches {
|
||||
cfg, err := config.LoadMeta(path)
|
||||
if err != nil {
|
||||
continue
|
||||
}
|
||||
agents = append(agents, AgentInfo{
|
||||
ID: cfg.Agent.ID,
|
||||
Name: cfg.Agent.Name,
|
||||
Version: cfg.Agent.Version,
|
||||
Desc: cfg.Agent.Description,
|
||||
ConfigPath: path,
|
||||
Enabled: cfg.Agent.Enabled,
|
||||
})
|
||||
}
|
||||
return agents, nil
|
||||
}
|
||||
|
||||
// Status returns the runtime status for a single agent.
|
||||
func (m *Manager) Status(info AgentInfo) AgentStatus {
|
||||
pid := m.readPID(info.ID)
|
||||
running := pid > 0 && m.isAlive(pid)
|
||||
return AgentStatus{AgentInfo: info, Running: running, PID: pid}
|
||||
}
|
||||
|
||||
// StatusAll returns status for every discovered agent.
|
||||
func (m *Manager) StatusAll() ([]AgentStatus, error) {
|
||||
agents, err := m.Scan()
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
statuses := make([]AgentStatus, len(agents))
|
||||
for i, a := range agents {
|
||||
statuses[i] = m.Status(a)
|
||||
}
|
||||
return statuses, nil
|
||||
}
|
||||
|
||||
// Start launches an agent process in the background.
|
||||
func (m *Manager) Start(info AgentInfo) error {
|
||||
if err := os.MkdirAll(m.runDir, 0o755); err != nil {
|
||||
return fmt.Errorf("create run dir: %w", err)
|
||||
}
|
||||
|
||||
logFile, err := os.OpenFile(m.logPath(info.ID), os.O_CREATE|os.O_APPEND|os.O_WRONLY, 0o644)
|
||||
if err != nil {
|
||||
return fmt.Errorf("open log: %w", err)
|
||||
}
|
||||
|
||||
bin := m.resolvedBin()
|
||||
var cmd *exec.Cmd
|
||||
if strings.HasPrefix(bin, "go run") {
|
||||
cmd = exec.Command("go", "run", "./cmd/launcher", "-c", info.ConfigPath)
|
||||
} else {
|
||||
cmd = exec.Command(bin, "-c", info.ConfigPath)
|
||||
}
|
||||
|
||||
cmd.Stdout = logFile
|
||||
cmd.Stderr = logFile
|
||||
cmd.SysProcAttr = &syscall.SysProcAttr{Setsid: true}
|
||||
|
||||
if err := cmd.Start(); err != nil {
|
||||
logFile.Close()
|
||||
return fmt.Errorf("exec: %w", err)
|
||||
}
|
||||
|
||||
if err := os.WriteFile(m.pidPath(info.ID), []byte(strconv.Itoa(cmd.Process.Pid)), 0o644); err != nil {
|
||||
return fmt.Errorf("write PID: %w", err)
|
||||
}
|
||||
|
||||
go func() { _ = cmd.Wait() }()
|
||||
return nil
|
||||
}
|
||||
|
||||
// Stop sends SIGTERM, waits up to 5s, then SIGKILL if needed.
|
||||
func (m *Manager) Stop(id string) error {
|
||||
pid := m.readPID(id)
|
||||
if pid == 0 || !m.isAlive(pid) {
|
||||
return fmt.Errorf("agent %q is not running", id)
|
||||
}
|
||||
|
||||
if err := syscall.Kill(pid, syscall.SIGTERM); err != nil {
|
||||
return fmt.Errorf("SIGTERM: %w", err)
|
||||
}
|
||||
|
||||
// Wait up to 5 seconds for graceful shutdown.
|
||||
for i := 0; i < 10; i++ {
|
||||
if !m.isAlive(pid) {
|
||||
m.removePID(id)
|
||||
return nil
|
||||
}
|
||||
time.Sleep(500 * time.Millisecond)
|
||||
}
|
||||
|
||||
// Force kill.
|
||||
if m.isAlive(pid) {
|
||||
_ = syscall.Kill(pid, syscall.SIGKILL)
|
||||
}
|
||||
m.removePID(id)
|
||||
return nil
|
||||
}
|
||||
|
||||
// Kill sends SIGKILL immediately.
|
||||
func (m *Manager) Kill(id string) error {
|
||||
pid := m.readPID(id)
|
||||
if pid == 0 || !m.isAlive(pid) {
|
||||
return fmt.Errorf("agent %q is not running", id)
|
||||
}
|
||||
err := syscall.Kill(pid, syscall.SIGKILL)
|
||||
m.removePID(id)
|
||||
return err
|
||||
}
|
||||
|
||||
// Stats gathers resource usage for a running agent from /proc.
|
||||
func (m *Manager) Stats(id string) (ProcessStats, error) {
|
||||
pid := m.readPID(id)
|
||||
if pid == 0 || !m.isAlive(pid) {
|
||||
return ProcessStats{}, fmt.Errorf("agent %q is not running", id)
|
||||
}
|
||||
|
||||
s := ProcessStats{PID: pid}
|
||||
|
||||
// Uptime from /proc/<pid>/stat
|
||||
if data, err := os.ReadFile(fmt.Sprintf("/proc/%d/stat", pid)); err == nil {
|
||||
fields := strings.Fields(string(data))
|
||||
if len(fields) > 21 {
|
||||
startTicks, _ := strconv.ParseInt(fields[21], 10, 64)
|
||||
clkTck := int64(100) // sysconf(_SC_CLK_TCK) is 100 on Linux
|
||||
if raw, err := os.ReadFile("/proc/stat"); err == nil {
|
||||
for _, line := range strings.Split(string(raw), "\n") {
|
||||
if strings.HasPrefix(line, "btime ") {
|
||||
btime, _ := strconv.ParseInt(strings.Fields(line)[1], 10, 64)
|
||||
procStart := btime + startTicks/clkTck
|
||||
s.UptimeSecs = time.Now().Unix() - procStart
|
||||
break
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// RSS from /proc/<pid>/status
|
||||
if data, err := os.ReadFile(fmt.Sprintf("/proc/%d/status", pid)); err == nil {
|
||||
for _, line := range strings.Split(string(data), "\n") {
|
||||
if strings.HasPrefix(line, "VmRSS:") {
|
||||
fields := strings.Fields(line)
|
||||
if len(fields) >= 2 {
|
||||
s.MemRSSKB, _ = strconv.ParseInt(fields[1], 10, 64)
|
||||
}
|
||||
break
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// CPU% from ps (simpler than calculating from /proc/stat deltas)
|
||||
if out, err := exec.Command("ps", "-p", strconv.Itoa(pid), "-o", "pcpu=").Output(); err == nil {
|
||||
s.CPUPct, _ = strconv.ParseFloat(strings.TrimSpace(string(out)), 64)
|
||||
}
|
||||
|
||||
// Log file size
|
||||
if info, err := os.Stat(m.logPath(id)); err == nil {
|
||||
s.LogBytes = info.Size()
|
||||
}
|
||||
|
||||
return s, nil
|
||||
}
|
||||
|
||||
// LogTail returns the last N lines of an agent's log.
|
||||
func (m *Manager) LogTail(id string, lines int) ([]string, error) {
|
||||
f, err := os.Open(m.logPath(id))
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("open log: %w", err)
|
||||
}
|
||||
defer f.Close()
|
||||
|
||||
// Read all lines and keep last N. For large files a reverse scanner
|
||||
// would be better, but agent logs are typically small.
|
||||
var all []string
|
||||
scanner := bufio.NewScanner(f)
|
||||
scanner.Buffer(make([]byte, 0, 64*1024), 1024*1024)
|
||||
for scanner.Scan() {
|
||||
all = append(all, scanner.Text())
|
||||
}
|
||||
if err := scanner.Err(); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
if len(all) > lines {
|
||||
all = all[len(all)-lines:]
|
||||
}
|
||||
return all, nil
|
||||
}
|
||||
|
||||
// IsRunning checks if an agent process is alive.
|
||||
func (m *Manager) IsRunning(id string) bool {
|
||||
pid := m.readPID(id)
|
||||
return pid > 0 && m.isAlive(pid)
|
||||
}
|
||||
|
||||
// ReadPID returns the PID from the PID file, or 0.
|
||||
func (m *Manager) ReadPID(id string) int {
|
||||
return m.readPID(id)
|
||||
}
|
||||
|
||||
// PidPath returns the path to the PID file for an agent.
|
||||
func (m *Manager) PidPath(id string) string { return m.pidPath(id) }
|
||||
|
||||
// LogPath returns the path to the log file for an agent.
|
||||
func (m *Manager) LogPath(id string) string { return m.logPath(id) }
|
||||
|
||||
// ── internal helpers ─────────────────────────────────────────────────────
|
||||
|
||||
// pidPath builds "<runDir>/<id>.pid".
func (m *Manager) pidPath(id string) string {
	name := id + ".pid"
	return filepath.Join(m.runDir, name)
}
|
||||
// logPath builds "<runDir>/<id>.log".
func (m *Manager) logPath(id string) string {
	name := id + ".log"
	return filepath.Join(m.runDir, name)
}
|
||||
|
||||
func (m *Manager) readPID(id string) int {
|
||||
raw, err := os.ReadFile(m.pidPath(id))
|
||||
if err != nil {
|
||||
return 0
|
||||
}
|
||||
pid, _ := strconv.Atoi(strings.TrimSpace(string(raw)))
|
||||
return pid
|
||||
}
|
||||
|
||||
func (m *Manager) isAlive(pid int) bool {
|
||||
return syscall.Kill(pid, 0) == nil
|
||||
}
|
||||
|
||||
func (m *Manager) removePID(id string) {
|
||||
_ = os.Remove(m.pidPath(id))
|
||||
}
|
||||
|
||||
func (m *Manager) resolvedBin() string {
|
||||
if m.binPath != "" {
|
||||
return m.binPath
|
||||
}
|
||||
if _, err := os.Stat("bin/launcher"); err == nil {
|
||||
return "bin/launcher"
|
||||
}
|
||||
return "go run ./cmd/launcher"
|
||||
}
|
||||
Reference in New Issue
Block a user