refactor: extract process management to shell/process package

Extrae toda la lógica de gestión de procesos (scan, start, stop, kill, stats,
log tail) de cmd/agentctl/main.go a shell/process/manager.go como paquete
reutilizable siguiendo el patrón impure shell del proyecto.

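agentctl queda como thin wrapper sobre process.Manager con los mismos comandos.
Único cambio de comportamiento visible: stop/remove ahora delegan en
Manager.Stop(), que espera a que el proceso termine (SIGKILL como fallback) en
lugar de limitarse a enviar SIGTERM. El nuevo paquete será compartido por el
TUI dashboard.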

Añade funcionalidad nueva al Manager: Stats() para métricas de /proc,
LogTail() para últimas N líneas, Stop() con espera graceful + SIGKILL fallback.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
2026-03-04 19:38:19 +00:00
parent 791cea7db0
commit 00dac8b77f
2 changed files with 373 additions and 187 deletions
+69 -187
View File
@@ -13,19 +13,15 @@ package main
import (
"fmt"
"os"
"os/exec"
"path/filepath"
"strconv"
"strings"
"syscall"
"github.com/spf13/cobra"
"github.com/enmanuel/agents/internal/config"
"github.com/enmanuel/agents/shell/process"
)
const (
runDir = "run" // PID + log files
runDir = "run"
agentsGlob = "agents/*/config.yaml"
)
@@ -34,6 +30,8 @@ const (
func main() {
var binPath string
mgr := process.NewManager(runDir, agentsGlob, "")
root := &cobra.Command{
Use: "agentctl",
Short: "Manage Matrix agents",
@@ -46,10 +44,10 @@ func main() {
"Launcher binary path. Defaults to ./bin/launcher, falls back to 'go run ./cmd/launcher'")
root.AddCommand(
listCmd(),
startCmd(&binPath),
stopCmd(),
removeCmd(),
listCmd(mgr),
startCmd(mgr, &binPath),
stopCmd(mgr),
removeCmd(mgr),
avatarCmd(),
displaynameCmd(),
)
@@ -61,29 +59,29 @@ func main() {
// ── list ──────────────────────────────────────────────────────────────────
func listCmd() *cobra.Command {
func listCmd(mgr *process.Manager) *cobra.Command {
return &cobra.Command{
Use: "list",
Short: "List all agents and their current status",
Aliases: []string{"ls"},
RunE: func(cmd *cobra.Command, args []string) error {
agents, err := scanAgents()
statuses, err := mgr.StatusAll()
if err != nil {
return err
}
if len(agents) == 0 {
if len(statuses) == 0 {
fmt.Println("No agents found under agents/*/config.yaml")
return nil
}
fmt.Printf("%-20s %-12s %-8s %s\n", "ID", "STATUS", "VERSION", "DESCRIPTION")
fmt.Println(strings.Repeat("─", 72))
for _, a := range agents {
for _, s := range statuses {
fmt.Printf("%-20s %-12s %-8s %s\n",
a.ID,
statusLabel(a),
a.Version,
truncate(a.Desc, 36),
s.ID,
statusLabel(s),
s.Version,
truncate(s.Desc, 36),
)
}
return nil
@@ -93,41 +91,39 @@ func listCmd() *cobra.Command {
// ── start ─────────────────────────────────────────────────────────────────
func startCmd(binPath *string) *cobra.Command {
func startCmd(mgr *process.Manager, binPath *string) *cobra.Command {
return &cobra.Command{
Use: "start [agent-id...]",
Short: "Start one or all enabled agents",
RunE: func(cmd *cobra.Command, args []string) error {
agents, err := scanAgents()
statuses, err := mgr.StatusAll()
if err != nil {
return err
}
targets := filterTargets(agents, args)
targets := filterTargets(statuses, args)
if len(targets) == 0 {
return fmt.Errorf("no matching agents found")
}
bin := resolvedBin(*binPath)
started := 0
for _, a := range targets {
if !a.Enabled {
fmt.Printf("skip %-20s (disabled in config)\n", a.ID)
for _, s := range targets {
if !s.Enabled {
fmt.Printf("skip %-20s (disabled in config)\n", s.ID)
continue
}
if isRunning(a.ID) {
fmt.Printf("skip %-20s (already running, PID %d)\n", a.ID, readPID(a.ID))
if s.Running {
fmt.Printf("skip %-20s (already running, PID %d)\n", s.ID, s.PID)
continue
}
if err := startAgent(a, bin); err != nil {
fmt.Fprintf(os.Stderr, "fail %-20s %v\n", a.ID, err)
if err := mgr.Start(s.AgentInfo); err != nil {
fmt.Fprintf(os.Stderr, "fail %-20s %v\n", s.ID, err)
continue
}
fmt.Printf("start %-20s PID %d log → %s\n",
a.ID, readPID(a.ID), logPath(a.ID))
s.ID, mgr.ReadPID(s.ID), mgr.LogPath(s.ID))
started++
}
@@ -141,34 +137,33 @@ func startCmd(binPath *string) *cobra.Command {
// ── stop ──────────────────────────────────────────────────────────────────
func stopCmd() *cobra.Command {
func stopCmd(mgr *process.Manager) *cobra.Command {
return &cobra.Command{
Use: "stop [agent-id...]",
Short: "Stop one or all running agents",
RunE: func(cmd *cobra.Command, args []string) error {
agents, err := scanAgents()
statuses, err := mgr.StatusAll()
if err != nil {
return err
}
targets := filterTargets(agents, args)
targets := filterTargets(statuses, args)
if len(targets) == 0 {
return fmt.Errorf("no matching agents found")
}
stopped := 0
for _, a := range targets {
pid := readPID(a.ID)
if pid == 0 || !isRunning(a.ID) {
fmt.Printf("skip %-20s (not running)\n", a.ID)
for _, s := range targets {
if !s.Running {
fmt.Printf("skip %-20s (not running)\n", s.ID)
continue
}
if err := syscall.Kill(pid, syscall.SIGTERM); err != nil {
fmt.Fprintf(os.Stderr, "fail %-20s kill: %v\n", a.ID, err)
pid := s.PID
if err := mgr.Stop(s.ID); err != nil {
fmt.Fprintf(os.Stderr, "fail %-20s %v\n", s.ID, err)
continue
}
removePIDFile(a.ID)
fmt.Printf("stop %-20s sent SIGTERM to PID %d\n", a.ID, pid)
fmt.Printf("stop %-20s stopped PID %d\n", s.ID, pid)
stopped++
}
@@ -182,7 +177,7 @@ func stopCmd() *cobra.Command {
// ── remove ────────────────────────────────────────────────────────────────
func removeCmd() *cobra.Command {
func removeCmd(mgr *process.Manager) *cobra.Command {
return &cobra.Command{
Use: "remove <agent-id>",
Short: "Disable an agent (sets enabled: false). Does not delete data.",
@@ -190,15 +185,15 @@ func removeCmd() *cobra.Command {
RunE: func(cmd *cobra.Command, args []string) error {
id := args[0]
agents, err := scanAgents()
statuses, err := mgr.StatusAll()
if err != nil {
return err
}
var target *agentInfo
for i := range agents {
if agents[i].ID == id {
target = &agents[i]
var target *process.AgentStatus
for i := range statuses {
if statuses[i].ID == id {
target = &statuses[i]
break
}
}
@@ -206,15 +201,14 @@ func removeCmd() *cobra.Command {
return fmt.Errorf("agent %q not found", id)
}
// Stop if running
if isRunning(id) {
pid := readPID(id)
_ = syscall.Kill(pid, syscall.SIGTERM)
removePIDFile(id)
fmt.Printf("stop %-20s sent SIGTERM to PID %d\n", id, pid)
if target.Running {
if err := mgr.Stop(id); err != nil {
fmt.Fprintf(os.Stderr, "warn stop failed: %v\n", err)
} else {
fmt.Printf("stop %-20s stopped PID %d\n", id, target.PID)
}
}
// Disable in config (preserves comments)
if err := setEnabled(target.ConfigPath, false); err != nil {
return fmt.Errorf("update config: %w", err)
}
@@ -226,124 +220,44 @@ func removeCmd() *cobra.Command {
}
}
// ── agent scanning ────────────────────────────────────────────────────────
// ── helpers ───────────────────────────────────────────────────────────────
// agentInfo is the metadata agentctl keeps for one agent config found on disk.
type agentInfo struct {
	ID, Version string // agent identifier and version, as read from the config
	Enabled     bool   // whether the config marks the agent as enabled
	Desc        string // description text from the config
	ConfigPath  string // path of the config file this entry was loaded from
}
// scanAgents expands agentsGlob and loads the metadata of every matching
// config file. A config that fails to load is reported on stderr and skipped;
// only a glob error aborts the scan. The result is nil when nothing loads.
func scanAgents() ([]agentInfo, error) {
	paths, globErr := filepath.Glob(agentsGlob)
	if globErr != nil {
		return nil, globErr
	}

	var found []agentInfo
	for i := range paths {
		cfgPath := paths[i]

		// LoadMeta is used (per the original note) so that listing keeps
		// working even when env vars aren't set.
		meta, loadErr := config.LoadMeta(cfgPath)
		if loadErr != nil {
			fmt.Fprintf(os.Stderr, "warn skipping %s: %v\n", cfgPath, loadErr)
			continue
		}

		info := agentInfo{ConfigPath: cfgPath}
		info.ID = meta.Agent.ID
		info.Version = meta.Agent.Version
		info.Enabled = meta.Agent.Enabled
		info.Desc = meta.Agent.Description
		found = append(found, info)
	}
	return found, nil
}
func filterTargets(agents []agentInfo, ids []string) []agentInfo {
func filterTargets(statuses []process.AgentStatus, ids []string) []process.AgentStatus {
if len(ids) == 0 {
return agents // no filter → all
return statuses
}
set := make(map[string]bool, len(ids))
for _, id := range ids {
set[id] = true
}
var out []agentInfo
for _, a := range agents {
if set[a.ID] {
out = append(out, a)
var out []process.AgentStatus
for _, s := range statuses {
if set[s.ID] {
out = append(out, s)
}
}
return out
}
// ── process management ────────────────────────────────────────────────────
func startAgent(a agentInfo, bin string) error {
logFile, err := os.OpenFile(logPath(a.ID), os.O_CREATE|os.O_APPEND|os.O_WRONLY, 0o644)
if err != nil {
return fmt.Errorf("open log: %w", err)
func statusLabel(s process.AgentStatus) string {
switch {
case !s.Enabled:
return "disabled"
case s.Running:
return "● running"
default:
return "○ stopped"
}
var cmd *exec.Cmd
if strings.HasPrefix(bin, "go run") {
// dev mode: go run ./cmd/launcher -c <config>
cmd = exec.Command("go", "run", "./cmd/launcher", "-c", a.ConfigPath)
} else {
cmd = exec.Command(bin, "-c", a.ConfigPath)
}
cmd.Stdout = logFile
cmd.Stderr = logFile
// Detach from the parent process group so it keeps running after agentctl exits
cmd.SysProcAttr = &syscall.SysProcAttr{Setsid: true}
if err := cmd.Start(); err != nil {
logFile.Close()
return fmt.Errorf("exec: %w", err)
}
// Write PID file — the subprocess owns its lifecycle now
if err := os.WriteFile(pidPath(a.ID), []byte(strconv.Itoa(cmd.Process.Pid)), 0o644); err != nil {
return fmt.Errorf("write PID: %w", err)
}
// Detach: don't wait for the process
go func() { _ = cmd.Wait() }()
return nil
}
func isRunning(id string) bool {
pid := readPID(id)
if pid == 0 {
return false
func truncate(s string, max int) string {
if len(s) <= max {
return s
}
err := syscall.Kill(pid, 0) // signal 0 checks existence without killing
return err == nil
return s[:max-1] + "…"
}
// readPID returns the PID recorded in the agent's PID file, or 0 when no
// usable PID is available: the file cannot be read, its contents are not a
// number, or the number is not positive.
//
// Non-positive values are rejected because callers pass the result straight
// to syscall.Kill, where 0 and negative PIDs address process groups (and -1
// every process the caller may signal) instead of a single agent process.
func readPID(id string) int {
	raw, err := os.ReadFile(pidPath(id))
	if err != nil {
		return 0
	}
	pid, err := strconv.Atoi(strings.TrimSpace(string(raw)))
	if err != nil || pid <= 0 {
		// Corrupt or hostile PID file: treat it as "no PID".
		return 0
	}
	return pid
}
// removePIDFile deletes the agent's PID file. The removal is best effort: any
// error returned by os.Remove is deliberately discarded.
func removePIDFile(id string) {
	target := pidPath(id)
	_ = os.Remove(target)
}
// pidPath returns the location of the agent's PID file: <runDir>/<id>.pid.
func pidPath(id string) string {
	name := id + ".pid"
	return filepath.Join(runDir, name)
}
// logPath returns the location of the agent's log file: <runDir>/<id>.log.
func logPath(id string) string {
	name := id + ".log"
	return filepath.Join(runDir, name)
}
// ── config editing ────────────────────────────────────────────────────────
// setEnabled flips `enabled: true/false` in the agent section of the YAML.
// Uses text replacement to preserve all comments.
func setEnabled(configPath string, enabled bool) error {
raw, err := os.ReadFile(configPath)
if err != nil {
@@ -359,40 +273,8 @@ func setEnabled(configPath string, enabled bool) error {
updated := strings.Replace(string(raw), current, replacement, 1)
if updated == string(raw) {
return nil // already in the desired state
return nil
}
return os.WriteFile(configPath, []byte(updated), 0o644)
}
// ── display helpers ───────────────────────────────────────────────────────
// statusLabel renders the STATUS column for an agent: "disabled" when the
// config has it turned off, otherwise a running/stopped marker according to
// isRunning.
func statusLabel(a agentInfo) string {
	if !a.Enabled {
		return "disabled"
	}
	if isRunning(a.ID) {
		return "● running"
	}
	return "○ stopped"
}
// truncate shortens s to at most max characters for display. Strings that
// already fit are returned unchanged; longer ones are cut to max-1 characters
// followed by "…". A non-positive max yields "".
//
// Length is measured in runes rather than bytes, so multi-byte UTF-8 text is
// never split in the middle of a character.
func truncate(s string, max int) string {
	if max <= 0 {
		return ""
	}
	// The byte length is an upper bound on the rune count, so this cheap
	// check settles the common case without converting the string.
	if len(s) <= max {
		return s
	}
	runes := []rune(s)
	if len(runes) <= max {
		return s
	}
	return string(runes[:max-1]) + "…"
}
// resolvedBin picks the launcher to run. An explicit --bin value always wins;
// without one, ./bin/launcher is used when os.Stat succeeds on it, and the
// "go run" form is the last resort.
func resolvedBin(flagVal string) string {
	const localBin = "bin/launcher"

	if flagVal == "" {
		if _, statErr := os.Stat(localBin); statErr != nil {
			return "go run ./cmd/launcher"
		}
		return localBin
	}
	return flagVal
}