feat: import agents_and_robots platform as unibots (Matrix-out, unibus transport)
Reemplaza el scaffold del echobot por la plataforma completa de bots traida desde ~/DataProyects/Github/agents_and_robots tras la operacion Matrix-out: los bots ya no hablan por Matrix sino por el bus unibus (modelo todo-rooms + E2E via shell/transportunibus sobre github.com/enmanuel/unibus/pkg/client). - go.mod: replace de unibus -> ../unibus y de fn-registry -> ../../../.. (paths relativos reajustados a la nueva ubicacion dentro de fn_registry). - app.md: bump a 0.2.0, descripcion + arquitectura + comandos + gotchas reales. - modulo Go conservado como github.com/enmanuel/agents (sin reescribir imports). agents_and_robots queda archivado como museo de la era Matrix.
This commit is contained in:
@@ -0,0 +1,692 @@
|
||||
// Package process manages agent processes: discovery, start, stop, kill, stats.
|
||||
// This is the impure shell layer — all I/O happens here.
|
||||
package process
|
||||
|
||||
import (
|
||||
"bufio"
|
||||
"fmt"
|
||||
"os"
|
||||
"os/exec"
|
||||
"path/filepath"
|
||||
"strconv"
|
||||
"strings"
|
||||
"syscall"
|
||||
"time"
|
||||
|
||||
"github.com/enmanuel/agents/internal/config"
|
||||
)
|
||||
|
||||
// AgentInfo holds metadata about an agent parsed from its config.
type AgentInfo struct {
	ID         string // agent identifier from the config (Agent.ID); also used to name the agent's PID and log files
	Name       string // display name from the config (Agent.Name)
	Version    string // version string from the config (Agent.Version)
	Desc       string // description from the config (Agent.Description)
	ConfigPath string // path of the config file this metadata was loaded from
	Enabled    bool   // enabled flag from the config (Agent.Enabled)
}
|
||||
|
||||
// AgentStatus combines agent metadata with runtime state.
type AgentStatus struct {
	AgentInfo
	Running   bool // true when at least one process was attributed to the agent
	PID       int  // primary PID (first discovered process, or the unified launcher's PID); 0 when not running
	Instances int  // number of processes attributed to the agent
}
|
||||
|
||||
// ProcessStats holds resource usage for a running process.
type ProcessStats struct {
	PID        int     // process the figures below refer to
	UptimeSecs int64   // seconds since process start, derived from /proc start ticks and boot time
	MemRSSKB   int64   // numeric value of the VmRSS line in /proc/PID/status
	CPUPct     float64 // value reported by `ps -o pcpu=`
	LogBytes   int64   // current size of the associated log file
}
|
||||
|
||||
// processProber abstracts process detection for testing.
// Manager reaches the operating system only through this interface when it
// discovers processes and checks liveness, so tests can substitute a fake.
type processProber interface {
	// pgrepPIDs runs pgrep -f with the given pattern and returns matching PIDs.
	pgrepPIDs(pattern string) []int
	// processComm returns the comm name for a PID (e.g. "launcher", "go").
	processComm(pid int) string
	// isAlive checks if a PID is running.
	isAlive(pid int) bool
}
|
||||
|
||||
// osProber is the real implementation using OS calls.
// It carries no state; its methods shell out to pgrep, read /proc and send
// signal 0.
type osProber struct{}
|
||||
|
||||
func (osProber) pgrepPIDs(pattern string) []int {
|
||||
out, err := exec.Command("pgrep", "-f", pattern).Output()
|
||||
if err != nil {
|
||||
return nil
|
||||
}
|
||||
var pids []int
|
||||
for _, line := range strings.Split(strings.TrimSpace(string(out)), "\n") {
|
||||
if p, err := strconv.Atoi(strings.TrimSpace(line)); err == nil && p > 0 {
|
||||
pids = append(pids, p)
|
||||
}
|
||||
}
|
||||
return pids
|
||||
}
|
||||
|
||||
func (osProber) processComm(pid int) string {
|
||||
data, err := os.ReadFile(fmt.Sprintf("/proc/%d/comm", pid))
|
||||
if err != nil {
|
||||
return ""
|
||||
}
|
||||
return strings.TrimSpace(string(data))
|
||||
}
|
||||
|
||||
func (osProber) isAlive(pid int) bool {
|
||||
return syscall.Kill(pid, 0) == nil
|
||||
}
|
||||
|
||||
// unifiedID is the pseudo agent ID under which the unified launcher's PID and
// log files are stored in the run directory.
const unifiedID = "launcher" // PID/log file ID for the unified launcher
|
||||
|
||||
// Manager handles agent process lifecycle.
type Manager struct {
	runDir     string        // directory holding the per-ID .pid and .log files
	agentsGlob string        // glob pattern matching the agent config files
	binPath    string        // launcher binary; empty means auto-detect (see resolvedBin)
	envFile    string        // path to .env file for child processes
	prober     processProber // process discovery and liveness checks; replaced by a fake in tests
}
|
||||
|
||||
// NewManager creates a Manager. binPath can be empty for auto-detection.
|
||||
func NewManager(runDir, agentsGlob, binPath string) *Manager {
|
||||
return &Manager{runDir: runDir, agentsGlob: agentsGlob, binPath: binPath, envFile: ".env", prober: osProber{}}
|
||||
}
|
||||
|
||||
// Scan discovers all agents from config files.
|
||||
func (m *Manager) Scan() ([]AgentInfo, error) {
|
||||
matches, err := filepath.Glob(m.agentsGlob)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
var agents []AgentInfo
|
||||
for _, path := range matches {
|
||||
cfg, err := config.LoadMeta(path)
|
||||
if err != nil {
|
||||
continue
|
||||
}
|
||||
agents = append(agents, AgentInfo{
|
||||
ID: cfg.Agent.ID,
|
||||
Name: cfg.Agent.Name,
|
||||
Version: cfg.Agent.Version,
|
||||
Desc: cfg.Agent.Description,
|
||||
ConfigPath: path,
|
||||
Enabled: cfg.Agent.Enabled,
|
||||
})
|
||||
}
|
||||
return agents, nil
|
||||
}
|
||||
|
||||
// Status returns the runtime status for a single agent.
|
||||
func (m *Manager) Status(info AgentInfo) AgentStatus {
|
||||
pids := m.findProcessPIDs(info.ID)
|
||||
primary := 0
|
||||
if len(pids) > 0 {
|
||||
primary = pids[0]
|
||||
}
|
||||
return AgentStatus{
|
||||
AgentInfo: info,
|
||||
Running: len(pids) > 0,
|
||||
PID: primary,
|
||||
Instances: len(pids),
|
||||
}
|
||||
}
|
||||
|
||||
// StatusAll returns status for every discovered agent.
|
||||
func (m *Manager) StatusAll() ([]AgentStatus, error) {
|
||||
agents, err := m.Scan()
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
statuses := make([]AgentStatus, len(agents))
|
||||
for i, a := range agents {
|
||||
statuses[i] = m.Status(a)
|
||||
}
|
||||
return statuses, nil
|
||||
}
|
||||
|
||||
// Start launches an agent process in the background.
|
||||
// Returns an error if the agent is already running.
|
||||
func (m *Manager) Start(info AgentInfo) error {
|
||||
if pids := m.findProcessPIDs(info.ID); len(pids) > 0 {
|
||||
return fmt.Errorf("agent %q is already running (PID %d)", info.ID, pids[0])
|
||||
}
|
||||
if err := os.MkdirAll(m.runDir, 0o755); err != nil {
|
||||
return fmt.Errorf("create run dir: %w", err)
|
||||
}
|
||||
|
||||
logFile, err := os.OpenFile(m.logPath(info.ID), os.O_CREATE|os.O_APPEND|os.O_WRONLY, 0o644)
|
||||
if err != nil {
|
||||
return fmt.Errorf("open log: %w", err)
|
||||
}
|
||||
|
||||
bin := m.resolvedBin()
|
||||
var cmd *exec.Cmd
|
||||
if strings.HasPrefix(bin, "go run") {
|
||||
cmd = exec.Command("go", "run", "-tags", "goolm", "./cmd/launcher", "-c", info.ConfigPath)
|
||||
} else {
|
||||
cmd = exec.Command(bin, "-c", info.ConfigPath)
|
||||
}
|
||||
|
||||
cmd.Env = m.BuildEnv()
|
||||
cmd.Stdout = logFile
|
||||
cmd.Stderr = logFile
|
||||
cmd.SysProcAttr = &syscall.SysProcAttr{Setsid: true}
|
||||
|
||||
if err := cmd.Start(); err != nil {
|
||||
logFile.Close()
|
||||
return fmt.Errorf("exec: %w", err)
|
||||
}
|
||||
|
||||
if err := os.WriteFile(m.pidPath(info.ID), []byte(strconv.Itoa(cmd.Process.Pid)), 0o644); err != nil {
|
||||
return fmt.Errorf("write PID: %w", err)
|
||||
}
|
||||
|
||||
go func() { _ = cmd.Wait() }()
|
||||
return nil
|
||||
}
|
||||
|
||||
// Stop sends SIGTERM to all instances, waits up to 5s, then SIGKILL if needed.
|
||||
func (m *Manager) Stop(id string) error {
|
||||
pids := m.findProcessPIDs(id)
|
||||
// Also include PID file PID if alive and not already in the list
|
||||
filePID := m.readPID(id)
|
||||
if filePID > 0 && m.isAlive(filePID) {
|
||||
found := false
|
||||
for _, p := range pids {
|
||||
if p == filePID {
|
||||
found = true
|
||||
break
|
||||
}
|
||||
}
|
||||
if !found {
|
||||
pids = append(pids, filePID)
|
||||
}
|
||||
}
|
||||
|
||||
if len(pids) == 0 {
|
||||
return fmt.Errorf("agent %q is not running", id)
|
||||
}
|
||||
|
||||
// SIGTERM all instances
|
||||
for _, pid := range pids {
|
||||
_ = syscall.Kill(pid, syscall.SIGTERM)
|
||||
}
|
||||
|
||||
// Wait up to 5 seconds for graceful shutdown.
|
||||
for i := 0; i < 10; i++ {
|
||||
allDead := true
|
||||
for _, pid := range pids {
|
||||
if m.isAlive(pid) {
|
||||
allDead = false
|
||||
break
|
||||
}
|
||||
}
|
||||
if allDead {
|
||||
m.removePID(id)
|
||||
return nil
|
||||
}
|
||||
time.Sleep(500 * time.Millisecond)
|
||||
}
|
||||
|
||||
// Force kill survivors.
|
||||
for _, pid := range pids {
|
||||
if m.isAlive(pid) {
|
||||
_ = syscall.Kill(pid, syscall.SIGKILL)
|
||||
}
|
||||
}
|
||||
m.removePID(id)
|
||||
return nil
|
||||
}
|
||||
|
||||
// Kill sends SIGKILL to all instances immediately.
|
||||
func (m *Manager) Kill(id string) error {
|
||||
pids := m.findProcessPIDs(id)
|
||||
filePID := m.readPID(id)
|
||||
if filePID > 0 && m.isAlive(filePID) {
|
||||
found := false
|
||||
for _, p := range pids {
|
||||
if p == filePID {
|
||||
found = true
|
||||
break
|
||||
}
|
||||
}
|
||||
if !found {
|
||||
pids = append(pids, filePID)
|
||||
}
|
||||
}
|
||||
|
||||
if len(pids) == 0 {
|
||||
return fmt.Errorf("agent %q is not running", id)
|
||||
}
|
||||
|
||||
var lastErr error
|
||||
for _, pid := range pids {
|
||||
if err := syscall.Kill(pid, syscall.SIGKILL); err != nil {
|
||||
lastErr = err
|
||||
}
|
||||
}
|
||||
m.removePID(id)
|
||||
return lastErr
|
||||
}
|
||||
|
||||
// Stats gathers resource usage for a running agent from /proc.
|
||||
func (m *Manager) Stats(id string) (ProcessStats, error) {
|
||||
pid := m.resolveRunningPID(id)
|
||||
if pid == 0 {
|
||||
return ProcessStats{}, fmt.Errorf("agent %q is not running", id)
|
||||
}
|
||||
return m.statsForPID(pid, id), nil
|
||||
}
|
||||
|
||||
// statsForPID gathers resource usage for a specific PID.
|
||||
func (m *Manager) statsForPID(pid int, id string) ProcessStats {
|
||||
s := ProcessStats{PID: pid}
|
||||
|
||||
// Uptime from /proc/<pid>/stat
|
||||
if data, err := os.ReadFile(fmt.Sprintf("/proc/%d/stat", pid)); err == nil {
|
||||
fields := strings.Fields(string(data))
|
||||
if len(fields) > 21 {
|
||||
startTicks, _ := strconv.ParseInt(fields[21], 10, 64)
|
||||
clkTck := int64(100) // sysconf(_SC_CLK_TCK) is 100 on Linux
|
||||
if raw, err := os.ReadFile("/proc/stat"); err == nil {
|
||||
for _, line := range strings.Split(string(raw), "\n") {
|
||||
if strings.HasPrefix(line, "btime ") {
|
||||
btime, _ := strconv.ParseInt(strings.Fields(line)[1], 10, 64)
|
||||
procStart := btime + startTicks/clkTck
|
||||
s.UptimeSecs = time.Now().Unix() - procStart
|
||||
break
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// RSS from /proc/<pid>/status
|
||||
if data, err := os.ReadFile(fmt.Sprintf("/proc/%d/status", pid)); err == nil {
|
||||
for _, line := range strings.Split(string(data), "\n") {
|
||||
if strings.HasPrefix(line, "VmRSS:") {
|
||||
fields := strings.Fields(line)
|
||||
if len(fields) >= 2 {
|
||||
s.MemRSSKB, _ = strconv.ParseInt(fields[1], 10, 64)
|
||||
}
|
||||
break
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// CPU% from ps (simpler than calculating from /proc/stat deltas)
|
||||
if out, err := exec.Command("ps", "-p", strconv.Itoa(pid), "-o", "pcpu=").Output(); err == nil {
|
||||
s.CPUPct, _ = strconv.ParseFloat(strings.TrimSpace(string(out)), 64)
|
||||
}
|
||||
|
||||
// Log file size
|
||||
if info, err := os.Stat(m.logPath(id)); err == nil {
|
||||
s.LogBytes = info.Size()
|
||||
}
|
||||
|
||||
return s
|
||||
}
|
||||
|
||||
// LogTail returns the last N lines of an agent's log.
|
||||
func (m *Manager) LogTail(id string, lines int) ([]string, error) {
|
||||
f, err := os.Open(m.logPath(id))
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("open log: %w", err)
|
||||
}
|
||||
defer f.Close()
|
||||
|
||||
// Read all lines and keep last N. For large files a reverse scanner
|
||||
// would be better, but agent logs are typically small.
|
||||
var all []string
|
||||
scanner := bufio.NewScanner(f)
|
||||
scanner.Buffer(make([]byte, 0, 64*1024), 1024*1024)
|
||||
for scanner.Scan() {
|
||||
all = append(all, scanner.Text())
|
||||
}
|
||||
if err := scanner.Err(); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
if len(all) > lines {
|
||||
all = all[len(all)-lines:]
|
||||
}
|
||||
return all, nil
|
||||
}
|
||||
|
||||
// IsRunning checks if an agent process is alive.
|
||||
func (m *Manager) IsRunning(id string) bool {
|
||||
return m.resolveRunningPID(id) > 0
|
||||
}
|
||||
|
||||
// InstanceCount returns how many launcher processes are running for an agent.
|
||||
func (m *Manager) InstanceCount(id string) int {
|
||||
return len(m.findProcessPIDs(id))
|
||||
}
|
||||
|
||||
// ReadPID returns the PID from the PID file, or 0.
|
||||
func (m *Manager) ReadPID(id string) int {
|
||||
return m.readPID(id)
|
||||
}
|
||||
|
||||
// PidPath returns the path to the PID file for an agent.
|
||||
func (m *Manager) PidPath(id string) string { return m.pidPath(id) }
|
||||
|
||||
// LogPath returns the path to the log file for an agent.
|
||||
func (m *Manager) LogPath(id string) string { return m.logPath(id) }
|
||||
|
||||
// Build compiles all project binaries by running build.sh.
|
||||
// Returns the combined output and any error.
|
||||
func (m *Manager) Build() (string, error) {
|
||||
cmd := exec.Command("bash", "build.sh")
|
||||
cmd.Env = m.BuildEnv()
|
||||
out, err := cmd.CombinedOutput()
|
||||
return string(out), err
|
||||
}
|
||||
|
||||
// ── Unified launcher ─────────────────────────────────────────────────────
|
||||
// The unified launcher runs ALL enabled agents + orchestrator in a single
|
||||
// process. PID → run/launcher.pid, log → run/launcher.log.
|
||||
|
||||
// StartUnified launches the unified launcher (no -c flag → discovers all agents).
|
||||
func (m *Manager) StartUnified() error {
|
||||
if m.IsUnifiedRunning() {
|
||||
return fmt.Errorf("unified launcher is already running (PID %d)", m.readPID(unifiedID))
|
||||
}
|
||||
if err := os.MkdirAll(m.runDir, 0o755); err != nil {
|
||||
return fmt.Errorf("create run dir: %w", err)
|
||||
}
|
||||
|
||||
logFile, err := os.OpenFile(m.logPath(unifiedID), os.O_CREATE|os.O_APPEND|os.O_WRONLY, 0o644)
|
||||
if err != nil {
|
||||
return fmt.Errorf("open log: %w", err)
|
||||
}
|
||||
|
||||
bin := m.resolvedBin()
|
||||
var cmd *exec.Cmd
|
||||
if strings.HasPrefix(bin, "go run") {
|
||||
cmd = exec.Command("go", "run", "-tags", "goolm", "./cmd/launcher", "--log-level", "info")
|
||||
} else {
|
||||
cmd = exec.Command(bin, "--log-level", "info")
|
||||
}
|
||||
|
||||
cmd.Env = m.BuildEnv()
|
||||
cmd.Stdout = logFile
|
||||
cmd.Stderr = logFile
|
||||
cmd.SysProcAttr = &syscall.SysProcAttr{Setsid: true}
|
||||
|
||||
if err := cmd.Start(); err != nil {
|
||||
logFile.Close()
|
||||
return fmt.Errorf("exec: %w", err)
|
||||
}
|
||||
|
||||
if err := os.WriteFile(m.pidPath(unifiedID), []byte(strconv.Itoa(cmd.Process.Pid)), 0o644); err != nil {
|
||||
return fmt.Errorf("write PID: %w", err)
|
||||
}
|
||||
|
||||
go func() { _ = cmd.Wait() }()
|
||||
return nil
|
||||
}
|
||||
|
||||
// StopUnified stops the unified launcher process.
|
||||
func (m *Manager) StopUnified() error {
|
||||
return m.Stop(unifiedID)
|
||||
}
|
||||
|
||||
// KillUnified sends SIGKILL to the unified launcher.
|
||||
func (m *Manager) KillUnified() error {
|
||||
return m.Kill(unifiedID)
|
||||
}
|
||||
|
||||
// IsUnifiedRunning checks if the unified launcher is alive.
|
||||
func (m *Manager) IsUnifiedRunning() bool {
|
||||
pid := m.readPID(unifiedID)
|
||||
if pid > 0 && m.isAlive(pid) {
|
||||
return true
|
||||
}
|
||||
// Fallback: search for launcher running without -c flag
|
||||
pids := m.findUnifiedPIDs()
|
||||
return len(pids) > 0
|
||||
}
|
||||
|
||||
// UnifiedPID returns the PID of the running unified launcher, or 0.
|
||||
func (m *Manager) UnifiedPID() int {
|
||||
pid := m.readPID(unifiedID)
|
||||
if pid > 0 && m.isAlive(pid) {
|
||||
return pid
|
||||
}
|
||||
pids := m.findUnifiedPIDs()
|
||||
if len(pids) > 0 {
|
||||
// Repair PID file
|
||||
_ = os.WriteFile(m.pidPath(unifiedID), []byte(strconv.Itoa(pids[0])), 0o644)
|
||||
return pids[0]
|
||||
}
|
||||
return 0
|
||||
}
|
||||
|
||||
// UnifiedStats returns resource usage for the unified launcher process.
|
||||
func (m *Manager) UnifiedStats() (ProcessStats, error) {
|
||||
pid := m.UnifiedPID()
|
||||
if pid == 0 {
|
||||
return ProcessStats{}, fmt.Errorf("unified launcher is not running")
|
||||
}
|
||||
return m.statsForPID(pid, unifiedID), nil
|
||||
}
|
||||
|
||||
// UnifiedLogTail returns the last N lines of the unified launcher log.
|
||||
func (m *Manager) UnifiedLogTail(lines int) ([]string, error) {
|
||||
return m.LogTail(unifiedID, lines)
|
||||
}
|
||||
|
||||
// StatusAllUnified returns status for all agents, deriving "running" from
|
||||
// whether the unified launcher is running + the agent is enabled.
|
||||
func (m *Manager) StatusAllUnified() ([]AgentStatus, error) {
|
||||
agents, err := m.Scan()
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
launcherRunning := m.IsUnifiedRunning()
|
||||
launcherPID := m.UnifiedPID()
|
||||
|
||||
statuses := make([]AgentStatus, len(agents))
|
||||
for i, a := range agents {
|
||||
running := launcherRunning && a.Enabled
|
||||
pid := 0
|
||||
instances := 0
|
||||
if running {
|
||||
pid = launcherPID
|
||||
instances = 1
|
||||
}
|
||||
statuses[i] = AgentStatus{
|
||||
AgentInfo: a,
|
||||
Running: running,
|
||||
PID: pid,
|
||||
Instances: instances,
|
||||
}
|
||||
}
|
||||
return statuses, nil
|
||||
}
|
||||
|
||||
// ToggleEnabled sets the enabled field in an agent's config.yaml.
|
||||
func (m *Manager) ToggleEnabled(id string, enabled bool) error {
|
||||
agents, err := m.Scan()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
for _, a := range agents {
|
||||
if a.ID == id {
|
||||
return m.setEnabledInConfig(a.ConfigPath, enabled)
|
||||
}
|
||||
}
|
||||
return fmt.Errorf("agent %q not found", id)
|
||||
}
|
||||
|
||||
// setEnabledInConfig rewrites the enabled field in a config.yaml.
|
||||
func (m *Manager) setEnabledInConfig(path string, enabled bool) error {
|
||||
data, err := os.ReadFile(path)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
val := "false"
|
||||
if enabled {
|
||||
val = "true"
|
||||
}
|
||||
|
||||
lines := strings.Split(string(data), "\n")
|
||||
for i, line := range lines {
|
||||
trimmed := strings.TrimSpace(line)
|
||||
if strings.HasPrefix(trimmed, "enabled:") {
|
||||
// Preserve indentation
|
||||
indent := line[:len(line)-len(strings.TrimLeft(line, " \t"))]
|
||||
lines[i] = indent + "enabled: " + val
|
||||
break
|
||||
}
|
||||
}
|
||||
|
||||
return os.WriteFile(path, []byte(strings.Join(lines, "\n")), 0o644)
|
||||
}
|
||||
|
||||
// findUnifiedPIDs finds launcher processes running without -c flag.
|
||||
func (m *Manager) findUnifiedPIDs() []int {
|
||||
// Search for launcher processes that do NOT have -c flag
|
||||
raw := m.prober.pgrepPIDs("launcher.*--log-level")
|
||||
var pids []int
|
||||
for _, p := range raw {
|
||||
comm := m.prober.processComm(p)
|
||||
if comm == "go" {
|
||||
continue
|
||||
}
|
||||
pids = append(pids, p)
|
||||
}
|
||||
return pids
|
||||
}
|
||||
|
||||
// ── internal helpers ─────────────────────────────────────────────────────
|
||||
|
||||
// pidPath builds the path of the PID file for id inside the run directory.
func (m *Manager) pidPath(id string) string {
	name := id + ".pid"
	return filepath.Join(m.runDir, name)
}
|
||||
// logPath builds the path of the log file for id inside the run directory.
func (m *Manager) logPath(id string) string {
	name := id + ".log"
	return filepath.Join(m.runDir, name)
}
|
||||
|
||||
func (m *Manager) readPID(id string) int {
|
||||
raw, err := os.ReadFile(m.pidPath(id))
|
||||
if err != nil {
|
||||
return 0
|
||||
}
|
||||
pid, _ := strconv.Atoi(strings.TrimSpace(string(raw)))
|
||||
return pid
|
||||
}
|
||||
|
||||
// findProcessPIDs searches for running launcher processes for a given agent ID
|
||||
// using pgrep. Filters out "go run" wrapper PIDs to avoid double-counting.
|
||||
func (m *Manager) findProcessPIDs(id string) []int {
|
||||
configPath := m.configPathFor(id)
|
||||
if configPath == "" {
|
||||
return nil
|
||||
}
|
||||
pattern := fmt.Sprintf("launcher.*-c.*%s", configPath)
|
||||
raw := m.prober.pgrepPIDs(pattern)
|
||||
|
||||
// Filter out the "go" wrapper process that appears when using "go run".
|
||||
var pids []int
|
||||
for _, p := range raw {
|
||||
comm := m.prober.processComm(p)
|
||||
if comm == "go" {
|
||||
continue
|
||||
}
|
||||
pids = append(pids, p)
|
||||
}
|
||||
return pids
|
||||
}
|
||||
|
||||
// configPathFor returns the config file path for the given agent ID.
|
||||
func (m *Manager) configPathFor(id string) string {
|
||||
matches, err := filepath.Glob(m.agentsGlob)
|
||||
if err != nil {
|
||||
return ""
|
||||
}
|
||||
for _, path := range matches {
|
||||
cfg, err := config.LoadMeta(path)
|
||||
if err != nil {
|
||||
continue
|
||||
}
|
||||
if cfg.Agent.ID == id {
|
||||
return path
|
||||
}
|
||||
}
|
||||
return ""
|
||||
}
|
||||
|
||||
// resolveRunningPID returns the PID of the running agent, checking the PID file
|
||||
// first and falling back to process discovery. It also repairs stale PID files.
|
||||
func (m *Manager) resolveRunningPID(id string) int {
|
||||
// Check PID file first
|
||||
pid := m.readPID(id)
|
||||
if pid > 0 && m.isAlive(pid) {
|
||||
return pid
|
||||
}
|
||||
|
||||
// PID file is stale or missing — search for actual processes
|
||||
pids := m.findProcessPIDs(id)
|
||||
if len(pids) > 0 {
|
||||
// Repair the PID file with the first found process
|
||||
_ = os.WriteFile(m.pidPath(id), []byte(strconv.Itoa(pids[0])), 0o644)
|
||||
return pids[0]
|
||||
}
|
||||
|
||||
// Clean up stale PID file
|
||||
if pid > 0 {
|
||||
m.removePID(id)
|
||||
}
|
||||
return 0
|
||||
}
|
||||
|
||||
func (m *Manager) isAlive(pid int) bool {
|
||||
return m.prober.isAlive(pid)
|
||||
}
|
||||
|
||||
func (m *Manager) removePID(id string) {
|
||||
_ = os.Remove(m.pidPath(id))
|
||||
}
|
||||
|
||||
// BuildEnv returns the environment for child processes: current env + .env file vars.
|
||||
func (m *Manager) BuildEnv() []string {
|
||||
env := os.Environ()
|
||||
if m.envFile == "" {
|
||||
return env
|
||||
}
|
||||
data, err := os.ReadFile(m.envFile)
|
||||
if err != nil {
|
||||
return env
|
||||
}
|
||||
// Parse KEY=VALUE lines, skip comments and blanks.
|
||||
seen := make(map[string]bool)
|
||||
for _, line := range strings.Split(string(data), "\n") {
|
||||
line = strings.TrimSpace(line)
|
||||
if line == "" || strings.HasPrefix(line, "#") {
|
||||
continue
|
||||
}
|
||||
if idx := strings.Index(line, "="); idx > 0 {
|
||||
key := line[:idx]
|
||||
seen[key] = true
|
||||
env = append(env, line)
|
||||
}
|
||||
}
|
||||
_ = seen // .env values appended last, so they override earlier entries
|
||||
return env
|
||||
}
|
||||
|
||||
func (m *Manager) resolvedBin() string {
|
||||
if m.binPath != "" {
|
||||
return m.binPath
|
||||
}
|
||||
if _, err := os.Stat("bin/launcher"); err == nil {
|
||||
return "bin/launcher"
|
||||
}
|
||||
return "go run ./cmd/launcher"
|
||||
}
|
||||
@@ -0,0 +1,190 @@
|
||||
package process
|
||||
|
||||
import (
|
||||
"os"
|
||||
"path/filepath"
|
||||
"strconv"
|
||||
"testing"
|
||||
)
|
||||
|
||||
// fakeProber is a test double for processProber.
// Each method is a plain map lookup, so an entry that was never set behaves
// like "no PIDs", an empty comm name, or "not alive".
type fakeProber struct {
	pids  map[string][]int // pattern → PIDs
	comms map[int]string   // PID → comm name
	alive map[int]bool     // PID → is alive
}
|
||||
|
||||
func newFakeProber() *fakeProber {
|
||||
return &fakeProber{
|
||||
pids: make(map[string][]int),
|
||||
comms: make(map[int]string),
|
||||
alive: make(map[int]bool),
|
||||
}
|
||||
}
|
||||
|
||||
// pgrepPIDs returns the PIDs registered for exactly this pattern string.
func (f *fakeProber) pgrepPIDs(pattern string) []int {
	registered := f.pids[pattern]
	return registered
}
|
||||
// processComm returns the comm name registered for pid, or "" when none is.
func (f *fakeProber) processComm(pid int) string {
	name := f.comms[pid]
	return name
}
|
||||
// isAlive returns the liveness registered for pid; unknown PIDs are dead.
func (f *fakeProber) isAlive(pid int) bool {
	alive := f.alive[pid]
	return alive
}
|
||||
|
||||
// testManager creates a Manager with a temp dir, fake prober, and a config file.
|
||||
func testManager(t *testing.T, fp *fakeProber) (*Manager, string) {
|
||||
t.Helper()
|
||||
dir := t.TempDir()
|
||||
runDir := filepath.Join(dir, "run")
|
||||
agentsDir := filepath.Join(dir, "agents", "test-bot")
|
||||
_ = os.MkdirAll(runDir, 0o755)
|
||||
_ = os.MkdirAll(agentsDir, 0o755)
|
||||
|
||||
// Minimal config.yaml so Scan() and configPathFor() work.
|
||||
cfgPath := filepath.Join(agentsDir, "config.yaml")
|
||||
_ = os.WriteFile(cfgPath, []byte(`agent:
|
||||
id: test-bot
|
||||
name: Test Bot
|
||||
version: "0.1"
|
||||
enabled: true
|
||||
`), 0o644)
|
||||
|
||||
glob := filepath.Join(dir, "agents", "*", "config.yaml")
|
||||
m := &Manager{
|
||||
runDir: runDir,
|
||||
agentsGlob: glob,
|
||||
binPath: "/bin/true", // won't actually run
|
||||
envFile: "",
|
||||
prober: fp,
|
||||
}
|
||||
return m, cfgPath
|
||||
}
|
||||
|
||||
func TestFindProcessPIDs_FiltersGoWrapper(t *testing.T) {
|
||||
fp := newFakeProber()
|
||||
m, cfgPath := testManager(t, fp)
|
||||
|
||||
// Simulate pgrep returning 2 PIDs: go wrapper (100) + real launcher (200).
|
||||
pattern := "launcher.*-c.*" + cfgPath
|
||||
fp.pids[pattern] = []int{100, 200}
|
||||
fp.comms[100] = "go"
|
||||
fp.comms[200] = "launcher"
|
||||
|
||||
pids := m.findProcessPIDs("test-bot")
|
||||
|
||||
if len(pids) != 1 {
|
||||
t.Fatalf("expected 1 PID, got %d: %v", len(pids), pids)
|
||||
}
|
||||
if pids[0] != 200 {
|
||||
t.Errorf("expected PID 200, got %d", pids[0])
|
||||
}
|
||||
}
|
||||
|
||||
func TestFindProcessPIDs_NoPIDs(t *testing.T) {
|
||||
fp := newFakeProber()
|
||||
m, _ := testManager(t, fp)
|
||||
|
||||
pids := m.findProcessPIDs("test-bot")
|
||||
if len(pids) != 0 {
|
||||
t.Fatalf("expected 0 PIDs, got %d", len(pids))
|
||||
}
|
||||
}
|
||||
|
||||
func TestStatus_SingleInstance(t *testing.T) {
|
||||
fp := newFakeProber()
|
||||
m, cfgPath := testManager(t, fp)
|
||||
|
||||
pattern := "launcher.*-c.*" + cfgPath
|
||||
fp.pids[pattern] = []int{42}
|
||||
fp.comms[42] = "launcher"
|
||||
|
||||
info := AgentInfo{ID: "test-bot", Name: "Test", ConfigPath: cfgPath, Enabled: true}
|
||||
st := m.Status(info)
|
||||
|
||||
if !st.Running {
|
||||
t.Error("expected Running=true")
|
||||
}
|
||||
if st.PID != 42 {
|
||||
t.Errorf("expected PID=42, got %d", st.PID)
|
||||
}
|
||||
if st.Instances != 1 {
|
||||
t.Errorf("expected Instances=1, got %d", st.Instances)
|
||||
}
|
||||
}
|
||||
|
||||
func TestStatus_NoInstances(t *testing.T) {
|
||||
fp := newFakeProber()
|
||||
m, cfgPath := testManager(t, fp)
|
||||
|
||||
info := AgentInfo{ID: "test-bot", Name: "Test", ConfigPath: cfgPath, Enabled: true}
|
||||
st := m.Status(info)
|
||||
|
||||
if st.Running {
|
||||
t.Error("expected Running=false")
|
||||
}
|
||||
if st.Instances != 0 {
|
||||
t.Errorf("expected Instances=0, got %d", st.Instances)
|
||||
}
|
||||
}
|
||||
|
||||
func TestStart_RejectsWhenAlreadyRunning(t *testing.T) {
|
||||
fp := newFakeProber()
|
||||
m, cfgPath := testManager(t, fp)
|
||||
|
||||
pattern := "launcher.*-c.*" + cfgPath
|
||||
fp.pids[pattern] = []int{99}
|
||||
fp.comms[99] = "launcher"
|
||||
|
||||
info := AgentInfo{ID: "test-bot", Name: "Test", ConfigPath: cfgPath, Enabled: true}
|
||||
err := m.Start(info)
|
||||
if err == nil {
|
||||
t.Fatal("expected error when agent already running")
|
||||
}
|
||||
if got := err.Error(); got != `agent "test-bot" is already running (PID 99)` {
|
||||
t.Errorf("unexpected error: %s", got)
|
||||
}
|
||||
}
|
||||
|
||||
func TestResolveRunningPID_RepairsStale(t *testing.T) {
|
||||
fp := newFakeProber()
|
||||
m, cfgPath := testManager(t, fp)
|
||||
|
||||
// Write a stale PID file (PID 999 is dead).
|
||||
_ = os.MkdirAll(m.runDir, 0o755)
|
||||
_ = os.WriteFile(m.pidPath("test-bot"), []byte("999"), 0o644)
|
||||
fp.alive[999] = false
|
||||
|
||||
// But the real process is at PID 42.
|
||||
pattern := "launcher.*-c.*" + cfgPath
|
||||
fp.pids[pattern] = []int{42}
|
||||
fp.comms[42] = "launcher"
|
||||
|
||||
pid := m.resolveRunningPID("test-bot")
|
||||
if pid != 42 {
|
||||
t.Errorf("expected repaired PID=42, got %d", pid)
|
||||
}
|
||||
|
||||
// Verify PID file was repaired.
|
||||
data, err := os.ReadFile(m.pidPath("test-bot"))
|
||||
if err != nil {
|
||||
t.Fatalf("read pid file: %v", err)
|
||||
}
|
||||
if got, _ := strconv.Atoi(string(data)); got != 42 {
|
||||
t.Errorf("expected PID file to contain 42, got %d", got)
|
||||
}
|
||||
}
|
||||
|
||||
func TestResolveRunningPID_CleansUpStalePIDFile(t *testing.T) {
|
||||
fp := newFakeProber()
|
||||
m, _ := testManager(t, fp)
|
||||
|
||||
// Write a stale PID file, no real process running.
|
||||
_ = os.MkdirAll(m.runDir, 0o755)
|
||||
_ = os.WriteFile(m.pidPath("test-bot"), []byte("999"), 0o644)
|
||||
fp.alive[999] = false
|
||||
|
||||
pid := m.resolveRunningPID("test-bot")
|
||||
if pid != 0 {
|
||||
t.Errorf("expected 0 for dead process, got %d", pid)
|
||||
}
|
||||
|
||||
// PID file should be removed.
|
||||
if _, err := os.Stat(m.pidPath("test-bot")); !os.IsNotExist(err) {
|
||||
t.Error("expected stale PID file to be removed")
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user