feat: add testing support for crypto initialization and process management, including auto-recovery and filtering of go wrapper processes
This commit is contained in:
+55
-12
@@ -43,17 +43,57 @@ type ProcessStats struct {
|
||||
LogBytes int64
|
||||
}
|
||||
|
||||
// processProber abstracts process detection so that Manager can be driven by
// a test double instead of real OS calls.
type processProber interface {
	// pgrepPIDs returns the PIDs of processes matching the given pgrep -f
	// pattern. An empty result means no match.
	pgrepPIDs(pattern string) []int
	// processComm returns the comm name for a PID (e.g. "launcher", "go").
	processComm(pid int) string
	// isAlive reports whether a PID is running.
	isAlive(pid int) bool
}
|
||||
|
||||
// osProber is the real processProber implementation, backed by OS calls
// (pgrep, /proc and signal 0).
type osProber struct{}

// pgrepPIDs runs "pgrep -f pattern" and returns every positive PID it prints.
// Any failure to run pgrep, including a non-zero exit status, is reported as
// "no PIDs" (nil).
func (osProber) pgrepPIDs(pattern string) []int {
	out, err := exec.Command("pgrep", "-f", pattern).Output()
	if err != nil {
		return nil
	}
	var pids []int
	// pgrep prints one PID per line; Fields also tolerates stray whitespace.
	for _, field := range strings.Fields(string(out)) {
		if p, err := strconv.Atoi(field); err == nil && p > 0 {
			pids = append(pids, p)
		}
	}
	return pids
}

// processComm returns the trimmed contents of /proc/<pid>/comm, or "" when
// the file cannot be read (process gone, or no procfs on this platform).
func (osProber) processComm(pid int) string {
	data, err := os.ReadFile(fmt.Sprintf("/proc/%d/comm", pid))
	if err != nil {
		return ""
	}
	return strings.TrimSpace(string(data))
}

// isAlive reports whether a process with the given PID exists, by sending it
// signal 0 (which performs the existence/permission check without delivering
// a signal).
func (osProber) isAlive(pid int) bool {
	// PID 0 and negative PIDs address process groups in kill(2); signalling
	// them would succeed and wrongly report "alive" for an unset PID.
	if pid <= 0 {
		return false
	}
	err := syscall.Kill(pid, 0)
	// EPERM means the process exists but belongs to another user.
	return err == nil || err == syscall.EPERM
}
|
||||
|
||||
// Manager handles agent process lifecycle.
|
||||
type Manager struct {
|
||||
runDir string
|
||||
agentsGlob string
|
||||
binPath string
|
||||
envFile string // path to .env file for child processes
|
||||
prober processProber
|
||||
}
|
||||
|
||||
// NewManager creates a Manager. binPath can be empty for auto-detection.
|
||||
func NewManager(runDir, agentsGlob, binPath string) *Manager {
|
||||
return &Manager{runDir: runDir, agentsGlob: agentsGlob, binPath: binPath, envFile: ".env"}
|
||||
return &Manager{runDir: runDir, agentsGlob: agentsGlob, binPath: binPath, envFile: ".env", prober: osProber{}}
|
||||
}
|
||||
|
||||
// Scan discovers all agents from config files.
|
||||
@@ -110,8 +150,11 @@ func (m *Manager) StatusAll() ([]AgentStatus, error) {
|
||||
}
|
||||
|
||||
// Start launches an agent process in the background.
|
||||
// Only one instance per agent may run at a time: Start returns an error if the
// agent is already running.
|
||||
func (m *Manager) Start(info AgentInfo) error {
|
||||
if pids := m.findProcessPIDs(info.ID); len(pids) > 0 {
|
||||
return fmt.Errorf("agent %q is already running (PID %d)", info.ID, pids[0])
|
||||
}
|
||||
if err := os.MkdirAll(m.runDir, 0o755); err != nil {
|
||||
return fmt.Errorf("create run dir: %w", err)
|
||||
}
|
||||
@@ -351,23 +394,23 @@ func (m *Manager) readPID(id string) int {
|
||||
}
|
||||
|
||||
// findProcessPIDs searches for running launcher processes for a given agent ID
|
||||
// using pgrep. Returns all matching PIDs.
|
||||
// using pgrep. Filters out "go run" wrapper PIDs to avoid double-counting.
|
||||
func (m *Manager) findProcessPIDs(id string) []int {
|
||||
// First try to find the config path for this agent
|
||||
configPath := m.configPathFor(id)
|
||||
if configPath == "" {
|
||||
return nil
|
||||
}
|
||||
pattern := fmt.Sprintf("launcher.*-c.*%s", configPath)
|
||||
out, err := exec.Command("pgrep", "-f", pattern).Output()
|
||||
if err != nil {
|
||||
return nil
|
||||
}
|
||||
raw := m.prober.pgrepPIDs(pattern)
|
||||
|
||||
// Filter out the "go" wrapper process that appears when using "go run".
|
||||
var pids []int
|
||||
for _, line := range strings.Split(strings.TrimSpace(string(out)), "\n") {
|
||||
if p, err := strconv.Atoi(strings.TrimSpace(line)); err == nil && p > 0 {
|
||||
pids = append(pids, p)
|
||||
for _, p := range raw {
|
||||
comm := m.prober.processComm(p)
|
||||
if comm == "go" {
|
||||
continue
|
||||
}
|
||||
pids = append(pids, p)
|
||||
}
|
||||
return pids
|
||||
}
|
||||
@@ -415,7 +458,7 @@ func (m *Manager) resolveRunningPID(id string) int {
|
||||
}
|
||||
|
||||
func (m *Manager) isAlive(pid int) bool {
|
||||
return syscall.Kill(pid, 0) == nil
|
||||
return m.prober.isAlive(pid)
|
||||
}
|
||||
|
||||
func (m *Manager) removePID(id string) {
|
||||
|
||||
@@ -0,0 +1,190 @@
|
||||
package process
|
||||
|
||||
import (
|
||||
"os"
|
||||
"path/filepath"
|
||||
"strconv"
|
||||
"testing"
|
||||
)
|
||||
|
||||
// fakeProber is a test double for processProber. Each lookup is answered from
// a map the test fills in; missing keys yield the zero value (nil, "", false).
type fakeProber struct {
	pids  map[string][]int // pattern → PIDs
	comms map[int]string   // PID → comm name
	alive map[int]bool     // PID → is alive
}

// newFakeProber returns a fakeProber with all maps initialised and empty.
func newFakeProber() *fakeProber {
	return &fakeProber{
		pids:  make(map[string][]int),
		comms: make(map[int]string),
		alive: make(map[int]bool),
	}
}

// pgrepPIDs returns the PIDs registered for the exact pattern string.
func (f *fakeProber) pgrepPIDs(pattern string) []int { return f.pids[pattern] }

// processComm returns the comm name registered for pid.
func (f *fakeProber) processComm(pid int) string { return f.comms[pid] }

// isAlive returns the liveness registered for pid.
func (f *fakeProber) isAlive(pid int) bool { return f.alive[pid] }
|
||||
|
||||
// testManager creates a Manager with a temp dir, fake prober, and a config file.
|
||||
func testManager(t *testing.T, fp *fakeProber) (*Manager, string) {
|
||||
t.Helper()
|
||||
dir := t.TempDir()
|
||||
runDir := filepath.Join(dir, "run")
|
||||
agentsDir := filepath.Join(dir, "agents", "test-bot")
|
||||
_ = os.MkdirAll(runDir, 0o755)
|
||||
_ = os.MkdirAll(agentsDir, 0o755)
|
||||
|
||||
// Minimal config.yaml so Scan() and configPathFor() work.
|
||||
cfgPath := filepath.Join(agentsDir, "config.yaml")
|
||||
_ = os.WriteFile(cfgPath, []byte(`agent:
|
||||
id: test-bot
|
||||
name: Test Bot
|
||||
version: "0.1"
|
||||
enabled: true
|
||||
`), 0o644)
|
||||
|
||||
glob := filepath.Join(dir, "agents", "*", "config.yaml")
|
||||
m := &Manager{
|
||||
runDir: runDir,
|
||||
agentsGlob: glob,
|
||||
binPath: "/bin/true", // won't actually run
|
||||
envFile: "",
|
||||
prober: fp,
|
||||
}
|
||||
return m, cfgPath
|
||||
}
|
||||
|
||||
func TestFindProcessPIDs_FiltersGoWrapper(t *testing.T) {
|
||||
fp := newFakeProber()
|
||||
m, cfgPath := testManager(t, fp)
|
||||
|
||||
// Simulate pgrep returning 2 PIDs: go wrapper (100) + real launcher (200).
|
||||
pattern := "launcher.*-c.*" + cfgPath
|
||||
fp.pids[pattern] = []int{100, 200}
|
||||
fp.comms[100] = "go"
|
||||
fp.comms[200] = "launcher"
|
||||
|
||||
pids := m.findProcessPIDs("test-bot")
|
||||
|
||||
if len(pids) != 1 {
|
||||
t.Fatalf("expected 1 PID, got %d: %v", len(pids), pids)
|
||||
}
|
||||
if pids[0] != 200 {
|
||||
t.Errorf("expected PID 200, got %d", pids[0])
|
||||
}
|
||||
}
|
||||
|
||||
func TestFindProcessPIDs_NoPIDs(t *testing.T) {
|
||||
fp := newFakeProber()
|
||||
m, _ := testManager(t, fp)
|
||||
|
||||
pids := m.findProcessPIDs("test-bot")
|
||||
if len(pids) != 0 {
|
||||
t.Fatalf("expected 0 PIDs, got %d", len(pids))
|
||||
}
|
||||
}
|
||||
|
||||
func TestStatus_SingleInstance(t *testing.T) {
|
||||
fp := newFakeProber()
|
||||
m, cfgPath := testManager(t, fp)
|
||||
|
||||
pattern := "launcher.*-c.*" + cfgPath
|
||||
fp.pids[pattern] = []int{42}
|
||||
fp.comms[42] = "launcher"
|
||||
|
||||
info := AgentInfo{ID: "test-bot", Name: "Test", ConfigPath: cfgPath, Enabled: true}
|
||||
st := m.Status(info)
|
||||
|
||||
if !st.Running {
|
||||
t.Error("expected Running=true")
|
||||
}
|
||||
if st.PID != 42 {
|
||||
t.Errorf("expected PID=42, got %d", st.PID)
|
||||
}
|
||||
if st.Instances != 1 {
|
||||
t.Errorf("expected Instances=1, got %d", st.Instances)
|
||||
}
|
||||
}
|
||||
|
||||
func TestStatus_NoInstances(t *testing.T) {
|
||||
fp := newFakeProber()
|
||||
m, cfgPath := testManager(t, fp)
|
||||
|
||||
info := AgentInfo{ID: "test-bot", Name: "Test", ConfigPath: cfgPath, Enabled: true}
|
||||
st := m.Status(info)
|
||||
|
||||
if st.Running {
|
||||
t.Error("expected Running=false")
|
||||
}
|
||||
if st.Instances != 0 {
|
||||
t.Errorf("expected Instances=0, got %d", st.Instances)
|
||||
}
|
||||
}
|
||||
|
||||
func TestStart_RejectsWhenAlreadyRunning(t *testing.T) {
|
||||
fp := newFakeProber()
|
||||
m, cfgPath := testManager(t, fp)
|
||||
|
||||
pattern := "launcher.*-c.*" + cfgPath
|
||||
fp.pids[pattern] = []int{99}
|
||||
fp.comms[99] = "launcher"
|
||||
|
||||
info := AgentInfo{ID: "test-bot", Name: "Test", ConfigPath: cfgPath, Enabled: true}
|
||||
err := m.Start(info)
|
||||
if err == nil {
|
||||
t.Fatal("expected error when agent already running")
|
||||
}
|
||||
if got := err.Error(); got != `agent "test-bot" is already running (PID 99)` {
|
||||
t.Errorf("unexpected error: %s", got)
|
||||
}
|
||||
}
|
||||
|
||||
func TestResolveRunningPID_RepairsStale(t *testing.T) {
|
||||
fp := newFakeProber()
|
||||
m, cfgPath := testManager(t, fp)
|
||||
|
||||
// Write a stale PID file (PID 999 is dead).
|
||||
_ = os.MkdirAll(m.runDir, 0o755)
|
||||
_ = os.WriteFile(m.pidPath("test-bot"), []byte("999"), 0o644)
|
||||
fp.alive[999] = false
|
||||
|
||||
// But the real process is at PID 42.
|
||||
pattern := "launcher.*-c.*" + cfgPath
|
||||
fp.pids[pattern] = []int{42}
|
||||
fp.comms[42] = "launcher"
|
||||
|
||||
pid := m.resolveRunningPID("test-bot")
|
||||
if pid != 42 {
|
||||
t.Errorf("expected repaired PID=42, got %d", pid)
|
||||
}
|
||||
|
||||
// Verify PID file was repaired.
|
||||
data, err := os.ReadFile(m.pidPath("test-bot"))
|
||||
if err != nil {
|
||||
t.Fatalf("read pid file: %v", err)
|
||||
}
|
||||
if got, _ := strconv.Atoi(string(data)); got != 42 {
|
||||
t.Errorf("expected PID file to contain 42, got %d", got)
|
||||
}
|
||||
}
|
||||
|
||||
func TestResolveRunningPID_CleansUpStalePIDFile(t *testing.T) {
|
||||
fp := newFakeProber()
|
||||
m, _ := testManager(t, fp)
|
||||
|
||||
// Write a stale PID file, no real process running.
|
||||
_ = os.MkdirAll(m.runDir, 0o755)
|
||||
_ = os.WriteFile(m.pidPath("test-bot"), []byte("999"), 0o644)
|
||||
fp.alive[999] = false
|
||||
|
||||
pid := m.resolveRunningPID("test-bot")
|
||||
if pid != 0 {
|
||||
t.Errorf("expected 0 for dead process, got %d", pid)
|
||||
}
|
||||
|
||||
// PID file should be removed.
|
||||
if _, err := os.Stat(m.pidPath("test-bot")); !os.IsNotExist(err) {
|
||||
t.Error("expected stale PID file to be removed")
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user