fix: matar process group completo de claude-code al cancelar

Cuando se cancela una invocación de claude-code, el proceso principal
moría pero sus hijos (subprocesos node, etc.) quedaban huérfanos
consumiendo recursos. Ahora se crea un process group (Setpgid) y se
mata el grupo entero con kill(-pgid, SIGKILL) tanto en Cancel como
después de Run(), asegurando limpieza completa.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
2026-03-07 15:46:18 +00:00
parent d95ae173de
commit f193f8d5ea
+21
View File
@@ -9,6 +9,7 @@ import (
"os"
"os/exec"
"strings"
"syscall"
"time"
"github.com/enmanuel/agents/internal/config"
@@ -80,6 +81,20 @@ func NewClaudeCodeComplete(cfg config.ClaudeCodeCfg, log *slog.Logger) coretypes
cmd.Env = filterEnv(os.Environ(), "ANTHROPIC_API_KEY")
cmd.Stdin = strings.NewReader(prompt)
// Create a new process group so we can kill claude + all its children.
cmd.SysProcAttr = &syscall.SysProcAttr{Setpgid: true}
// Override the default cancel behavior: kill the entire process group
// instead of just the main process, preventing orphaned child processes.
cmd.Cancel = func() error {
	// Nothing to signal if no process is attached to cmd.
	if cmd.Process == nil {
		return nil
	}
	// The child was started with Setpgid, so its PID is also its PGID.
	pgid := cmd.Process.Pid
	log.Info("killing claude-code process group", "pgid", pgid)
	// Negative PID = signal every process in the group.
	err := syscall.Kill(-pgid, syscall.SIGKILL)
	if err == syscall.ESRCH {
		// The group no longer exists (it exited on its own). Report this
		// the same way the default Cancel (Process.Kill) does, so that
		// Wait/Run does not turn a successful exit into an error wrapping
		// ESRCH.
		return os.ErrProcessDone
	}
	return err
}
var stdout, stderr bytes.Buffer
cmd.Stdout = &stdout
cmd.Stderr = &stderr
@@ -88,6 +103,12 @@ func NewClaudeCodeComplete(cfg config.ClaudeCodeCfg, log *slog.Logger) coretypes
err := cmd.Run()
elapsed := time.Since(start)
// Ensure the process group is fully dead after Run returns,
// even if cmd.Run() returned without triggering Cancel (normal exit).
if cmd.Process != nil {
_ = syscall.Kill(-cmd.Process.Pid, syscall.SIGKILL)
}
log.Debug("claude_code_done",
"elapsed_ms", elapsed.Milliseconds(),
"stdout_len", stdout.Len(),