diff --git a/shell/llm/claudecode.go b/shell/llm/claudecode.go index 8c98b12..f410f5c 100644 --- a/shell/llm/claudecode.go +++ b/shell/llm/claudecode.go @@ -9,6 +9,7 @@ import ( "os" "os/exec" "strings" + "syscall" "time" "github.com/enmanuel/agents/internal/config" @@ -80,6 +81,20 @@ func NewClaudeCodeComplete(cfg config.ClaudeCodeCfg, log *slog.Logger) coretypes cmd.Env = filterEnv(os.Environ(), "ANTHROPIC_API_KEY") cmd.Stdin = strings.NewReader(prompt) + // Create a new process group so we can kill claude + all its children. + cmd.SysProcAttr = &syscall.SysProcAttr{Setpgid: true} + // Override the default cancel behavior: kill the entire process group + // instead of just the main process, preventing orphaned child processes. + cmd.Cancel = func() error { + if cmd.Process != nil { + pgid := cmd.Process.Pid + log.Info("killing claude-code process group", "pgid", pgid) + // Negative PID = kill entire process group + return syscall.Kill(-pgid, syscall.SIGKILL) + } + return nil + } + var stdout, stderr bytes.Buffer cmd.Stdout = &stdout cmd.Stderr = &stderr @@ -88,6 +103,12 @@ func NewClaudeCodeComplete(cfg config.ClaudeCodeCfg, log *slog.Logger) coretypes err := cmd.Run() elapsed := time.Since(start) + // Ensure the process group is fully dead after Run returns, + // even if cmd.Run() returned without triggering Cancel (normal exit). + if cmd.Process != nil { + _ = syscall.Kill(-cmd.Process.Pid, syscall.SIGKILL) + } + log.Debug("claude_code_done", "elapsed_ms", elapsed.Milliseconds(), "stdout_len", stdout.Len(),