// AgentCleanupWorktree tears down a worktree previously created by
// AgentLaunchWorktree. It runs three best-effort steps:
//
//  1. Stop the process: send SIGTERM to pid, wait up to one second for it to
//     exit, then send SIGKILL if it is still alive. Skipped when pid <= 0.
//  2. Run `git -C repoRoot worktree remove --force worktreePath`.
//     Skipped when worktreePath is empty.
//  3. Run `git -C repoRoot branch -D branch`. Skipped when branch is empty.
//
// A skipped step counts as successful. Signalling a process that no longer
// exists (ESRCH) also counts as successful, because "process not running" is
// the state this step is trying to reach; this keeps a second cleanup with the
// same PID from being reported as a failure.
//
// An error is returned only when all three steps fail; it carries the three
// individual messages. Individual failures are expected (cleanup called
// twice, branch already deleted, partially-initialised run).
//
// Only the single process identified by pid is signalled, not its children.
//
// Impure: signals processes, runs git, touches the filesystem.
func AgentCleanupWorktree(repoRoot, branch, worktreePath string, pid int) error {
	const (
		grace    = time.Second           // how long SIGTERM gets before SIGKILL
		interval = 50 * time.Millisecond // liveness polling period during grace
	)
	var killErr, wtErr, brErr error

	// 1. Stop the process if a PID was provided.
	if pid > 0 {
		switch err := syscall.Kill(pid, syscall.SIGTERM); {
		case err == nil:
			// Poll with signal 0 so we return as soon as the process exits
			// instead of always sleeping for the whole grace period.
			alive := true
			for deadline := time.Now().Add(grace); alive && time.Now().Before(deadline); {
				time.Sleep(interval)
				alive = syscall.Kill(pid, 0) == nil
			}
			if alive {
				// ESRCH here means it exited between the probe and the kill.
				if err := syscall.Kill(pid, syscall.SIGKILL); err != nil && err != syscall.ESRCH {
					killErr = err
				}
			}
		case err != syscall.ESRCH:
			// Real failure (e.g. EPERM). ESRCH is deliberately not an error.
			killErr = err
		}
	}

	// 2. Remove the worktree; --force discards uncommitted changes.
	if worktreePath != "" {
		out, err := exec.Command("git", "-C", repoRoot, "worktree", "remove", "--force", worktreePath).CombinedOutput()
		if err != nil {
			wtErr = fmt.Errorf("worktree remove: %w: %s", err, strings.TrimSpace(string(out)))
		}
	}

	// 3. Delete the branch.
	if branch != "" {
		out, err := exec.Command("git", "-C", repoRoot, "branch", "-D", branch).CombinedOutput()
		if err != nil {
			brErr = fmt.Errorf("branch -D: %w: %s", err, strings.TrimSpace(string(out)))
		}
	}

	// Report a failure only when nothing at all could be cleaned up.
	if killErr != nil && wtErr != nil && brErr != nil {
		return fmt.Errorf("cleanup failed: kill=%v; worktree=%v; branch=%v", killErr, wtErr, brErr)
	}
	return nil
}
+tested: true +tests: + - "removes worktree dir and branch after launch" + - "tolerates missing worktree/branch (cleanup called twice)" +test_file_path: "functions/infra/agent_cleanup_worktree_test.go" +file_path: "functions/infra/agent_cleanup_worktree.go" +--- + +## Ejemplo + +```go +err := infra.AgentCleanupWorktree( + "/home/lucas/fn_registry", + "auto/0115-worktree-launcher-fn", + "/home/lucas/fn_registry/worktrees/0115-worktree-launcher-fn", + 12345, // PID devuelto por AgentLaunchWorktree +) +if err != nil { + // err solo es no-nil cuando los tres pasos fallaron. + log.Printf("cleanup failed: %v", err) +} +``` + +## Cuando usarla + +Tras terminar (o abortar) un run lanzado con `agent_launch_worktree_go_infra`. Tambien util en defers de tests para garantizar limpieza: `defer infra.AgentCleanupWorktree(repo, branch, wt, res.PID)`. Si el run sigue corriendo y solo quieres parar el proceso sin tocar git, llama tu mismo a `syscall.Kill(pid, syscall.SIGTERM)` — esta funcion hace mas que eso. + +## Gotchas + +- **Best-effort por diseño**: cleanup doble no es error. Es deliberado para que `agent_runner_api` pueda llamarla en abort handlers sin meter el sistema en bucle. +- **SIGTERM grace 1s**: si claude tarda mas de 1s en cerrar limpiamente, se mata con SIGKILL — los buffers del log pueden quedar parcialmente escritos. Si necesitas mas grace, haz un fork de la funcion. +- **Windows**: `syscall.Kill` no existe en Windows, asi que este archivo NO compila alli (no hay build tag ni fallback). Para soportar Windows, aisla el kill en un archivo con `//go:build !windows` y, en la variante Windows, usa `os.FindProcess(pid)` seguido de `Process.Kill()`. +- **Branch en HEAD del repo principal**: si la rama a borrar es la checked-out branch del repo principal, `git branch -D` falla — pero como worktree elimino ya su HEAD, en la practica nunca pasa con ramas `auto/*`. +- **Worktree con cambios sin commitear**: `--force` los descarta. Si necesitas preservar trabajo, haz commit y push antes de llamar. 
// WorktreeLaunchConfig configures a headless Claude agent run inside a fresh
// git worktree. All paths must be absolute. AgentLaunchWorktree spawns claude
// in the background and returns immediately with the PID; the caller is
// responsible for calling AgentCleanupWorktree when the run finishes (or
// aborts).
type WorktreeLaunchConfig struct {
	RepoRoot      string            // absolute path to the main repo (used as `git -C <RepoRoot>`); required
	Branch        string            // e.g. "auto/0115-foo" — created from master; required
	WorktreePath  string            // absolute path where the worktree gets added; required
	Prompt        string            // text passed to `claude -p <Prompt>`
	LogPath       string            // file claude stdout/stderr is redirected to (truncated); required
	Env           map[string]string // extra env vars merged on top of os.Environ()
	SkipPerms     bool              // adds --dangerously-skip-permissions
	ResetIfExists bool              // if true, branch + worktree are removed first (best-effort)
}

// WorktreeLaunchResult is the return shape of AgentLaunchWorktree.
type WorktreeLaunchResult struct {
	PID          int    // id of the spawned process (0 if Error != "")
	Branch       string // echoes cfg.Branch
	WorktreePath string // echoes cfg.WorktreePath
	LogPath      string // echoes cfg.LogPath
	StartedAt    int64  // unix seconds taken right after cmd.Start() returned
	Error        string // empty on success; populated on any failure
}

// AgentLaunchWorktree creates a fresh git worktree on a new branch off master
// and spawns `claude -p <Prompt>` headless inside that worktree, redirecting
// stdout+stderr to LogPath. It returns immediately; the process keeps running.
//
// RepoRoot, Branch, WorktreePath and LogPath are validated before anything is
// touched. If opening the log or starting the process fails after the worktree
// was created, the worktree and branch created by this call are removed again
// (best-effort), so a failed launch does not block the next attempt.
//
// If `claude` is not in PATH, falls back to an `echo` stub so tests can run
// without the real binary — the stub still produces a real PID and log file.
//
// Failures are reported through the Error field of the result, never by
// panicking; PID is 0 whenever Error is non-empty.
//
// Impure: touches the filesystem (worktree), spawns a process, writes a log,
// and starts one goroutine that lives until the child process exits.
func AgentLaunchWorktree(cfg WorktreeLaunchConfig) WorktreeLaunchResult {
	res := WorktreeLaunchResult{
		Branch:       cfg.Branch,
		WorktreePath: cfg.WorktreePath,
		LogPath:      cfg.LogPath,
	}

	if cfg.RepoRoot == "" || cfg.Branch == "" || cfg.WorktreePath == "" {
		res.Error = "RepoRoot, Branch and WorktreePath are required"
		return res
	}
	// Checked up front so a missing log path is reported before a worktree
	// and branch are created for nothing.
	if cfg.LogPath == "" {
		res.Error = "LogPath is required"
		return res
	}

	// removeWorktree drops the worktree and its branch, ignoring errors:
	// either may legitimately not exist.
	removeWorktree := func() {
		_ = exec.Command("git", "-C", cfg.RepoRoot, "worktree", "remove", "--force", cfg.WorktreePath).Run()
		_ = exec.Command("git", "-C", cfg.RepoRoot, "branch", "-D", cfg.Branch).Run()
	}

	// Best-effort cleanup of a pre-existing branch/worktree.
	if cfg.ResetIfExists {
		removeWorktree()
		// In case the directory was created out-of-band and git does not
		// know about it.
		_ = os.RemoveAll(cfg.WorktreePath)
	}

	// Create the new worktree off master.
	addCmd := exec.Command("git", "-C", cfg.RepoRoot, "worktree", "add", cfg.WorktreePath, "-b", cfg.Branch, "master")
	if out, err := addCmd.CombinedOutput(); err != nil {
		res.Error = fmt.Sprintf("git worktree add failed: %v: %s", err, strings.TrimSpace(string(out)))
		return res
	}

	// Open / truncate the log file.
	logFile, err := os.OpenFile(cfg.LogPath, os.O_CREATE|os.O_TRUNC|os.O_WRONLY, 0o644)
	if err != nil {
		removeWorktree() // undo the worktree we just created
		res.Error = fmt.Sprintf("open log %s: %v", cfg.LogPath, err)
		return res
	}

	// Resolve the claude binary; fall back to an echo stub if not found.
	bin := "echo"
	args := []string{"STUB: claude not in PATH, prompt was:", cfg.Prompt}
	if claudeBin, lookErr := exec.LookPath("claude"); lookErr == nil {
		bin = claudeBin
		args = nil
		if cfg.SkipPerms {
			args = append(args, "--dangerously-skip-permissions")
		}
		args = append(args, "-p", cfg.Prompt)
	}

	cmd := exec.Command(bin, args...)
	cmd.Dir = cfg.WorktreePath
	cmd.Stdout = logFile
	cmd.Stderr = logFile

	// os.Environ() base + cfg.Env appended. os/exec keeps only the last value
	// for a duplicated key, so cfg.Env entries override inherited ones.
	env := os.Environ()
	for k, v := range cfg.Env {
		env = append(env, k+"="+v)
	}
	cmd.Env = env

	if err := cmd.Start(); err != nil {
		_ = logFile.Close()
		removeWorktree() // undo the worktree we just created
		res.Error = fmt.Sprintf("cmd.Start: %v", err)
		return res
	}

	res.PID = cmd.Process.Pid
	res.StartedAt = time.Now().Unix()

	// Reap the child when it exits so it does not linger as a zombie, then
	// release our copy of the log file handle. The goroutine ends with the
	// child process; its exit status is intentionally discarded.
	go func() {
		_ = cmd.Wait()
		_ = logFile.Close()
	}()

	return res
}
auto/0115-foo), WorktreePath (path absoluto donde crear el worktree), Prompt (texto pasado a claude -p), LogPath (archivo de log), Env opcional (env vars extra), SkipPerms (pasa --dangerously-skip-permissions), ResetIfExists (nuke previo de rama+worktree)." +output: "WorktreeLaunchResult con PID (claude process id), Branch/WorktreePath/LogPath (eco de inputs), StartedAt (unix seconds) y Error (string vacio en exito; mensaje en fallo). PID=0 cuando Error != ''. El campo Error usa string en vez de error nativo Go para poder serializarse a JSON desde agent_runner_api." +tested: true +tests: + - "creates worktree dir and branch off master" + - "ResetIfExists=true on existing branch+worktree succeeds" + - "returns Error when RepoRoot/Branch/WorktreePath missing" +test_file_path: "functions/infra/agent_launch_worktree_test.go" +file_path: "functions/infra/agent_launch_worktree.go" +--- + +## Ejemplo + +```go +res := infra.AgentLaunchWorktree(infra.WorktreeLaunchConfig{ + RepoRoot: "/home/lucas/fn_registry", + Branch: "auto/0115-worktree-launcher-fn", + WorktreePath: "/home/lucas/fn_registry/worktrees/0115-worktree-launcher-fn", + Prompt: "Implement issue 0115 — worktree launcher Go function", + LogPath: "/tmp/claude-0115.log", + SkipPerms: true, + ResetIfExists: true, +}) +if res.Error != "" { + log.Fatal(res.Error) +} +fmt.Printf("claude PID=%d branch=%s log=%s\n", res.PID, res.Branch, res.LogPath) +// ... agente trabaja ... +// Orden de argumentos: repoRoot, branch, worktreePath, pid. +infra.AgentCleanupWorktree("/home/lucas/fn_registry", res.Branch, res.WorktreePath, res.PID) +``` + +## Cuando usarla + +Cuando una app (`agent_runner_api`, `fn-orquestador`) o un script necesite lanzar Claude headless en un sandbox aislado: ramas `auto/` o `issue/`. Reemplaza el bash inline que vivia en `.claude/skills/parallel-fix-issues/` y en el agente `fn-orquestador`. Si lo que quieres es ejecutar Claude en foreground sin worktree, NO uses esta — usa un `exec.Command` directo. 
+ +## Gotchas + +- **Spawn solo, no Wait**: la funcion hace `cmd.Start()` y vuelve. El proceso hijo ya es recolectado por una goroutine interna (`cmd.Wait()`), asi que el caller NO debe llamar a `syscall.Wait4` sobre ese PID; si necesita esperar al final, debe trackear el PID y sondear `syscall.Kill(pid, 0)` o consultar `/proc/<pid>`. Para cleanup ordenado, usa `agent_cleanup_worktree_go_infra`. +- **Master debe existir** en `RepoRoot` — la rama se crea con `git worktree add <WorktreePath> -b <Branch> master`. Si tu repo usa `main`, haz un fork de la funcion o crea una rama local `master`. +- **`ResetIfExists` es best-effort**: los errores de `git worktree remove --force` y `git branch -D` se ignoran (por ejemplo, si el worktree previo tiene procesos atados); siempre revisa el dir final. +- **Log file truncado**: cada launch reabre `LogPath` con `O_TRUNC`. Si quieres preservar el log de runs anteriores, rota el archivo antes de llamar. +- **Fallback `echo` stub** se activa cuando `exec.LookPath("claude")` falla; en ese caso el "proceso claude" imprime `STUB: claude not in PATH, prompt was: <prompt>` y termina inmediatamente. Util en CI/tests, no en produccion. +- **PID en Windows**: `syscall.Kill` no existe en Windows — `agent_cleanup_worktree` solo funciona en Linux/Darwin. Documentado alli. +- **Env**: los valores de `cfg.Env` se hacen append a `os.Environ()` — si quieres anular una var existente, basta con incluirla en `cfg.Env`: `os/exec` usa solo el ultimo valor cuando una clave esta duplicada.