Files
claude_pipe/main.go
T
agent 4574f08a22 feat: streaming incremental (--stream) parseando snapshots de la TUI
Fase 2. Añade modo --stream que emite la respuesta de claude como NDJSON
(eventos text_delta + result final), re-parseando snapshots del render.

Compone dos funciones nuevas del registry:
- pty_capture_stream_go_infra: captura snapshots acumulativos del PTY por canal.
- text_prefix_delta_go_core: delta por prefijo común entre snapshots sucesivos.

Por cada snapshot: vt_render -> parse_claude_tui -> delta del Answer. Solo emite
text_delta cuando el answer extiende limpiamente al anterior (HasPrefix); los
frames no monótonos se reconcilian en el result final. Heurístico y documentado.

e2e_check smoke_fake_stream verifica el flujo con el fake TUI (sin gastar claude).
2026-06-03 23:27:12 +02:00

226 lines
8.0 KiB
Go

// Command claude_pipe is a drop-in-ish replacement for `claude -p` that works by
// driving the interactive `claude` TUI through a pseudo-terminal, capturing its
// rendered screen, and parsing it back into structured data — the assistant's
// answer plus the visible conversation turns.
//
// It exists for the (unusual) case where you want the result of an interactive
// claude session as data, going THROUGH the TUI rather than through
// `claude -p --output-format json`. For most programmatic use the stream-json path
// (claude_stream_go_core) is cleaner and more robust; claude_pipe is the TUI-parsing
// alternative, kept because the TUI exposes things `-p` does not.
//
// Pipeline (all registry functions):
//
// pty_capture_idle_go_infra -> capture the TUI render headlessly via PTY
// vt_render_go_tui -> reconstruct the 2D screen as plain text
// parse_claude_tui_go_tui -> extract turns + final answer
//
// Output formats:
//
// --format json {"type":"result","subtype":"success","is_error":false,"result":"<answer>"}
// (mirrors `claude -p --output-format json`)
// --format text just the answer text (mirrors plain `claude -p`)
// --format turns the full ClaudeTUIParse (every visible turn + answer) as JSON
// --format screen debug: the raw rendered screen before parsing
package main
import (
"encoding/json"
"flag"
"fmt"
"os"
"strings"
"time"
"context"
"fn-registry/functions/core"
"fn-registry/functions/infra"
"fn-registry/functions/tui"
)
// Dimensions (rows, columns) of the virtual terminal handed to vt_render.
// They have to agree with the grid pty_capture_idle_go_infra uses internally
// (40x120); otherwise the reconstructed screen would wrap lines differently
// from what the TUI actually drew.
const ptyRows, ptyCols = 40, 120
// claudePResult is the single JSON object printed by --format json. It mirrors
// the shape of `claude -p --output-format json`. Field order matters: it is the
// key order of the encoded object.
type claudePResult struct {
// Type is the event kind; main always sets it to "result".
Type string `json:"type"`
// Subtype is always set to "success" by main, even when IsError is true.
Subtype string `json:"subtype"`
// IsError is true when no answer could be parsed from the screen (empty Answer).
IsError bool `json:"is_error"`
// Result is the parsed assistant answer.
Result string `json:"result"`
}
// streamEvent is one NDJSON line emitted in --stream mode. Loosely mirrors the
// claude -p --output-format stream-json events: text_delta during generation,
// then a final result.
//
// Every field except Type is tagged omitempty, so a text_delta line carries only
// "type" and "text", and a successful result line omits "is_error" (false is the
// zero value) rather than printing it.
type streamEvent struct {
Type string `json:"type"` // "text_delta" | "result"
Text string `json:"text,omitempty"` // for text_delta: the newly appended answer text
Subtype string `json:"subtype,omitempty"` // for result
IsError bool `json:"is_error,omitempty"` // for result: true when the final answer is empty
Result string `json:"result,omitempty"` // for result: full answer
}
// main parses the flags, resolves the prompt, drives claude through a PTY and
// prints the answer in the requested format.
//
// The prompt is taken from --prompt, else from the positional arguments joined
// with spaces, else from piped stdin (trailing newlines trimmed).
//
// Exit codes: 2 for usage errors (unknown --format, no prompt), 1 for runtime
// failures (--cwd, capture, encode).
func main() {
	var (
		prompt    = flag.String("prompt", "", "prompt to send. If empty, taken from the positional arg, or from piped stdin")
		format    = flag.String("format", "json", "output format: json (like claude -p --output-format json), text (just the answer), turns (full parse), screen (debug: raw render)")
		cwd       = flag.String("cwd", "", "run claude in this directory (use a repo root whose MCP servers are approved, to skip the startup dialog)")
		bin       = flag.String("bin", "claude", "claude binary to launch")
		warmup    = flag.Duration("warmup", 4*time.Second, "wait before sending the prompt, so the TUI finishes loading")
		stepDelay = flag.Duration("step-delay", 600*time.Millisecond, "delay between typing the prompt and pressing Enter")
		idle      = flag.Duration("idle", 4*time.Second, "stop capturing after this much silence (response finished rendering)")
		maxDur    = flag.Duration("max", 120*time.Second, "hard timeout for the whole capture")
		stream    = flag.Bool("stream", false, "stream the answer incrementally as NDJSON (text_delta events) by parsing TUI snapshots as they render, then a final result event")
		snapInt   = flag.Duration("snapshot-interval", 150*time.Millisecond, "how often to snapshot and re-parse the TUI in --stream mode")
	)
	flag.Usage = func() {
		fmt.Fprintf(os.Stderr, `claude_pipe — get a claude answer as data by parsing its TUI (alternative to claude -p).
Usage:
claude_pipe [flags] [prompt]
Examples:
claude_pipe --cwd /home/enmanuel/fn_registry "responde solo PONG"
claude_pipe --format text --cwd /repo "resume el README en 3 lineas"
echo "explica este error" | claude_pipe --cwd /repo
claude_pipe --format turns --cwd /repo "lee main.go y resume" # incluye tool_use/tool_result visibles
Flags:
`)
		flag.PrintDefaults()
	}
	flag.Parse()

	// Reject a bad --format up front: the capture below can take up to --max, and
	// there is no point running it if the result cannot be printed afterwards.
	// --stream never looks at --format, so it is not validated in that mode.
	if !*stream {
		switch *format {
		case "json", "text", "turns", "screen":
			// known format
		default:
			fmt.Fprintf(os.Stderr, "claude_pipe: unknown --format %q (want json|text|turns|screen)\n", *format)
			os.Exit(2)
		}
	}

	if *cwd != "" {
		if err := os.Chdir(*cwd); err != nil {
			fmt.Fprintf(os.Stderr, "claude_pipe: --cwd: %v\n", err)
			os.Exit(1)
		}
	}

	promptText := *prompt
	if promptText == "" && flag.NArg() > 0 {
		promptText = strings.Join(flag.Args(), " ")
	}
	if promptText == "" && stdinIsPiped() {
		data, err := os.ReadFile("/dev/stdin")
		if err != nil {
			// Surface the real cause; the "no prompt" exit below still applies.
			fmt.Fprintf(os.Stderr, "claude_pipe: read stdin: %v\n", err)
		} else {
			promptText = strings.TrimRight(string(data), "\n")
		}
	}
	if promptText == "" {
		fmt.Fprintln(os.Stderr, "claude_pipe: no prompt (use --prompt, a positional arg, or pipe stdin)")
		os.Exit(2)
	}

	// Type the prompt and press Enter as SEPARATE steps: a "\r" glued to the text is
	// treated by claude as a literal newline in the input box, not a submit.
	inputs := []string{promptText, "\r"}

	// The context outlives --max by a margin of 10s on top of the capture's own timeout.
	ctx, cancel := context.WithTimeout(context.Background(), *maxDur+10*time.Second)
	defer cancel()

	if *stream {
		streamAnswer(ctx, *bin, inputs, *warmup, *stepDelay, *snapInt, *idle, *maxDur)
		return
	}

	raw, err := infra.PTYCaptureIdle(ctx, *bin, nil, *warmup, inputs, *stepDelay, *idle, *maxDur)
	if err != nil {
		fmt.Fprintf(os.Stderr, "claude_pipe: capture failed: %v\n", err)
		os.Exit(1)
	}

	screen := tui.VTRender(raw, ptyRows, ptyCols)
	if *format == "screen" {
		// Debug output: the rendered screen, before any parsing.
		fmt.Println(screen)
		return
	}

	parsed := tui.ParseClaudeTUI(screen)
	// *format was validated right after flag.Parse, so no default case is needed.
	switch *format {
	case "text":
		fmt.Println(parsed.Answer)
	case "turns":
		enc := json.NewEncoder(os.Stdout)
		enc.SetIndent("", " ")
		if err := enc.Encode(parsed); err != nil {
			fmt.Fprintf(os.Stderr, "claude_pipe: encode: %v\n", err)
			os.Exit(1)
		}
	case "json":
		res := claudePResult{
			Type:    "result",
			Subtype: "success",
			IsError: parsed.Answer == "", // an empty answer means nothing could be parsed
			Result:  parsed.Answer,
		}
		enc := json.NewEncoder(os.Stdout)
		if err := enc.Encode(res); err != nil {
			fmt.Fprintf(os.Stderr, "claude_pipe: encode: %v\n", err)
			os.Exit(1)
		}
	}
}
// streamAnswer drives claude through a PTY and emits the assistant's answer
// incrementally as NDJSON on stdout, by re-parsing the TUI on every snapshot and
// emitting the prefix-delta of the parsed answer. Ends with a final result event.
//
// This is heuristic: the TUI re-renders the whole frame, so the parsed answer is
// not guaranteed monotonic (reflow can rewrite earlier text). We only emit a
// text_delta when the new answer cleanly extends the previous one (HasPrefix);
// non-monotonic frames are absorbed and reconciled by the final result, whose
// Result field carries the full answer regardless.
//
// The function does not return an error: if the capture cannot be started, or an
// event cannot be written to stdout, it reports the failure on stderr and exits
// the process with status 1.
func streamAnswer(ctx context.Context, bin string, inputs []string, warmup, stepDelay, snapInt, idle, maxDur time.Duration) {
	ch, err := infra.PTYCaptureStream(ctx, bin, nil, warmup, inputs, stepDelay, snapInt, idle, maxDur)
	if err != nil {
		fmt.Fprintf(os.Stderr, "claude_pipe: stream capture failed: %v\n", err)
		os.Exit(1)
	}

	enc := json.NewEncoder(os.Stdout) // os.Stdout is unbuffered: each Encode writes through immediately

	// emit writes one NDJSON event. A failed write is fatal, the same way main
	// treats encode errors: continuing would report success for output the
	// consumer never received.
	emit := func(ev streamEvent) {
		if err := enc.Encode(ev); err != nil {
			fmt.Fprintf(os.Stderr, "claude_pipe: encode: %v\n", err)
			os.Exit(1)
		}
	}

	prev := ""  // last answer that was streamed out in full
	final := "" // longest answer seen so far
	for snap := range ch {
		screen := tui.VTRender(snap, ptyRows, ptyCols)
		ans := tui.ParseClaudeTUI(screen).Answer

		// Only advance when this frame extends what was already emitted; a frame
		// that rewrote earlier text is skipped and prev is left untouched.
		if strings.HasPrefix(ans, prev) {
			if delta := core.PrefixDelta(prev, ans); delta != "" {
				emit(streamEvent{Type: "text_delta", Text: delta})
			}
			prev = ans
		}

		// Keep the longest answer seen as the final, even if a later frame shrank
		// (transient reflow / parse noise). On equal length the later frame wins.
		if len(ans) >= len(final) {
			final = ans
		}
	}

	emit(streamEvent{
		Type:    "result",
		Subtype: "success",
		IsError: final == "", // no answer was ever parsed
		Result:  final,
	})
}
// stdinIsPiped tells whether standard input is something other than a character
// device (for example a pipe or a redirected file) as opposed to a terminal.
// If stdin cannot be stat'ed the answer is false.
func stdinIsPiped() bool {
	if st, statErr := os.Stdin.Stat(); statErr == nil {
		return st.Mode()&os.ModeCharDevice == 0
	}
	return false
}