feat: streaming incremental (--stream) parseando snapshots de la TUI

Fase 2. Anade modo --stream que emite la respuesta de claude como NDJSON
(eventos text_delta + result final), re-parseando snapshots del render.

Compone dos funciones nuevas del registry:
- pty_capture_stream_go_infra: captura snapshots acumulativos del PTY por canal.
- text_prefix_delta_go_core: delta por prefijo comun entre snapshots sucesivos.

Por cada snapshot: vt_render -> parse_claude_tui -> delta del Answer. Solo emite
text_delta cuando el answer extiende limpiamente al anterior (HasPrefix); los
frames no monotonos se reconcilian en el result final. Heuristico y documentado.

e2e_check smoke_fake_stream verifica el flujo con el fake TUI (sin gastar claude).
This commit is contained in:
agent
2026-06-03 23:27:12 +02:00
parent 8d6078e99e
commit 4574f08a22
4 changed files with 98 additions and 3 deletions
+62
View File
@@ -34,6 +34,7 @@ import (
"context"
"fn-registry/functions/core"
"fn-registry/functions/infra"
"fn-registry/functions/tui"
)
@@ -53,6 +54,17 @@ type claudePResult struct {
Result string `json:"result"`
}
// streamEvent is one NDJSON line emitted in --stream mode. Loosely mirrors the
// claude -p --output-format stream-json events: text_delta during generation,
// then a final result.
type streamEvent struct {
Type string `json:"type"` // "text_delta" | "result"
Text string `json:"text,omitempty"` // for text_delta
Subtype string `json:"subtype,omitempty"` // for result
IsError bool `json:"is_error,omitempty"`
Result string `json:"result,omitempty"` // for result: full answer
}
func main() {
var (
prompt = flag.String("prompt", "", "prompt to send. If empty, taken from the positional arg, or from piped stdin")
@@ -63,6 +75,8 @@ func main() {
stepDelay = flag.Duration("step-delay", 600*time.Millisecond, "delay between typing the prompt and pressing Enter")
idle = flag.Duration("idle", 4*time.Second, "stop capturing after this much silence (response finished rendering)")
maxDur = flag.Duration("max", 120*time.Second, "hard timeout for the whole capture")
stream = flag.Bool("stream", false, "stream the answer incrementally as NDJSON (text_delta events) by parsing TUI snapshots as they render, then a final result event")
snapInt = flag.Duration("snapshot-interval", 150*time.Millisecond, "how often to snapshot and re-parse the TUI in --stream mode")
)
flag.Usage = func() {
fmt.Fprintf(os.Stderr, `claude_pipe — get a claude answer as data by parsing its TUI (alternative to claude -p).
@@ -110,6 +124,11 @@ Flags:
ctx, cancel := context.WithTimeout(context.Background(), *maxDur+10*time.Second)
defer cancel()
if *stream {
streamAnswer(ctx, *bin, inputs, *warmup, *stepDelay, *snapInt, *idle, *maxDur)
return
}
raw, err := infra.PTYCaptureIdle(ctx, *bin, nil, *warmup, inputs, *stepDelay, *idle, *maxDur)
if err != nil {
fmt.Fprintf(os.Stderr, "claude_pipe: capture failed: %v\n", err)
@@ -153,6 +172,49 @@ Flags:
}
}
// streamAnswer drives claude through a PTY and emits the assistant's answer
// incrementally as NDJSON, by re-parsing the TUI on every snapshot and emitting
// the prefix-delta of the parsed answer. Ends with a final result event.
//
// This is heuristic: the TUI re-renders the whole frame, so the parsed answer is
// not guaranteed monotonic (reflow can rewrite earlier text). We only emit a
// text_delta when the new answer cleanly extends the previous one (HasPrefix);
// non-monotonic frames are absorbed and reconciled by the final result, whose
// Result field carries the full answer regardless.
func streamAnswer(ctx context.Context, bin string, inputs []string, warmup, stepDelay, snapInt, idle, maxDur time.Duration) {
ch, err := infra.PTYCaptureStream(ctx, bin, nil, warmup, inputs, stepDelay, snapInt, idle, maxDur)
if err != nil {
fmt.Fprintf(os.Stderr, "claude_pipe: stream capture failed: %v\n", err)
os.Exit(1)
}
enc := json.NewEncoder(os.Stdout) // os.Stdout is unbuffered: each Encode writes through immediately
prev := ""
final := ""
for snap := range ch {
screen := tui.VTRender(snap, ptyRows, ptyCols)
ans := tui.ParseClaudeTUI(screen).Answer
if strings.HasPrefix(ans, prev) {
if delta := core.PrefixDelta(prev, ans); delta != "" {
_ = enc.Encode(streamEvent{Type: "text_delta", Text: delta})
}
prev = ans
}
// Keep the longest answer seen as the final, even if a later frame shrank
// (transient reflow / parse noise).
if len(ans) >= len(final) {
final = ans
}
}
_ = enc.Encode(streamEvent{
Type: "result",
Subtype: "success",
IsError: final == "",
Result: final,
})
}
// stdinIsPiped reports whether stdin is connected to a pipe/file rather than a terminal.
func stdinIsPiped() bool {
info, err := os.Stdin.Stat()