fix: stream usa ultimo frame + playground artifact_probe
- streamAnswer: el result final es el ULTIMO answer parseado (frame post-idle, limpio) en vez del mas largo, que podia ser un frame intermedio con spinner. - playground/artifact_probe.go: herramienta de auditoria que corre oneshot + stream contra varios prompts, detecta artefactos del render que se cuelan (box chars, status bar, spinner, palabras pegadas, prompt echo) y verifica consistencia del streaming (concat de deltas == result). Opcional --ref para comparar con claude -p real. No se indexa (playground del padre). El fix del spinner en si vive en parse_claude_tui_go_tui (repo padre): detecta la linea de carga por estructura (glyph + palabra + ellipsis) y firma ((Ns tokens) / esc to interrupt), no por la palabra, que cambia cada frame.
This commit is contained in:
@@ -0,0 +1,281 @@
|
||||
// artifact_probe drives the claude_pipe binary across a set of prompts and looks
// for two classes of problems that are inherent to parsing the claude TUI:
//
//  1. Artifacts: bits of the render that leaked into the parsed answer — box
//     drawing characters from the banner, status-bar fragments (CTX:, IN:, $...),
//     the "✻ Crunched" meta line, the echoed prompt, replacement characters, or
//     glued words (a heuristic: very long runs with no spaces).
//
//  2. Streaming inconsistencies: in --stream mode, the concatenation of all
//     text_delta events should reconstruct the final result. If it doesn't, the
//     prefix-delta heuristic dropped or duplicated text under reflow.
//
// Optionally (--ref) it also runs the real `claude -p` for the same prompt and
// reports whether claude_pipe's answer matches it (whitespace-normalized). claude
// is not deterministic, so only trivial prompts are expected to match exactly;
// for open prompts the comparison is informational.
//
// This is a playground tool: it is not indexed, has no registry entry, and exists
// only to probe claude_pipe's TUI-parsing quality. Run it when you want to audit
// the parser against real claude output.
//
// Usage:
//
//	go run artifact_probe.go --root /home/enmanuel/fn_registry        # default prompts, no ref
//	go run artifact_probe.go --root /home/enmanuel/fn_registry --ref  # also compare vs claude -p
//	go run artifact_probe.go --root /repo --prompt "your prompt"      # single custom prompt
|
||||
package main
|
||||
|
||||
import (
|
||||
"bufio"
|
||||
"context"
|
||||
"encoding/json"
|
||||
"flag"
|
||||
"fmt"
|
||||
"os"
|
||||
"os/exec"
|
||||
"regexp"
|
||||
"strings"
|
||||
"time"
|
||||
)
|
||||
|
||||
// defaultPrompts is the prompt set probed when --prompt is not given. Each
// entry targets a different answer shape: a single word, a short list, a
// multi-sentence explanation, and a line of code (code markers often trip up
// TUI parsing). The prompts are sent to claude verbatim.
var defaultPrompts = []string{
	"responde unicamente con la palabra PONG, sin explicaciones",
	"lista exactamente tres frutas, una por linea, sin numeracion ni texto extra",
	"explica en dos frases que es un pseudo-terminal (PTY)",
	"escribe una linea de codigo Go que imprima hola, sin explicaciones",
}
|
||||
|
||||
var (
	// artifactPatterns lists the TUI chrome that must never show up in a clean
	// parsed answer. Every entry pairs a short report name with the regexp
	// that recognises that piece of chrome; none of them is model output.
	artifactPatterns = []struct {
		name string
		re   *regexp.Regexp
	}{
		// Banner and frame borders.
		{name: "box_drawing", re: regexp.MustCompile(`[╭╮╰╯┌┐└┘├┤┬┴┼│─]`)},
		{name: "horizontal_rule", re: regexp.MustCompile(`─{8,}`)},

		// Status-bar fragments.
		{name: "status_ctx", re: regexp.MustCompile(`CTX:\s*[\d█░]`)},
		{name: "status_inout", re: regexp.MustCompile(`\bIN:\d|\bOUT:\d`)},
		{name: "status_limits", re: regexp.MustCompile(`Limits:|Total:\s*↓|⎇\s`)},
		{name: "status_cost", re: regexp.MustCompile(`\$\d+\.\d`)},
		{name: "for_agents", re: regexp.MustCompile(`←\s*for agents`)},

		// Spinner / meta line. It is recognised by any of its glyphs on their
		// own, or by its signature ("(Ns ... tokens", "esc to interrupt"),
		// never by the accompanying word, which changes on every frame.
		{name: "meta_spinner", re: regexp.MustCompile(`[✻✽✢✶✺✷✦✳✱]|esc to interrupt|\(\d+s\b[^)]*tokens?\b`)},

		// Input prompt marker.
		{name: "prompt_marker", re: regexp.MustCompile(`❯`)},

		// U+FFFD, spelled as an escape so the source survives re-encoding.
		{name: "replacement_char", re: regexp.MustCompile("\uFFFD")},
	}

	// gluedWordRe matches a run of more than 40 consecutive non-space
	// characters. Such runs are what is left when stripped cursor moves
	// collapse separate columns into one word (e.g. "2newMCPservers").
	gluedWordRe = regexp.MustCompile(`\S{41,}`)
)
|
||||
|
||||
type (
	// streamEvent is the subset of one NDJSON stream line that the probe
	// decodes. Text is read for "text_delta" events and Result for "result"
	// events; other event types are ignored by the parser.
	streamEvent struct {
		Type   string `json:"type"`
		Text   string `json:"text"`
		Result string `json:"result"`
	}

	// caseResult gathers everything observed while probing a single prompt.
	caseResult struct {
		// prompt is the text sent to claude.
		prompt string
		// oneshot is the answer from the text-format run, trailing newlines trimmed.
		oneshot string
		// streamDeltas holds the text of every text_delta event, in arrival order.
		streamDeltas []string
		// streamResult is the result carried by the stream's result event.
		streamResult string
		// ref is the output of the real `claude -p`; empty unless --ref is used.
		ref string
		// artifactsOne names the artifacts found in oneshot.
		artifactsOne []string
		// artifactsStrm names the artifacts found in streamResult.
		artifactsStrm []string
		// streamConsistent reports whether the joined deltas equal streamResult
		// after whitespace normalization.
		streamConsistent bool
		// matchesRef is one of "yes", "no" or "n/a".
		matchesRef string
		// errs collects the failures of the individual runs.
		errs []string
	}
)
|
||||
|
||||
func main() {
|
||||
root := flag.String("root", "/home/enmanuel/fn_registry", "cwd for claude (a repo whose MCP servers are approved)")
|
||||
bin := flag.String("bin", "../claude_pipe", "path to the claude_pipe binary")
|
||||
single := flag.String("prompt", "", "run a single custom prompt instead of the default set")
|
||||
ref := flag.Bool("ref", false, "also run real `claude -p` and compare")
|
||||
warmup := flag.String("warmup", "4s", "claude_pipe --warmup")
|
||||
idle := flag.String("idle", "4s", "claude_pipe --idle")
|
||||
maxDur := flag.String("max", "90s", "claude_pipe --max")
|
||||
flag.Parse()
|
||||
|
||||
prompts := defaultPrompts
|
||||
if *single != "" {
|
||||
prompts = []string{*single}
|
||||
}
|
||||
|
||||
if _, err := os.Stat(*bin); err != nil {
|
||||
fmt.Fprintf(os.Stderr, "claude_pipe binary not found at %s — build it first:\n (cd .. && CGO_ENABLED=1 go build -tags fts5 -o claude_pipe .)\n", *bin)
|
||||
os.Exit(1)
|
||||
}
|
||||
|
||||
var results []caseResult
|
||||
for i, p := range prompts {
|
||||
fmt.Fprintf(os.Stderr, "[%d/%d] probing: %s\n", i+1, len(prompts), truncate(p, 60))
|
||||
results = append(results, probe(*bin, *root, p, *warmup, *idle, *maxDur, *ref))
|
||||
}
|
||||
|
||||
report(results, *ref)
|
||||
|
||||
// Exit non-zero if any artifact was found, so this can gate CI if desired.
|
||||
for _, r := range results {
|
||||
if len(r.artifactsOne) > 0 || len(r.artifactsStrm) > 0 || !r.streamConsistent {
|
||||
os.Exit(2)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func probe(bin, root, prompt, warmup, idle, maxDur string, withRef bool) caseResult {
|
||||
r := caseResult{prompt: prompt, streamConsistent: true, matchesRef: "n/a"}
|
||||
|
||||
// One-shot, text format.
|
||||
one, err := run(90*time.Second, bin,
|
||||
"--format", "text", "--cwd", root,
|
||||
"--warmup", warmup, "--idle", idle, "--max", maxDur, prompt)
|
||||
if err != nil {
|
||||
r.errs = append(r.errs, "oneshot: "+err.Error())
|
||||
}
|
||||
r.oneshot = strings.TrimRight(one, "\n")
|
||||
r.artifactsOne = findArtifacts(r.oneshot, prompt)
|
||||
|
||||
// Streaming.
|
||||
strm, err := run(90*time.Second, bin,
|
||||
"--stream", "--cwd", root,
|
||||
"--warmup", warmup, "--idle", idle, "--max", maxDur,
|
||||
"--snapshot-interval", "150ms", prompt)
|
||||
if err != nil {
|
||||
r.errs = append(r.errs, "stream: "+err.Error())
|
||||
}
|
||||
r.streamDeltas, r.streamResult = parseStream(strm)
|
||||
r.artifactsStrm = findArtifacts(r.streamResult, prompt)
|
||||
// Consistency: concatenated deltas should reconstruct the final result.
|
||||
recon := strings.Join(r.streamDeltas, "")
|
||||
r.streamConsistent = normalize(recon) == normalize(r.streamResult)
|
||||
|
||||
if withRef {
|
||||
refOut, err := run(90*time.Second, "claude", "-p", prompt)
|
||||
if err != nil {
|
||||
r.errs = append(r.errs, "ref: "+err.Error())
|
||||
} else {
|
||||
r.ref = strings.TrimRight(refOut, "\n")
|
||||
if normalize(r.ref) == normalize(r.oneshot) {
|
||||
r.matchesRef = "yes"
|
||||
} else {
|
||||
r.matchesRef = "no"
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return r
|
||||
}
|
||||
|
||||
// run executes name with args, killing it after timeout, and returns what it
// wrote to stdout.
//
// Stdout is returned even when err is non-nil, so callers can still inspect
// the partial output of a failed run. On failure the error is enriched:
//   - if the timeout fired, the error says so instead of the bare
//     "signal: killed" produced by the kill;
//   - if the process exited non-zero, the tail of its stderr is appended to
//     the "exit status N" message.
//
// The underlying error stays wrapped in both cases.
func run(timeout time.Duration, name string, args ...string) (string, error) {
	ctx, cancel := context.WithTimeout(context.Background(), timeout)
	defer cancel()

	// Output captures stderr into ExitError.Stderr because cmd.Stderr is nil.
	out, err := exec.CommandContext(ctx, name, args...).Output()
	if err == nil {
		return string(out), nil
	}

	if ctx.Err() == context.DeadlineExceeded {
		return string(out), fmt.Errorf("%s timed out after %s: %w", name, timeout, err)
	}

	if exitErr, ok := err.(*exec.ExitError); ok {
		// Keep only the tail: it usually holds the actual failure, and the
		// message ends up on a single report line.
		const maxStderr = 300
		msg := strings.TrimSpace(string(exitErr.Stderr))
		if len(msg) > maxStderr {
			msg = strings.ToValidUTF8(msg[len(msg)-maxStderr:], "")
		}
		if msg != "" {
			return string(out), fmt.Errorf("%w: %s", err, msg)
		}
	}
	return string(out), err
}
|
||||
|
||||
// parseStream splits the NDJSON stream into the ordered text_delta texts and the
|
||||
// final result string.
|
||||
func parseStream(s string) (deltas []string, result string) {
|
||||
sc := bufio.NewScanner(strings.NewReader(s))
|
||||
sc.Buffer(make([]byte, 1024*1024), 1024*1024)
|
||||
for sc.Scan() {
|
||||
line := strings.TrimSpace(sc.Text())
|
||||
if line == "" {
|
||||
continue
|
||||
}
|
||||
var ev streamEvent
|
||||
if json.Unmarshal([]byte(line), &ev) != nil {
|
||||
continue
|
||||
}
|
||||
switch ev.Type {
|
||||
case "text_delta":
|
||||
deltas = append(deltas, ev.Text)
|
||||
case "result":
|
||||
result = ev.Result
|
||||
}
|
||||
}
|
||||
return deltas, result
|
||||
}
|
||||
|
||||
func findArtifacts(text, prompt string) []string {
|
||||
var found []string
|
||||
for _, ap := range artifactPatterns {
|
||||
if ap.re.MatchString(text) {
|
||||
found = append(found, ap.name)
|
||||
}
|
||||
}
|
||||
if gluedWordRe.MatchString(text) {
|
||||
found = append(found, "glued_words")
|
||||
}
|
||||
// Prompt echoed verbatim into the answer (claude shouldn't repeat the prompt).
|
||||
if len(prompt) > 12 && strings.Contains(text, prompt) {
|
||||
found = append(found, "prompt_echo")
|
||||
}
|
||||
return found
|
||||
}
|
||||
|
||||
// normalize rewrites s with every run of whitespace replaced by one space and
// with leading and trailing whitespace removed, so that two texts differing
// only in layout compare equal.
func normalize(s string) string {
	words := strings.Fields(s)
	return strings.Join(words, " ")
}
|
||||
|
||||
// truncate shortens s to at most n characters for display, appending "…" when
// something was cut off. Strings of n characters or fewer are returned
// unchanged.
//
// The limit is counted in runes and the cut always falls on a rune boundary,
// so multi-byte UTF-8 text (accented prompts, TUI glyphs) is never split into
// invalid bytes. A negative n is treated as 0.
func truncate(s string, n int) string {
	if n < 0 {
		n = 0
	}
	seen := 0
	// Ranging over a string yields the byte offset of each rune start.
	for i := range s {
		if seen == n {
			return s[:i] + "…"
		}
		seen++
	}
	return s
}
|
||||
|
||||
func report(results []caseResult, withRef bool) {
|
||||
fmt.Println()
|
||||
fmt.Println("=== claude_pipe artifact probe ===")
|
||||
for i, r := range results {
|
||||
fmt.Printf("\n[%d] %s\n", i+1, truncate(r.prompt, 70))
|
||||
fmt.Printf(" oneshot: %q\n", truncate(r.oneshot, 80))
|
||||
fmt.Printf(" stream: %d deltas, result=%q\n", len(r.streamDeltas), truncate(r.streamResult, 60))
|
||||
fmt.Printf(" consistent: %s\n", yesno(r.streamConsistent))
|
||||
printArtifacts(" artifacts(oneshot):", r.artifactsOne)
|
||||
printArtifacts(" artifacts(stream): ", r.artifactsStrm)
|
||||
if withRef {
|
||||
fmt.Printf(" matches claude -p: %s\n", r.matchesRef)
|
||||
if r.matchesRef == "no" {
|
||||
fmt.Printf(" ref: %q\n", truncate(r.ref, 80))
|
||||
}
|
||||
}
|
||||
for _, e := range r.errs {
|
||||
fmt.Printf(" ERROR: %s\n", e)
|
||||
}
|
||||
}
|
||||
|
||||
// Summary.
|
||||
clean := 0
|
||||
for _, r := range results {
|
||||
if len(r.artifactsOne) == 0 && len(r.artifactsStrm) == 0 && r.streamConsistent {
|
||||
clean++
|
||||
}
|
||||
}
|
||||
fmt.Printf("\n=== %d/%d cases clean (no artifacts, stream consistent) ===\n", clean, len(results))
|
||||
}
|
||||
|
||||
// printArtifacts writes one report line to stdout: label, a space, and then
// either the comma-separated artifact names in a or the word "none" when a
// is empty.
func printArtifacts(label string, a []string) {
	summary := "none"
	if len(a) > 0 {
		summary = strings.Join(a, ", ")
	}
	fmt.Printf("%s %s\n", label, summary)
}
|
||||
|
||||
// yesno renders a boolean for the report: "yes" for true and an upper-case
// "NO" for false, so that failures stand out.
func yesno(b bool) string {
	if !b {
		return "NO"
	}
	return "yes"
}
|
||||
Reference in New Issue
Block a user