// artifact_probe drives the claude_pipe binary across a set of prompts and looks // for two classes of problems that are inherent to parsing the claude TUI: // // 1. Artifacts: bits of the render that leaked into the parsed answer — box // drawing characters from the banner, status-bar fragments (CTX:, IN:, $...), // the "✻ Crunched" meta line, the echoed prompt, replacement characters, or // glued words (a heuristic: very long runs with no spaces). // // 2. Streaming inconsistencies: in --stream mode, the concatenation of all // text_delta events should reconstruct the final result. If it doesn't, the // prefix-delta heuristic dropped or duplicated text under reflow. // // Optionally (--ref) it also runs the real `claude -p` for the same prompt and // reports whether claude_pipe's answer matches it (whitespace-normalized). claude // is not deterministic, so only trivial prompts are expected to match exactly; // for open prompts the comparison is informational. // // This is a playground tool: it is not indexed, has no registry entry, and exists // only to probe claude_pipe's TUI-parsing quality. Run it when you want to audit // the parser against real claude output. // // Usage: // // go run artifact_probe.go --root /home/enmanuel/fn_registry # default prompts, no ref // go run artifact_probe.go --root /home/enmanuel/fn_registry --ref # also compare vs claude -p // go run artifact_probe.go --root /repo --prompt "tu prompt" # single custom prompt package main import ( "bufio" "context" "encoding/json" "flag" "fmt" "os" "os/exec" "regexp" "strings" "time" ) // defaultPrompts exercise different shapes: one word, a short list, a multi-line // answer, and one that mentions code (markers that often trip up TUI parsing). 
var defaultPrompts = []string{
	"responde unicamente con la palabra PONG, sin explicaciones",
	"lista exactamente tres frutas, una por linea, sin numeracion ni texto extra",
	"explica en dos frases que es un pseudo-terminal (PTY)",
	"escribe una linea de codigo Go que imprima hola, sin explicaciones",
}

// artifactPatterns lists the TUI-chrome signatures the probe searches for in a
// parsed answer. A match on any entry means a piece of the rendered interface,
// rather than model output, leaked into the answer; name is the label under
// which the hit is reported.
var artifactPatterns = []struct {
	name string
	re   *regexp.Regexp
}{
	// Banner and frame glyphs.
	{name: "box_drawing", re: regexp.MustCompile(`[╭╮╰╯┌┐└┘├┤┬┴┼│─]`)},
	{name: "horizontal_rule", re: regexp.MustCompile(`─{8,}`)},

	// Status-bar fragments.
	{name: "status_ctx", re: regexp.MustCompile(`CTX:\s*[\d█░]`)},
	{name: "status_inout", re: regexp.MustCompile(`\bIN:\d|\bOUT:\d`)},
	{name: "status_limits", re: regexp.MustCompile(`Limits:|Total:\s*↓|⎇\s`)},
	{name: "status_cost", re: regexp.MustCompile(`\$\d+\.\d`)},
	{name: "for_agents", re: regexp.MustCompile(`←\s*for agents`)},

	// The spinner line is recognised by its shape (a spinner glyph) and by its
	// fixed signatures ("(Ns ... tokens", "esc to interrupt"); the verb shown
	// next to the glyph keeps changing, so it is deliberately not matched.
	{name: "meta_spinner", re: regexp.MustCompile(`[✻✽✢✶✺✷✦✳✱]|esc to interrupt|\(\d+s\b[^)]*tokens?\b`)},

	// Input prompt marker and undecodable bytes.
	{name: "prompt_marker", re: regexp.MustCompile(`❯`)},
	{name: "replacement_char", re: regexp.MustCompile("�")},
}

// gluedWordRe flags a run of more than 40 non-space characters, the signature
// of stripped cursor moves collapsing columns together (e.g. "2newMCPservers").
var gluedWordRe = regexp.MustCompile(`\S{41,}`) type streamEvent struct { Type string `json:"type"` Text string `json:"text"` Result string `json:"result"` } type caseResult struct { prompt string oneshot string streamDeltas []string streamResult string ref string artifactsOne []string artifactsStrm []string streamConsistent bool matchesRef string // "yes" | "no" | "n/a" errs []string } func main() { root := flag.String("root", "/home/enmanuel/fn_registry", "cwd for claude (a repo whose MCP servers are approved)") bin := flag.String("bin", "../claude_pipe", "path to the claude_pipe binary") single := flag.String("prompt", "", "run a single custom prompt instead of the default set") ref := flag.Bool("ref", false, "also run real `claude -p` and compare") warmup := flag.String("warmup", "4s", "claude_pipe --warmup") idle := flag.String("idle", "4s", "claude_pipe --idle") maxDur := flag.String("max", "90s", "claude_pipe --max") flag.Parse() prompts := defaultPrompts if *single != "" { prompts = []string{*single} } if _, err := os.Stat(*bin); err != nil { fmt.Fprintf(os.Stderr, "claude_pipe binary not found at %s — build it first:\n (cd .. && CGO_ENABLED=1 go build -tags fts5 -o claude_pipe .)\n", *bin) os.Exit(1) } var results []caseResult for i, p := range prompts { fmt.Fprintf(os.Stderr, "[%d/%d] probing: %s\n", i+1, len(prompts), truncate(p, 60)) results = append(results, probe(*bin, *root, p, *warmup, *idle, *maxDur, *ref)) } report(results, *ref) // Exit non-zero if any artifact was found, so this can gate CI if desired. for _, r := range results { if len(r.artifactsOne) > 0 || len(r.artifactsStrm) > 0 || !r.streamConsistent { os.Exit(2) } } } func probe(bin, root, prompt, warmup, idle, maxDur string, withRef bool) caseResult { r := caseResult{prompt: prompt, streamConsistent: true, matchesRef: "n/a"} // One-shot, text format. 
one, err := run(90*time.Second, bin, "--format", "text", "--cwd", root, "--warmup", warmup, "--idle", idle, "--max", maxDur, prompt) if err != nil { r.errs = append(r.errs, "oneshot: "+err.Error()) } r.oneshot = strings.TrimRight(one, "\n") r.artifactsOne = findArtifacts(r.oneshot, prompt) // Streaming. strm, err := run(90*time.Second, bin, "--stream", "--cwd", root, "--warmup", warmup, "--idle", idle, "--max", maxDur, "--snapshot-interval", "150ms", prompt) if err != nil { r.errs = append(r.errs, "stream: "+err.Error()) } r.streamDeltas, r.streamResult = parseStream(strm) r.artifactsStrm = findArtifacts(r.streamResult, prompt) // Consistency: concatenated deltas should reconstruct the final result. recon := strings.Join(r.streamDeltas, "") r.streamConsistent = normalize(recon) == normalize(r.streamResult) if withRef { refOut, err := run(90*time.Second, "claude", "-p", prompt) if err != nil { r.errs = append(r.errs, "ref: "+err.Error()) } else { r.ref = strings.TrimRight(refOut, "\n") if normalize(r.ref) == normalize(r.oneshot) { r.matchesRef = "yes" } else { r.matchesRef = "no" } } } return r } // run executes a command with a timeout and returns its stdout. func run(timeout time.Duration, name string, args ...string) (string, error) { ctx, cancel := context.WithTimeout(context.Background(), timeout) defer cancel() cmd := exec.CommandContext(ctx, name, args...) out, err := cmd.Output() return string(out), err } // parseStream splits the NDJSON stream into the ordered text_delta texts and the // final result string. 
func parseStream(s string) (deltas []string, result string) { sc := bufio.NewScanner(strings.NewReader(s)) sc.Buffer(make([]byte, 1024*1024), 1024*1024) for sc.Scan() { line := strings.TrimSpace(sc.Text()) if line == "" { continue } var ev streamEvent if json.Unmarshal([]byte(line), &ev) != nil { continue } switch ev.Type { case "text_delta": deltas = append(deltas, ev.Text) case "result": result = ev.Result } } return deltas, result } func findArtifacts(text, prompt string) []string { var found []string for _, ap := range artifactPatterns { if ap.re.MatchString(text) { found = append(found, ap.name) } } if gluedWordRe.MatchString(text) { found = append(found, "glued_words") } // Prompt echoed verbatim into the answer (claude shouldn't repeat the prompt). if len(prompt) > 12 && strings.Contains(text, prompt) { found = append(found, "prompt_echo") } return found } // normalize collapses all whitespace runs to single spaces and trims, so that // layout-induced spacing differences don't count as content differences. func normalize(s string) string { return strings.Join(strings.Fields(s), " ") } func truncate(s string, n int) string { if len(s) <= n { return s } return s[:n] + "…" } func report(results []caseResult, withRef bool) { fmt.Println() fmt.Println("=== claude_pipe artifact probe ===") for i, r := range results { fmt.Printf("\n[%d] %s\n", i+1, truncate(r.prompt, 70)) fmt.Printf(" oneshot: %q\n", truncate(r.oneshot, 80)) fmt.Printf(" stream: %d deltas, result=%q\n", len(r.streamDeltas), truncate(r.streamResult, 60)) fmt.Printf(" consistent: %s\n", yesno(r.streamConsistent)) printArtifacts(" artifacts(oneshot):", r.artifactsOne) printArtifacts(" artifacts(stream): ", r.artifactsStrm) if withRef { fmt.Printf(" matches claude -p: %s\n", r.matchesRef) if r.matchesRef == "no" { fmt.Printf(" ref: %q\n", truncate(r.ref, 80)) } } for _, e := range r.errs { fmt.Printf(" ERROR: %s\n", e) } } // Summary. 
clean := 0 for _, r := range results { if len(r.artifactsOne) == 0 && len(r.artifactsStrm) == 0 && r.streamConsistent { clean++ } } fmt.Printf("\n=== %d/%d cases clean (no artifacts, stream consistent) ===\n", clean, len(results)) } func printArtifacts(label string, a []string) { if len(a) == 0 { fmt.Printf("%s none\n", label) return } fmt.Printf("%s %s\n", label, strings.Join(a, ", ")) } func yesno(b bool) string { if b { return "yes" } return "NO" }