@@ -0,0 +1,281 @@
// artifact_probe drives the claude_pipe binary across a set of prompts and looks
// for two classes of problems that are inherent to parsing the claude TUI:
//
// 1. Artifacts: bits of the render that leaked into the parsed answer — box
// drawing characters from the banner, status-bar fragments (CTX:, IN:, $...),
// the "✻ Crunched" meta line, the echoed prompt, replacement characters, or
// glued words (a heuristic: very long runs with no spaces).
//
// 2. Streaming inconsistencies: in --stream mode, the concatenation of all
// text_delta events should reconstruct the final result. If it doesn't, the
// prefix-delta heuristic dropped or duplicated text under reflow.
//
// Optionally (--ref) it also runs the real `claude -p` for the same prompt and
// reports whether claude_pipe's answer matches it (whitespace-normalized). claude
// is not deterministic, so only trivial prompts are expected to match exactly;
// for open prompts the comparison is informational.
//
// This is a playground tool: it is not indexed, has no registry entry, and exists
// only to probe claude_pipe's TUI-parsing quality. Run it when you want to audit
// the parser against real claude output.
//
// Usage:
//
//	go run artifact_probe.go --root /home/enmanuel/fn_registry        # default prompts, no ref
//	go run artifact_probe.go --root /home/enmanuel/fn_registry --ref  # also compare vs claude -p
//	go run artifact_probe.go --root /repo --prompt "your prompt"      # single custom prompt
package main
import (
"bufio"
"context"
"encoding/json"
"flag"
"fmt"
"os"
"os/exec"
"regexp"
"strings"
"time"
)
// defaultPrompts is the prompt set probed when no custom prompt is supplied.
// Each entry exercises a different answer shape.
var defaultPrompts = []string{
	// One-word answer.
	"responde unicamente con la palabra PONG, sin explicaciones",
	// Short list, one item per line.
	"lista exactamente tres frutas, una por linea, sin numeracion ni texto extra",
	// Multi-line answer.
	"explica en dos frases que es un pseudo-terminal (PTY)",
	// Answer that mentions code (markers that often trip up TUI parsing).
	"escribe una linea de codigo Go que imprima hola, sin explicaciones",
}
// artifactPatterns lists regexes that should NEVER match a clean parsed answer.
// Each one recognizes a piece of TUI chrome rather than model output; name is
// the label reported when re matches.
//
// The patterns must not carry padding whitespace: inside a regexp a space is a
// literal character that has to be present in the text, and a repetition such
// as {8,} written with spaces is parsed as literal text instead of a count.
var artifactPatterns = []struct {
	name string
	re   *regexp.Regexp
}{
	// Any single box-drawing character from the banner frame.
	{"box_drawing", regexp.MustCompile(`[╭╮╰╯┌┐└┘├┤┬┴┼│─]`)},
	// Eight or more consecutive horizontal bars.
	{"horizontal_rule", regexp.MustCompile(`─{8,}`)},
	// Status-bar fragments.
	{"status_ctx", regexp.MustCompile(`CTX:\s*[\d█░]`)},
	{"status_inout", regexp.MustCompile(`\bIN:\d|\bOUT:\d`)},
	{"status_limits", regexp.MustCompile(`Limits:|Total:\s*↓|⎇\s`)},
	{"status_cost", regexp.MustCompile(`\$\d+\.\d`)},
	{"for_agents", regexp.MustCompile(`←\s*for agents`)},
	// Spinner line: detected by its glyph or by its signature
	// ("(Ns ... tokens", "esc to interrupt"), not by the ever-changing word.
	{"meta_spinner", regexp.MustCompile(`[✻✽✢✶✺✷✦✳✱]|esc to interrupt|\(\d+s\b[^)]*tokens?\b`)},
	{"prompt_marker", regexp.MustCompile(`❯`)},
	// U+FFFD, written as an escape so it cannot be confused with a corrupted
	// source file and carries no accidental neighbouring characters.
	{"replacement_char", regexp.MustCompile("\uFFFD")},
}
// gluedWordRe flags a run of more than 40 consecutive non-space characters, the
// signature of stripped cursor moves collapsing columns together (the effect
// seen in e.g. "2newMCPservers", at a larger scale).
//
// The repetition must be written without spaces: `\S { 41,}` is parsed as \S
// followed by literal text, so it would never match a long run.
var gluedWordRe = regexp.MustCompile(`\S{41,}`)
// streamEvent holds the fields of one NDJSON stream line that the probe reads.
type streamEvent struct {
	// Type is the event kind; parseStream acts on "text_delta" and "result".
	Type string ` json:"type" `
	// Text is the payload collected from "text_delta" events.
	Text string ` json:"text" `
	// Result is the payload taken from "result" events.
	Result string ` json:"result" `
}
// caseResult collects everything probe observed for a single prompt.
type caseResult struct {
	prompt string // prompt sent to every run of this case

	oneshot      string   // one-shot (--format text) output, trailing newlines trimmed
	streamDeltas []string // text of each text_delta event, in stream order
	streamResult string   // result of the last "result" event in the stream
	ref          string   // `claude -p` output; set only when the reference run succeeds

	artifactsOne     []string // artifact names found in oneshot
	artifactsStrm    []string // artifact names found in streamResult
	streamConsistent bool     // joined streamDeltas equal streamResult after whitespace normalization
	matchesRef       string   // "yes" | "no" | "n/a"
	errs             []string // run errors, each prefixed with the run that failed
}
// main parses the flags, probes every prompt, prints the report, and sets the
// exit status:
//
//	0  every case ran without errors, artifacts, or stream inconsistencies
//	1  the claude_pipe binary could not be stat'ed
//	2  at least one case had a run error, an artifact, or an inconsistent stream
//
// Run errors count as failures: a run that produced no output yields no
// artifacts and a trivially consistent stream, so without this check a probe
// whose runs all crashed would exit 0 and pass a CI gate.
func main() {
	root := flag.String("root", "/home/enmanuel/fn_registry", "cwd for claude (a repo whose MCP servers are approved)")
	bin := flag.String("bin", "../claude_pipe", "path to the claude_pipe binary")
	single := flag.String("prompt", "", "run a single custom prompt instead of the default set")
	ref := flag.Bool("ref", false, "also run real `claude -p` and compare")
	warmup := flag.String("warmup", "4s", "claude_pipe --warmup")
	idle := flag.String("idle", "4s", "claude_pipe --idle")
	maxDur := flag.String("max", "90s", "claude_pipe --max")
	flag.Parse()

	prompts := defaultPrompts
	if *single != "" {
		prompts = []string{*single}
	}

	// Fail fast with build instructions instead of reporting one error per prompt.
	if _, err := os.Stat(*bin); err != nil {
		fmt.Fprintf(os.Stderr, "claude_pipe binary not found at %s — build it first:\n (cd .. && CGO_ENABLED=1 go build -tags fts5 -o claude_pipe .)\n", *bin)
		os.Exit(1)
	}

	results := make([]caseResult, 0, len(prompts))
	for i, p := range prompts {
		// Progress goes to stderr so stdout carries only the report.
		fmt.Fprintf(os.Stderr, "[%d/%d] probing: %s\n", i+1, len(prompts), truncate(p, 60))
		results = append(results, probe(*bin, *root, p, *warmup, *idle, *maxDur, *ref))
	}
	report(results, *ref)

	// Exit non-zero if any case failed, so this can gate CI if desired.
	for _, r := range results {
		if len(r.errs) > 0 || len(r.artifactsOne) > 0 || len(r.artifactsStrm) > 0 || !r.streamConsistent {
			os.Exit(2)
		}
	}
}
// baseRunTimeout is the minimum time any child process is given before it is
// killed, and the fixed budget of the `claude -p` reference run.
const baseRunTimeout = 90 * time.Second

// probe runs one prompt through claude_pipe in one-shot and in streaming mode
// and, when withRef is set, through `claude -p`, and records what it observed.
//
// bin is the claude_pipe binary and root the directory passed as --cwd. warmup,
// idle and maxDur are forwarded verbatim as --warmup, --idle and --max. A failed
// run is recorded in the result's errs instead of aborting the probe, and
// whatever stdout that run produced is still analyzed.
func probe(bin, root, prompt, warmup, idle, maxDur string, withRef bool) caseResult {
	r := caseResult{prompt: prompt, streamConsistent: true, matchesRef: "n/a"}

	// The kill timeout has to outlast the time budget handed to claude_pipe; a
	// fixed 90s would cut off every run started with a larger --max.
	timeout := runTimeout(warmup, idle, maxDur)

	// One-shot, text format.
	one, err := run(timeout, bin,
		"--format", "text", "--cwd", root,
		"--warmup", warmup, "--idle", idle, "--max", maxDur, prompt)
	if err != nil {
		r.errs = append(r.errs, "oneshot: "+err.Error())
	}
	r.oneshot = strings.TrimRight(one, "\n")
	r.artifactsOne = findArtifacts(r.oneshot, prompt)

	// Streaming.
	strm, err := run(timeout, bin,
		"--stream", "--cwd", root,
		"--warmup", warmup, "--idle", idle, "--max", maxDur,
		"--snapshot-interval", "150ms", prompt)
	if err != nil {
		r.errs = append(r.errs, "stream: "+err.Error())
	}
	r.streamDeltas, r.streamResult = parseStream(strm)
	r.artifactsStrm = findArtifacts(r.streamResult, prompt)

	// Consistency: concatenated deltas should reconstruct the final result.
	recon := strings.Join(r.streamDeltas, "")
	r.streamConsistent = normalize(recon) == normalize(r.streamResult)

	if withRef {
		// `claude -p` is not governed by the claude_pipe flags, so it keeps the
		// fixed budget.
		refOut, err := run(baseRunTimeout, "claude", "-p", prompt)
		if err != nil {
			r.errs = append(r.errs, "ref: "+err.Error())
		} else {
			r.ref = strings.TrimRight(refOut, "\n")
			if normalize(r.ref) == normalize(r.oneshot) {
				r.matchesRef = "yes"
			} else {
				r.matchesRef = "no"
			}
		}
	}
	return r
}

// runTimeout returns how long a claude_pipe process may run before it is
// killed: the sum of the given duration flags plus a grace period, and never
// less than baseRunTimeout. If a value does not parse as a non-negative Go
// duration it falls back to baseRunTimeout and leaves it to claude_pipe to
// reject the flag.
//
// NOTE(review): assumes claude_pipe's worst-case runtime is bounded by roughly
// warmup + idle + max — confirm against claude_pipe.
func runTimeout(flags ...string) time.Duration {
	const grace = 15 * time.Second
	total := grace
	for _, f := range flags {
		d, err := time.ParseDuration(f)
		if err != nil || d < 0 {
			return baseRunTimeout
		}
		total += d
	}
	// Also covers an overflowed (negative) sum.
	if total < baseRunTimeout {
		return baseRunTimeout
	}
	return total
}
// run executes name with args, kills it once timeout has elapsed, and returns
// its stdout.
//
// Whatever stdout was captured is returned even when the error is non-nil, so
// callers can still inspect partial output. The error is made self-explanatory:
// a timeout is reported as such (the raw error would only say the process was
// killed), and a non-zero exit carries the last line the command wrote to
// stderr (the raw error would only say "exit status N").
func run(timeout time.Duration, name string, args ...string) (string, error) {
	ctx, cancel := context.WithTimeout(context.Background(), timeout)
	defer cancel()

	out, err := exec.CommandContext(ctx, name, args...).Output()
	if err == nil {
		return string(out), nil
	}
	if ctx.Err() == context.DeadlineExceeded {
		return string(out), fmt.Errorf("%s timed out after %s: %w", name, timeout, err)
	}
	// Output fills ExitError.Stderr because cmd.Stderr was left nil.
	if ee, ok := err.(*exec.ExitError); ok {
		if msg := strings.TrimSpace(string(ee.Stderr)); msg != "" {
			lines := strings.Split(msg, "\n")
			return string(out), fmt.Errorf("%w: %s", err, strings.TrimSpace(lines[len(lines)-1]))
		}
	}
	return string(out), err
}
// parseStream splits the NDJSON stream s into the ordered text_delta texts and
// the final result string.
//
// Blank lines, lines that are not valid JSON, and events of any other type are
// skipped. If the stream contains several result events the last one wins; if
// it contains none, result is empty.
func parseStream(s string) (deltas []string, result string) {
	sc := bufio.NewScanner(strings.NewReader(s))
	// No line can be longer than the input itself, so with this limit the
	// scanner can never stop early with bufio.ErrTooLong. A fixed cap would
	// silently drop an oversized line and every event after it, including the
	// result.
	sc.Buffer(make([]byte, 0, 64*1024), len(s)+1)
	for sc.Scan() {
		line := strings.TrimSpace(sc.Text())
		if line == "" {
			continue
		}
		var ev streamEvent
		if err := json.Unmarshal([]byte(line), &ev); err != nil {
			// Best effort: anything that is not a JSON event is ignored.
			continue
		}
		switch ev.Type {
		case "text_delta":
			deltas = append(deltas, ev.Text)
		case "result":
			result = ev.Result
		}
	}
	return deltas, result
}
// findArtifacts returns the names of all artifact checks that fire on text, or
// nil when none does. Names appear in the order of artifactPatterns, followed
// by "glued_words" and "prompt_echo".
func findArtifacts(text, prompt string) []string {
	var hits []string
	for i := range artifactPatterns {
		if p := &artifactPatterns[i]; p.re.MatchString(text) {
			hits = append(hits, p.name)
		}
	}
	if gluedWordRe.MatchString(text) {
		hits = append(hits, "glued_words")
	}
	// The prompt echoed verbatim into the answer (claude shouldn't repeat the
	// prompt). Prompts of 12 bytes or fewer are exempt from this check.
	echoed := len(prompt) > 12 && strings.Contains(text, prompt)
	if echoed {
		hits = append(hits, "prompt_echo")
	}
	return hits
}
// normalize rewrites s with every run of whitespace replaced by one space and
// no leading or trailing whitespace, so that spacing differences caused by
// layout are not treated as content differences.
func normalize(s string) string {
	var b strings.Builder
	for i, word := range strings.Fields(s) {
		if i > 0 {
			b.WriteByte(' ')
		}
		b.WriteString(word)
	}
	return b.String()
}
// truncate returns s unchanged if it is at most n bytes long; otherwise it
// returns a prefix of at most n bytes followed by "…".
//
// The cut never lands inside a multi-byte UTF-8 sequence, so the result stays
// valid UTF-8 whenever s is. A negative n is treated as 0.
func truncate(s string, n int) string {
	if n < 0 {
		n = 0
	}
	if len(s) <= n {
		return s
	}
	cut := n
	// Step back over continuation bytes (10xxxxxx) to the start of the rune. A
	// rune has at most three of them; more than that means s is not valid
	// UTF-8 and there is no boundary worth looking for.
	for back := 0; back < 3 && cut > 0 && s[cut]&0xC0 == 0x80; back++ {
		cut--
	}
	return s[:cut] + "…"
}
// report prints a per-case breakdown followed by a one-line summary to stdout.
// withRef controls whether the `claude -p` comparison lines are printed.
//
// A case counts as clean only if it ran without errors, produced no artifacts
// in either mode, and streamed consistently. Errors are part of the condition
// because a run that failed without output has nothing to find artifacts in
// and would otherwise be counted as clean.
func report(results []caseResult, withRef bool) {
	fmt.Println()
	fmt.Println("=== claude_pipe artifact probe ===")

	clean := 0
	for i, r := range results {
		fmt.Printf("\n[%d] %s\n", i+1, truncate(r.prompt, 70))
		fmt.Printf(" oneshot: %q\n", truncate(r.oneshot, 80))
		fmt.Printf(" stream: %d deltas, result=%q\n", len(r.streamDeltas), truncate(r.streamResult, 60))
		fmt.Printf(" consistent: %s\n", yesno(r.streamConsistent))
		printArtifacts(" artifacts(oneshot):", r.artifactsOne)
		printArtifacts(" artifacts(stream): ", r.artifactsStrm)
		if withRef {
			fmt.Printf(" matches claude -p: %s\n", r.matchesRef)
			// The reference text is only worth showing when it differs.
			if r.matchesRef == "no" {
				fmt.Printf(" ref: %q\n", truncate(r.ref, 80))
			}
		}
		for _, e := range r.errs {
			fmt.Printf(" ERROR: %s\n", e)
		}

		if len(r.errs) == 0 && len(r.artifactsOne) == 0 && len(r.artifactsStrm) == 0 && r.streamConsistent {
			clean++
		}
	}

	// Summary.
	fmt.Printf("\n=== %d/%d cases clean (no errors, no artifacts, stream consistent) ===\n", clean, len(results))
}
// printArtifacts writes one report line to stdout: label followed by the
// comma-separated artifact names in a, or by "none" when a is empty.
func printArtifacts(label string, a []string) {
	switch len(a) {
	case 0:
		fmt.Printf("%s none\n", label)
	default:
		fmt.Printf("%s %s\n", label, strings.Join(a, ", "))
	}
}
// yesno renders a flag for the report: "yes" for true and an upper-case "NO"
// for false.
func yesno(b bool) string {
	answer := "NO"
	if b {
		answer = "yes"
	}
	return answer
}