feat: streaming del subproceso claude-code con --output-format stream-json

Implementa la Fase 1 del issue 0036: soporte de streaming en tiempo real para el provider claude-code. - Tipos puros de streaming en pkg/llm/types.go: StreamEventKind, StreamEvent, StreamFunc (pure core, sin side effects) - Refactor de shell/llm/claudecode.go: nuevo code path executeStreaming que usa cmd.StdoutPipe + bufio.Scanner para leer línea a línea - Parser parseStreamLine que mapea eventos JSON del CLI (system, assistant, result) a StreamEvent del dominio - buildClaudeArgs ahora selecciona --output-format stream-json cuando streaming está habilitado y StreamFunc está presente - Campos Streaming y ShowToolProgress en ClaudeCodeCfg (config schema) - Backward compatible: streaming=false (default) no cambia el comportamiento - 40 tests (20 existentes + 20 nuevos) pasan sin errores Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-04-09 22:53:41 +00:00
parent 0933099365
commit 1bdf9344a2
4 changed files with 738 additions and 39 deletions
@@ -112,17 +112,19 @@ type LLMProviderCfg struct {

 // ClaudeCodeCfg configures the claude -p subprocess provider.
 type ClaudeCodeCfg struct {
-	Binary          string        `yaml:"binary"`           // path to claude binary (default: "claude")
-	Timeout         time.Duration `yaml:"timeout"`          // subprocess timeout (default: 5m)
-	DisableTools    bool          `yaml:"disable_tools"`    // pass --tools "" to disable all internal tools
-	AllowedTools    []string      `yaml:"allowed_tools"`    // tools claude -p can use internally (e.g. Bash, Read, Edit)
-	DisallowedTools []string      `yaml:"disallowed_tools"` // tools to block
-	WorkingDir      string        `yaml:"working_dir"`      // working directory for claude -p
-	PermissionMode  string        `yaml:"permission_mode"`  // default, acceptEdits, bypassPermissions, plan
-	Model           string        `yaml:"model"`            // inner model: sonnet, opus, haiku, or full name
-	FallbackModel   string        `yaml:"fallback_model"`   // fallback model if primary is overloaded
-	SessionID       string        `yaml:"session_id"`       // fixed session ID for continuity
-	AddDirs         []string      `yaml:"add_dirs"`         // additional directories accessible
+	Binary           string        `yaml:"binary"`              // path to claude binary (default: "claude")
+	Timeout          time.Duration `yaml:"timeout"`             // subprocess timeout (default: 5m)
+	DisableTools     bool          `yaml:"disable_tools"`       // pass --tools "" to disable all internal tools
+	AllowedTools     []string      `yaml:"allowed_tools"`       // tools claude -p can use internally (e.g. Bash, Read, Edit)
+	DisallowedTools  []string      `yaml:"disallowed_tools"`    // tools to block
+	WorkingDir       string        `yaml:"working_dir"`         // working directory for claude -p
+	PermissionMode   string        `yaml:"permission_mode"`     // default, acceptEdits, bypassPermissions, plan
+	Model            string        `yaml:"model"`               // inner model: sonnet, opus, haiku, or full name
+	FallbackModel    string        `yaml:"fallback_model"`      // fallback model if primary is overloaded
+	SessionID        string        `yaml:"session_id"`          // fixed session ID for continuity
+	AddDirs          []string      `yaml:"add_dirs"`            // additional directories accessible
+	Streaming        bool          `yaml:"streaming"`           // use --output-format stream-json for realtime progress
+	ShowToolProgress bool          `yaml:"show_tool_progress"`  // edit Matrix message to show tool usage progress
 }

 type LLMReasoningCfg struct {
@@ -42,13 +42,14 @@ type ToolSpec struct {
 }

 type CompletionRequest struct {
-	Model       string
-	Messages    []Message
-	Tools       []ToolSpec
-	MaxTokens   int
-	Temperature float64
-	Stream      bool
+	Model        string
+	Messages     []Message
+	Tools        []ToolSpec
+	MaxTokens    int
+	Temperature  float64
+	Stream       bool
 	SystemPrompt string
+	StreamFunc   StreamFunc // optional: if set, streaming events are emitted during execution
 }

 type TokenUsage struct {
@@ -67,3 +68,34 @@ type CompletionResponse struct {
 // CompleteFunc is the single contract for LLM providers.
 // Implementations live in shell/llm/.
 type CompleteFunc func(ctx context.Context, req CompletionRequest) (CompletionResponse, error)
+
+// ── Streaming types (pure) ───────────────────────────────────────────────
+
+// StreamEventKind identifies the kind of streaming event emitted by
+// a claude-code subprocess running with --output-format stream-json.
+type StreamEventKind string
+
+const (
+	StreamInit       StreamEventKind = "init"        // CLI "system" (init) event
+	StreamToolUse    StreamEventKind = "tool_use"    // assistant message containing a tool_use block
+	StreamToolResult StreamEventKind = "tool_result" // assistant message containing a tool_result block
+	StreamText       StreamEventKind = "text"        // assistant text; also used when no recognized content is present
+	StreamResult     StreamEventKind = "result"      // final CLI "result" event
+	StreamError      StreamEventKind = "error"       // an error, carried in StreamEvent.Error
+)
+
+// StreamEvent carries a single streaming event from the claude subprocess.
+// Kind determines which fields are meaningful; the others keep their zero value.
+type StreamEvent struct {
+	Kind      StreamEventKind
+	ToolName  string // tool_use: name of the tool being invoked
+	ToolInput string // tool_use: short, truncated description of the tool input
+	Content   string // text/result: textual content (may be empty)
+	IsError   bool   // result: whether the result indicates an error
+	Error     error  // error: the error that occurred
+}
+
+// StreamFunc is the callback invoked for each streaming event.
+// The claude-code streaming loop invokes it synchronously, one event at a time,
+// from the goroutine reading the subprocess output; a slow callback stalls the read.
+type StreamFunc func(event StreamEvent)
@@ -1,6 +1,7 @@
 package llm

 import (
+	"bufio"
 	"bytes"
 	"context"
 	"encoding/json"
@@ -74,6 +75,7 @@ func NewClaudeCodeComplete(cfg config.ClaudeCodeCfg, log *slog.Logger) coretypes
 			"args", strings.Join(args, " "),
 			"prompt_len", len(prompt),
 			"working_dir", workDir,
+			"streaming", cfg.Streaming,
 		)

 		cmd := exec.CommandContext(ctx, binary, args...)
@@ -99,31 +101,313 @@ func NewClaudeCodeComplete(cfg config.ClaudeCodeCfg, log *slog.Logger) coretypes
 			return nil
 		}

-		var stdout, stderr bytes.Buffer
-		cmd.Stdout = &stdout
-		cmd.Stderr = &stderr
-
-		start := time.Now()
-		err := cmd.Run()
-		elapsed := time.Since(start)
-
-		// Ensure the process group is fully dead after Run returns,
-		// even if cmd.Run() returned without triggering Cancel (normal exit).
-		if cmd.Process != nil {
-			_ = syscall.Kill(-cmd.Process.Pid, syscall.SIGKILL)
+		// Choose between streaming and buffered mode
+		if cfg.Streaming && req.StreamFunc != nil {
+			return executeStreaming(ctx, cmd, req.StreamFunc, log)
 		}
-
-		log.Debug("claude_code_done",
-			"elapsed_ms", elapsed.Milliseconds(),
-			"stdout_len", stdout.Len(),
-			"stderr_len", stderr.Len(),
-			"exit_err", err,
-		)
-
-		return parseClaudeOutput(stdout.Bytes(), stderr.Bytes(), err, elapsed, log)
+		return executeBuffered(ctx, cmd, log)
 	}
 }

+// executeBuffered runs the claude subprocess to completion, buffering stdout and stderr in memory, and passes both to parseClaudeOutput.
+// This is the original (non-streaming) code path. ctx is not referenced here; the caller builds cmd with exec.CommandContext.
+func executeBuffered(ctx context.Context, cmd *exec.Cmd, log *slog.Logger) (coretypes.CompletionResponse, error) {
+	var stdout, stderr bytes.Buffer
+	cmd.Stdout = &stdout
+	cmd.Stderr = &stderr
+
+	start := time.Now()
+	err := cmd.Run() // not returned directly: parseClaudeOutput receives it together with the captured output
+	elapsed := time.Since(start)
+
+	// Ensure the process group is fully dead after Run returns (negative PID addresses the whole group; best-effort, error ignored).
+	if cmd.Process != nil {
+		_ = syscall.Kill(-cmd.Process.Pid, syscall.SIGKILL)
+	}
+
+	log.Debug("claude_code_done",
+		"elapsed_ms", elapsed.Milliseconds(),
+		"stdout_len", stdout.Len(),
+		"stderr_len", stderr.Len(),
+		"exit_err", err,
+	)
+
+	return parseClaudeOutput(stdout.Bytes(), stderr.Bytes(), err, elapsed, log)
+}
+
+// executeStreaming runs the claude subprocess with --output-format stream-json,
+// reads stdout line by line, forwards one StreamEvent per parsed line to streamFn,
+// and builds the final response from the last "result" event seen.
+//
+// Empty lines are skipped; lines that fail to parse are logged at debug level and
+// skipped. streamFn is called synchronously from the read loop. ctx is not
+// referenced here; the caller builds cmd with exec.CommandContext.
+//
+// An error is returned when the stdout pipe cannot be created, the process cannot
+// be started, the result event reports is_error, or no result event arrived and
+// either the process failed or reading stdout failed.
+func executeStreaming(ctx context.Context, cmd *exec.Cmd, streamFn coretypes.StreamFunc, log *slog.Logger) (coretypes.CompletionResponse, error) {
+	stdout, err := cmd.StdoutPipe()
+	if err != nil {
+		return coretypes.CompletionResponse{}, fmt.Errorf("claude-code: stdout pipe: %w", err)
+	}
+
+	var stderr bytes.Buffer
+	cmd.Stderr = &stderr
+
+	start := time.Now()
+	if err := cmd.Start(); err != nil {
+		return coretypes.CompletionResponse{}, fmt.Errorf("claude-code: start: %w", err)
+	}
+
+	// Scan stdout line by line, parsing each JSON event.
+	var lastResult *claudeJSONOutput
+	scanner := bufio.NewScanner(stdout)
+	scanner.Buffer(make([]byte, 0, 256*1024), 1024*1024) // allow up to 1MB lines
+
+	for scanner.Scan() {
+		line := scanner.Bytes()
+		if len(line) == 0 {
+			continue
+		}
+
+		evt, parsed, parseErr := parseStreamLine(line)
+		if parseErr != nil {
+			log.Debug("stream_line_parse_error", "err", parseErr, "line_len", len(line))
+			continue
+		}
+
+		// Emit the event to the callback.
+		streamFn(evt)
+
+		// Keep track of the final result event.
+		if parsed != nil && parsed.Type == "result" {
+			lastResult = parsed
+		}
+	}
+
+	// The scanner stops early when a line exceeds the buffer limit or a read
+	// fails. Wait must not be called while stdout still has unread data, and a
+	// child blocked on a full pipe would never exit on its own, so discard the
+	// rest of the stream until EOF (or until the pipe reports an error).
+	scanErr := scanner.Err()
+	if scanErr != nil {
+		log.Warn("stream_scanner_error", "err", scanErr)
+		discard := make([]byte, 32*1024)
+		for {
+			if _, readErr := stdout.Read(discard); readErr != nil {
+				break
+			}
+		}
+	}
+
+	// Wait for the process to finish.
+	waitErr := cmd.Wait()
+	elapsed := time.Since(start)
+
+	// Ensure the process group is fully dead after Wait returns
+	// (negative PID addresses the whole group; best-effort, error ignored).
+	if cmd.Process != nil {
+		_ = syscall.Kill(-cmd.Process.Pid, syscall.SIGKILL)
+	}
+
+	log.Debug("claude_code_stream_done",
+		"elapsed_ms", elapsed.Milliseconds(),
+		"stderr_len", stderr.Len(),
+		"exit_err", waitErr,
+	)
+
+	// Build response from the last result event.
+	if lastResult != nil {
+		return buildResponseFromResult(lastResult, waitErr, elapsed, log)
+	}
+
+	// Fallback: if no result event was captured, treat stderr/waitErr as error.
+	if waitErr != nil {
+		errMsg := stderr.String()
+		if errMsg == "" {
+			errMsg = waitErr.Error()
+		}
+		return coretypes.CompletionResponse{}, fmt.Errorf("claude-code stream process failed: %s", errMsg)
+	}
+
+	// The process exited cleanly but the stream could not be read to the end:
+	// report that instead of pretending an empty completion succeeded.
+	if scanErr != nil {
+		return coretypes.CompletionResponse{}, fmt.Errorf("claude-code: read stream: %w", scanErr)
+	}
+
+	return coretypes.CompletionResponse{
+		Content:      "",
+		FinishReason: "stop",
+	}, nil
+}
+
+// buildResponseFromResult converts a parsed result event into a CompletionResponse. A result flagged is_error becomes a Go error; a non-nil execErr does not fail the call and only sets FinishReason to "error".
+func buildResponseFromResult(output *claudeJSONOutput, execErr error, elapsed time.Duration, log *slog.Logger) (coretypes.CompletionResponse, error) {
+	if output.IsError {
+		return coretypes.CompletionResponse{}, fmt.Errorf("claude-code error: %s", output.Result)
+	}
+
+	content := output.Result
+	if content == "" && len(output.ContentBlock) > 0 { // no plain result text: fall back to the text content blocks
+		var parts []string
+		for _, block := range output.ContentBlock {
+			if block.Type == "text" && block.Text != "" {
+				parts = append(parts, block.Text)
+			}
+		}
+		content = strings.Join(parts, "\n")
+	}
+
+	finishReason := "stop"
+	if execErr != nil {
+		finishReason = "error"
+	}
+
+	log.Info("claude_code_response",
+		"content_len", len(content),
+		"input_tokens", output.Usage.InputTokens,
+		"output_tokens", output.Usage.OutputTokens,
+		"num_turns", output.NumTurns,
+		"cost_usd", output.TotalCost,
+		"elapsed_ms", elapsed.Milliseconds(),
+	)
+
+	return coretypes.CompletionResponse{
+		Content: content,
+		Usage: coretypes.TokenUsage{
+			InputTokens:  output.Usage.InputTokens,
+			OutputTokens: output.Usage.OutputTokens,
+			TotalTokens:  output.Usage.InputTokens + output.Usage.OutputTokens, // computed here, not taken from the CLI output
+		},
+		FinishReason: finishReason,
+	}, nil
+}
+
+// ── Stream event parsing ────────────────────────────────────────────────
+
+// claudeStreamEvent is the raw JSON shape from `claude -p --output-format stream-json`.
+// Each line of stdout is one JSON object with at least a "type" field; fields not listed here are ignored when decoding.
+type claudeStreamEvent struct {
+	Type    string `json:"type"`
+	Subtype string `json:"subtype"`
+
+	// For type=assistant, the message contains content blocks (nil when the line has no "message" key)
+	Message *claudeStreamMessage `json:"message"`
+
+	// For type=result — reuse claudeJSONOutput fields
+	IsError   bool             `json:"is_error"`
+	Result    string           `json:"result"`
+	NumTurns  int              `json:"num_turns"`
+	TotalCost float64          `json:"total_cost_usd"`
+	Usage     claudeUsage      `json:"usage"`
+	Content   []claudeContent  `json:"content"` // NOTE(review): decoded but not read by parseStreamLine — confirm whether it should feed claudeJSONOutput
+}
+
+// claudeStreamMessage represents the assistant message in a stream event; only its content blocks are decoded.
+type claudeStreamMessage struct {
+	Content []claudeStreamContentBlock `json:"content"`
+}
+
+// claudeStreamContentBlock represents a content block within an assistant message; Type selects which of the other fields are populated.
+type claudeStreamContentBlock struct {
+	Type  string `json:"type"`
+	Text  string `json:"text"`
+	Name  string `json:"name"`  // tool_use: tool name
+	ID    string `json:"id"`    // tool_use: call ID (decoded; not read by parseStreamLine)
+	Input any    `json:"input"` // tool_use: tool input (object or string)
+}
+
+// parseStreamLine parses a single JSON line from the stream-json output.
+//
+// It returns the StreamEvent to emit, the parsed result payload (non-nil only
+// for type=result lines), and an error when the line is not valid JSON.
+//
+// Mapping of the CLI "type" field:
+//   - "system":    StreamInit
+//   - "assistant": StreamToolUse if any content block is a tool_use; otherwise
+//     the first tool_result or text block; otherwise an empty StreamText
+//   - "result":    StreamResult, plus the result payload
+//   - other:       StreamText whose Content is the raw type name
+func parseStreamLine(line []byte) (coretypes.StreamEvent, *claudeJSONOutput, error) {
+	var raw claudeStreamEvent
+	if err := json.Unmarshal(line, &raw); err != nil {
+		return coretypes.StreamEvent{}, nil, fmt.Errorf("parse stream line: %w", err)
+	}
+
+	switch raw.Type {
+	case "system":
+		// Init event — emit as init.
+		return coretypes.StreamEvent{
+			Kind: coretypes.StreamInit,
+		}, nil, nil
+
+	case "assistant":
+		// A message may carry several content blocks but only one event can be
+		// returned. A tool_use block is the most interesting one and wins wherever
+		// it appears; otherwise the first tool_result or text block is reported.
+		evt := coretypes.StreamEvent{Kind: coretypes.StreamText} // default: no recognized block
+		found := false
+		if raw.Message != nil {
+			for _, block := range raw.Message.Content {
+				switch block.Type {
+				case "tool_use":
+					return coretypes.StreamEvent{
+						Kind:      coretypes.StreamToolUse,
+						ToolName:  block.Name,
+						ToolInput: truncateToolInput(block.Input),
+					}, nil, nil
+				case "tool_result":
+					if !found {
+						evt = coretypes.StreamEvent{Kind: coretypes.StreamToolResult}
+						found = true
+					}
+				case "text":
+					if !found {
+						evt = coretypes.StreamEvent{
+							Kind:    coretypes.StreamText,
+							Content: block.Text,
+						}
+						found = true
+					}
+				}
+			}
+		}
+		return evt, nil, nil
+
+	case "result":
+		// Final result event.
+		// NOTE(review): raw.Content is not copied into the returned payload —
+		// confirm whether claudeJSONOutput should receive it.
+		result := &claudeJSONOutput{
+			Type:      raw.Type,
+			Subtype:   raw.Subtype,
+			IsError:   raw.IsError,
+			Result:    raw.Result,
+			NumTurns:  raw.NumTurns,
+			TotalCost: raw.TotalCost,
+			Usage:     raw.Usage,
+		}
+		evt := coretypes.StreamEvent{
+			Kind:    coretypes.StreamResult,
+			Content: raw.Result,
+			IsError: raw.IsError,
+		}
+		return evt, result, nil
+
+	default:
+		// Unknown event type — emit as text with raw type info.
+		return coretypes.StreamEvent{
+			Kind:    coretypes.StreamText,
+			Content: raw.Type,
+		}, nil, nil
+	}
+}
+
+// truncateToolInput converts tool input to a short description string of at
+// most 100 bytes.
+//
+// A nil input yields "". A string is truncated as-is. For an object, the
+// "command" field is preferred, then "file_path" (fields that are present but
+// null are skipped); any other input is described by its JSON encoding.
+// Returns "" when the input cannot be JSON-encoded.
+func truncateToolInput(input any) string {
+	const maxLen = 100
+
+	switch v := input.(type) {
+	case nil:
+		return ""
+	case string:
+		return truncateStr(v, maxLen)
+	case map[string]any:
+		// For tool inputs like {"command": "ls -la"}, extract the most useful field.
+		for _, key := range []string{"command", "file_path"} {
+			if val, ok := v[key]; ok && val != nil {
+				return truncateStr(fmt.Sprint(val), maxLen)
+			}
+		}
+	}
+
+	// Fallback: serialize the whole thing.
+	b, err := json.Marshal(input)
+	if err != nil {
+		return ""
+	}
+	return truncateStr(string(b), maxLen)
+}
+
+// truncateStr shortens a string to maxLen, appending "..." if truncated.
+func truncateStr(s string, maxLen int) string {
+	if len(s) <= maxLen {
+		return s
+	}
+	return s[:maxLen-3] + "..."
+}
+
+// ── Shared helpers ──────────────────────────────────────────────────────
+
 // resolveWorkDir determines the working directory for the claude subprocess.
 // If configured is empty, it creates a temporary directory to avoid inheriting the launcher's CWD.
 // If configured is non-empty, it ensures the directory exists.
@@ -149,7 +433,17 @@ func resolveWorkDir(configured string, log *slog.Logger) string {

 // buildClaudeArgs constructs the CLI arguments for claude -p.
 func buildClaudeArgs(cfg config.ClaudeCodeCfg, req coretypes.CompletionRequest) []string {
-	args := []string{"--print", "--output-format", "json"}
+	outputFormat := "json"
+	if cfg.Streaming && req.StreamFunc != nil {
+		outputFormat = "stream-json"
+	}
+
+	args := []string{"--print", "--output-format", outputFormat}
+
+	// stream-json requires --verbose
+	if outputFormat == "stream-json" {
+		args = append(args, "--verbose")
+	}

 	if req.SystemPrompt != "" {
 		args = append(args, "--system-prompt", req.SystemPrompt)
@@ -371,6 +371,377 @@ func TestResolveWorkDir_ConfiguredAlreadyExists(t *testing.T) {
 	}
 }

+// ── parseStreamLine ─────────────────────────────────────────────────
+
+// TestParseStreamLine_SystemInit checks that a system/init line maps to StreamInit and carries no result payload.
+func TestParseStreamLine_SystemInit(t *testing.T) {
+	line := []byte(`{"type":"system","subtype":"init","session_id":"abc","tools":["Bash","Read"],"model":"sonnet"}`)
+
+	evt, result, err := parseStreamLine(line)
+	if err != nil {
+		t.Fatalf("unexpected error: %v", err)
+	}
+	if evt.Kind != coretypes.StreamInit {
+		t.Errorf("kind = %q, want %q", evt.Kind, coretypes.StreamInit)
+	}
+	if result != nil {
+		t.Error("expected nil result for system event")
+	}
+}
+
+// TestParseStreamLine_AssistantToolUse checks that a tool_use block yields StreamToolUse with the tool name and its "command" input.
+func TestParseStreamLine_AssistantToolUse(t *testing.T) {
+	line := []byte(`{"type":"assistant","message":{"content":[{"type":"tool_use","name":"Bash","id":"call_1","input":{"command":"ls -la /tmp"}}]}}`)
+
+	evt, result, err := parseStreamLine(line)
+	if err != nil {
+		t.Fatalf("unexpected error: %v", err)
+	}
+	if evt.Kind != coretypes.StreamToolUse {
+		t.Errorf("kind = %q, want %q", evt.Kind, coretypes.StreamToolUse)
+	}
+	if evt.ToolName != "Bash" {
+		t.Errorf("tool_name = %q, want %q", evt.ToolName, "Bash")
+	}
+	if evt.ToolInput != "ls -la /tmp" {
+		t.Errorf("tool_input = %q, want %q", evt.ToolInput, "ls -la /tmp")
+	}
+	if result != nil {
+		t.Error("expected nil result for assistant event")
+	}
+}
+
+// TestParseStreamLine_AssistantToolUseFilePath checks that a tool_use input without "command" is described by its "file_path".
+func TestParseStreamLine_AssistantToolUseFilePath(t *testing.T) {
+	line := []byte(`{"type":"assistant","message":{"content":[{"type":"tool_use","name":"Read","id":"call_2","input":{"file_path":"/home/user/main.go"}}]}}`)
+
+	evt, _, err := parseStreamLine(line)
+	if err != nil {
+		t.Fatalf("unexpected error: %v", err)
+	}
+	if evt.Kind != coretypes.StreamToolUse {
+		t.Errorf("kind = %q, want %q", evt.Kind, coretypes.StreamToolUse)
+	}
+	if evt.ToolName != "Read" {
+		t.Errorf("tool_name = %q, want %q", evt.ToolName, "Read")
+	}
+	if evt.ToolInput != "/home/user/main.go" {
+		t.Errorf("tool_input = %q, want %q", evt.ToolInput, "/home/user/main.go")
+	}
+}
+
+// TestParseStreamLine_AssistantText checks that a text block yields StreamText carrying the block's text.
+func TestParseStreamLine_AssistantText(t *testing.T) {
+	line := []byte(`{"type":"assistant","message":{"content":[{"type":"text","text":"Hello, world!"}]}}`)
+
+	evt, result, err := parseStreamLine(line)
+	if err != nil {
+		t.Fatalf("unexpected error: %v", err)
+	}
+	if evt.Kind != coretypes.StreamText {
+		t.Errorf("kind = %q, want %q", evt.Kind, coretypes.StreamText)
+	}
+	if evt.Content != "Hello, world!" {
+		t.Errorf("content = %q, want %q", evt.Content, "Hello, world!")
+	}
+	if result != nil {
+		t.Error("expected nil result for text event")
+	}
+}
+
+// TestParseStreamLine_AssistantNoContent checks that an assistant message with no content blocks still yields a StreamText event.
+func TestParseStreamLine_AssistantNoContent(t *testing.T) {
+	line := []byte(`{"type":"assistant","message":{"content":[]}}`)
+
+	evt, _, err := parseStreamLine(line)
+	if err != nil {
+		t.Fatalf("unexpected error: %v", err)
+	}
+	if evt.Kind != coretypes.StreamText {
+		t.Errorf("kind = %q, want %q", evt.Kind, coretypes.StreamText)
+	}
+}
+
+// TestParseStreamLine_ResultSuccess checks that a successful result line yields StreamResult and a payload with text, usage and cost.
+func TestParseStreamLine_ResultSuccess(t *testing.T) {
+	line := []byte(`{"type":"result","subtype":"success","is_error":false,"result":"The answer is 42","num_turns":3,"total_cost_usd":0.05,"usage":{"input_tokens":100,"output_tokens":50}}`)
+
+	evt, result, err := parseStreamLine(line)
+	if err != nil {
+		t.Fatalf("unexpected error: %v", err)
+	}
+	if evt.Kind != coretypes.StreamResult {
+		t.Errorf("kind = %q, want %q", evt.Kind, coretypes.StreamResult)
+	}
+	if evt.Content != "The answer is 42" {
+		t.Errorf("content = %q, want %q", evt.Content, "The answer is 42")
+	}
+	if evt.IsError {
+		t.Error("expected IsError=false")
+	}
+	if result == nil {
+		t.Fatal("expected non-nil result for result event")
+	}
+	if result.Result != "The answer is 42" {
+		t.Errorf("result.Result = %q, want %q", result.Result, "The answer is 42")
+	}
+	if result.Usage.InputTokens != 100 {
+		t.Errorf("input_tokens = %d, want 100", result.Usage.InputTokens)
+	}
+	if result.Usage.OutputTokens != 50 {
+		t.Errorf("output_tokens = %d, want 50", result.Usage.OutputTokens)
+	}
+	if result.TotalCost != 0.05 {
+		t.Errorf("total_cost = %f, want 0.05", result.TotalCost)
+	}
+}
+
+// TestParseStreamLine_ResultError checks that is_error=true is propagated to both the event and the result payload.
+func TestParseStreamLine_ResultError(t *testing.T) {
+	line := []byte(`{"type":"result","subtype":"error","is_error":true,"result":"API key expired","num_turns":0}`)
+
+	evt, result, err := parseStreamLine(line)
+	if err != nil {
+		t.Fatalf("unexpected error: %v", err)
+	}
+	if evt.Kind != coretypes.StreamResult {
+		t.Errorf("kind = %q, want %q", evt.Kind, coretypes.StreamResult)
+	}
+	if !evt.IsError {
+		t.Error("expected IsError=true")
+	}
+	if result == nil {
+		t.Fatal("expected non-nil result")
+	}
+	if !result.IsError {
+		t.Error("expected result.IsError=true")
+	}
+}
+
+// TestParseStreamLine_UnknownType checks that an unrecognized event type falls back to StreamText instead of failing.
+func TestParseStreamLine_UnknownType(t *testing.T) {
+	line := []byte(`{"type":"future_event","data":"some_value"}`)
+
+	evt, _, err := parseStreamLine(line)
+	if err != nil {
+		t.Fatalf("unexpected error: %v", err)
+	}
+	if evt.Kind != coretypes.StreamText {
+		t.Errorf("kind = %q, want %q (fallback for unknown types)", evt.Kind, coretypes.StreamText)
+	}
+}
+
+// TestParseStreamLine_InvalidJSON checks that a line that is not JSON is reported as an error.
+func TestParseStreamLine_InvalidJSON(t *testing.T) {
+	line := []byte(`not valid json`)
+
+	_, _, err := parseStreamLine(line)
+	if err == nil {
+		t.Error("expected error for invalid JSON")
+	}
+}
+
+// ── truncateToolInput ───────────────────────────────────────────────
+
+// TestTruncateToolInput_Nil checks that a nil input is described as the empty string.
+func TestTruncateToolInput_Nil(t *testing.T) {
+	got := truncateToolInput(nil)
+	if got != "" {
+		t.Errorf("got %q, want empty", got)
+	}
+}
+
+// TestTruncateToolInput_String checks that a short string input is returned unchanged.
+func TestTruncateToolInput_String(t *testing.T) {
+	got := truncateToolInput("hello world")
+	if got != "hello world" {
+		t.Errorf("got %q, want %q", got, "hello world")
+	}
+}
+
+// TestTruncateToolInput_LongString checks that a 200-byte string is cut to exactly 100 bytes ending in "...".
+func TestTruncateToolInput_LongString(t *testing.T) {
+	long := strings.Repeat("x", 200)
+	got := truncateToolInput(long)
+	if len(got) != 100 {
+		t.Errorf("len = %d, want 100", len(got))
+	}
+	if !strings.HasSuffix(got, "...") {
+		t.Error("should end with ...")
+	}
+}
+
+// TestTruncateToolInput_MapWithCommand checks that an object input is described by its "command" field.
+func TestTruncateToolInput_MapWithCommand(t *testing.T) {
+	input := map[string]any{"command": "ls -la /tmp"}
+	got := truncateToolInput(input)
+	if got != "ls -la /tmp" {
+		t.Errorf("got %q, want %q", got, "ls -la /tmp")
+	}
+}
+
+// TestTruncateToolInput_MapWithFilePath checks that an object input without "command" is described by its "file_path" field.
+func TestTruncateToolInput_MapWithFilePath(t *testing.T) {
+	input := map[string]any{"file_path": "/home/user/main.go"}
+	got := truncateToolInput(input)
+	if got != "/home/user/main.go" {
+		t.Errorf("got %q, want %q", got, "/home/user/main.go")
+	}
+}
+
+// ── buildClaudeArgs streaming ───────────────────────────────────────
+
+// TestBuildClaudeArgs_StreamingEnabled checks that Streaming=true plus a StreamFunc selects stream-json and adds --verbose.
+func TestBuildClaudeArgs_StreamingEnabled(t *testing.T) {
+	cfg := config.ClaudeCodeCfg{
+		Streaming: true,
+	}
+	streamFn := func(evt coretypes.StreamEvent) {}
+	req := coretypes.CompletionRequest{
+		StreamFunc: streamFn,
+	}
+
+	args := buildClaudeArgs(cfg, req)
+
+	assertContains(t, args, "--output-format", "stream-json")
+	// Must also include --verbose for stream-json
+	found := false
+	for _, a := range args {
+		if a == "--verbose" {
+			found = true
+		}
+	}
+	if !found {
+		t.Error("--verbose should be present when streaming")
+	}
+}
+
+// TestBuildClaudeArgs_StreamingDisabled checks that the default configuration keeps --output-format json and omits --verbose.
+func TestBuildClaudeArgs_StreamingDisabled(t *testing.T) {
+	cfg := config.ClaudeCodeCfg{
+		Streaming: false,
+	}
+	req := coretypes.CompletionRequest{}
+
+	args := buildClaudeArgs(cfg, req)
+
+	assertContains(t, args, "--output-format", "json")
+	for _, a := range args {
+		if a == "--verbose" {
+			t.Error("--verbose should NOT be present when not streaming")
+		}
+	}
+}
+
+// TestBuildClaudeArgs_StreamingEnabledNoStreamFunc checks that streaming is only selected when the request also supplies a callback.
+func TestBuildClaudeArgs_StreamingEnabledNoStreamFunc(t *testing.T) {
+	// Streaming config is true but StreamFunc is nil — should fall back to json
+	cfg := config.ClaudeCodeCfg{
+		Streaming: true,
+	}
+	req := coretypes.CompletionRequest{
+		StreamFunc: nil,
+	}
+
+	args := buildClaudeArgs(cfg, req)
+
+	assertContains(t, args, "--output-format", "json")
+}
+
+// ── executeStreaming with mock stdout ────────────────────────────────
+
+// TestExecuteStreaming_MockStdout replays a typical stream-json transcript through parseStreamLine and
+// buildResponseFromResult. NOTE: despite its name it does not call executeStreaming; no subprocess or pipe is involved.
+func TestExecuteStreaming_MockStdout(t *testing.T) {
+	// Representative stream-json lines, one JSON object per line, in the order init, tool use, text, result
+	lines := []string{
+		`{"type":"system","subtype":"init","session_id":"test-123"}`,
+		`{"type":"assistant","message":{"content":[{"type":"tool_use","name":"Bash","id":"call_1","input":{"command":"echo hello"}}]}}`,
+		`{"type":"assistant","message":{"content":[{"type":"text","text":"Done executing."}]}}`,
+		`{"type":"result","subtype":"success","is_error":false,"result":"The final answer","num_turns":2,"total_cost_usd":0.01,"usage":{"input_tokens":50,"output_tokens":25}}`,
+	}
+
+	var events []coretypes.StreamEvent
+	streamFn := func(evt coretypes.StreamEvent) {
+		events = append(events, evt)
+	}
+
+	// Repeat by hand the per-line steps of the streaming read loop: parse, emit, remember the result event
+	var lastResult *claudeJSONOutput
+	for _, line := range lines {
+		evt, parsed, err := parseStreamLine([]byte(line))
+		if err != nil {
+			t.Fatalf("parse error on line: %v", err)
+		}
+		streamFn(evt)
+		if parsed != nil && parsed.Type == "result" {
+			lastResult = parsed
+		}
+	}
+
+	// Verify events
+	if len(events) != 4 {
+		t.Fatalf("expected 4 events, got %d", len(events))
+	}
+	if events[0].Kind != coretypes.StreamInit {
+		t.Errorf("event[0].Kind = %q, want %q", events[0].Kind, coretypes.StreamInit)
+	}
+	if events[1].Kind != coretypes.StreamToolUse {
+		t.Errorf("event[1].Kind = %q, want %q", events[1].Kind, coretypes.StreamToolUse)
+	}
+	if events[1].ToolName != "Bash" {
+		t.Errorf("event[1].ToolName = %q, want %q", events[1].ToolName, "Bash")
+	}
+	if events[1].ToolInput != "echo hello" {
+		t.Errorf("event[1].ToolInput = %q, want %q", events[1].ToolInput, "echo hello")
+	}
+	if events[2].Kind != coretypes.StreamText {
+		t.Errorf("event[2].Kind = %q, want %q", events[2].Kind, coretypes.StreamText)
+	}
+	if events[3].Kind != coretypes.StreamResult {
+		t.Errorf("event[3].Kind = %q, want %q", events[3].Kind, coretypes.StreamResult)
+	}
+	if events[3].Content != "The final answer" {
+		t.Errorf("event[3].Content = %q, want %q", events[3].Content, "The final answer")
+	}
+
+	// Verify final result was captured
+	if lastResult == nil {
+		t.Fatal("expected lastResult to be set")
+	}
+	if lastResult.Result != "The final answer" {
+		t.Errorf("lastResult.Result = %q", lastResult.Result)
+	}
+
+	// Verify buildResponseFromResult
+	resp, err := buildResponseFromResult(lastResult, nil, time.Second, discardLog)
+	if err != nil {
+		t.Fatalf("unexpected error: %v", err)
+	}
+	if resp.Content != "The final answer" {
+		t.Errorf("resp.Content = %q", resp.Content)
+	}
+	if resp.Usage.InputTokens != 50 {
+		t.Errorf("input_tokens = %d, want 50", resp.Usage.InputTokens)
+	}
+	if resp.FinishReason != "stop" {
+		t.Errorf("finish_reason = %q, want %q", resp.FinishReason, "stop")
+	}
+}
+
+// TestBuildResponseFromResult_Error checks that a result flagged IsError becomes a Go error containing the result text.
+func TestBuildResponseFromResult_Error(t *testing.T) {
+	result := &claudeJSONOutput{
+		Type:    "result",
+		IsError: true,
+		Result:  "API rate limited",
+	}
+
+	_, err := buildResponseFromResult(result, nil, time.Second, discardLog)
+	if err == nil {
+		t.Fatal("expected error for IsError=true")
+	}
+	if !contains(err.Error(), "API rate limited") {
+		t.Errorf("error = %q, should contain 'API rate limited'", err.Error())
+	}
+}
+
+// TestBuildResponseFromResult_ExecError checks that a process error alongside a usable result yields FinishReason "error" without failing the call.
+func TestBuildResponseFromResult_ExecError(t *testing.T) {
+	result := &claudeJSONOutput{
+		Type:   "result",
+		Result: "partial output",
+		Usage:  claudeUsage{InputTokens: 10, OutputTokens: 5},
+	}
+
+	resp, err := buildResponseFromResult(result, errors.New("timeout"), time.Second, discardLog)
+	if err != nil {
+		t.Fatalf("unexpected error: %v", err)
+	}
+	if resp.FinishReason != "error" {
+		t.Errorf("finish_reason = %q, want %q", resp.FinishReason, "error")
+	}
+}
+
 // ── helpers ──────────────────────────────────────────────────────────────

 func contains(s, substr string) bool {