feat: import agents_and_robots platform as unibots (Matrix-out, unibus transport)

Reemplaza el scaffold del echobot por la plataforma completa de bots traída desde ~/DataProyects/Github/agents_and_robots tras la operación Matrix-out: los bots ya no hablan por Matrix sino por el bus unibus (modelo todo-rooms + E2E vía shell/transportunibus sobre github.com/enmanuel/unibus/pkg/client). - go.mod: replace de unibus -> ../unibus y de fn-registry -> ../../../.. (paths relativos reajustados a la nueva ubicación dentro de fn_registry). - app.md: bump a 0.2.0, descripción + arquitectura + comandos + gotchas reales. - módulo Go conservado como github.com/enmanuel/agents (sin reescribir imports). agents_and_robots queda archivado como museo de la era Matrix.
2026-06-07 11:50:13 +02:00
parent bb5b0e09b1
commit fc644ecd6e
308 changed files with 38829 additions and 474 deletions
@@ -0,0 +1,242 @@
+// Package llm contains impure LLM provider implementations.
+package llm
+
+import (
+	"bytes"
+	"context"
+	"encoding/json"
+	"fmt"
+	"io"
+	"log/slog"
+	"net/http"
+	"os"
+	"time"
+
+	coretypes "github.com/enmanuel/agents/pkg/llm"
+	"github.com/enmanuel/agents/shell/logger"
+)
+
+// Defaults for talking to the Anthropic Messages API.
+const (
+	// anthropicAPIBase is the endpoint root used when no base URL is supplied.
+	anthropicAPIBase = "https://api.anthropic.com/v1"
+	// anthropicVersion is sent in the anthropic-version request header.
+	anthropicVersion = "2023-06-01"
+)
+
+// NewAnthropicComplete returns a CompleteFunc backed by the Anthropic API.
+func NewAnthropicComplete(apiKeyEnv, baseURL string, log *slog.Logger) coretypes.CompleteFunc {
+	if baseURL == "" {
+		baseURL = anthropicAPIBase
+	}
+
+	return func(ctx context.Context, req coretypes.CompletionRequest) (coretypes.CompletionResponse, error) {
+		apiKey := os.Getenv(apiKeyEnv)
+		if apiKey == "" {
+			return coretypes.CompletionResponse{}, fmt.Errorf("env var %s is not set", apiKeyEnv)
+		}
+
+		log.Info("llm_request",
+			"provider", "anthropic",
+			"model", req.Model,
+			"messages", len(req.Messages),
+			"tools", len(req.Tools),
+		)
+
+		body := toAnthropicRequest(req)
+		raw, err := json.Marshal(body)
+		if err != nil {
+			return coretypes.CompletionResponse{}, fmt.Errorf("marshal request: %w", err)
+		}
+
+		httpReq, err := http.NewRequestWithContext(ctx, http.MethodPost, baseURL+"/messages", bytes.NewReader(raw))
+		if err != nil {
+			return coretypes.CompletionResponse{}, err
+		}
+		httpReq.Header.Set("x-api-key", apiKey)
+		httpReq.Header.Set("anthropic-version", anthropicVersion)
+		httpReq.Header.Set("content-type", "application/json")
+
+		start := time.Now()
+		resp, err := http.DefaultClient.Do(httpReq)
+		if err != nil {
+			ms := time.Since(start).Milliseconds()
+			log.Error("llm_error", "provider", "anthropic", logger.FieldDurationMS, ms, "err", err)
+			return coretypes.CompletionResponse{}, fmt.Errorf("anthropic request: %w", err)
+		}
+		defer resp.Body.Close()
+
+		respBytes, err := io.ReadAll(resp.Body)
+		if err != nil {
+			return coretypes.CompletionResponse{}, fmt.Errorf("read response: %w", err)
+		}
+
+		ms := time.Since(start).Milliseconds()
+		if resp.StatusCode != http.StatusOK {
+			log.Error("llm_error", "provider", "anthropic", logger.FieldDurationMS, ms, "status", resp.StatusCode)
+			return coretypes.CompletionResponse{}, fmt.Errorf("anthropic error %d: %s", resp.StatusCode, respBytes)
+		}
+
+		result, err := fromAnthropicResponse(respBytes)
+		if err != nil {
+			log.Error("llm_error", "provider", "anthropic", logger.FieldDurationMS, ms, "err", err)
+			return result, err
+		}
+
+		log.Info("llm_response",
+			"provider", "anthropic",
+			"model", req.Model,
+			logger.FieldDurationMS, ms,
+			logger.FieldTokensUsed, result.Usage.TotalTokens,
+			"input_tokens", result.Usage.InputTokens,
+			"output_tokens", result.Usage.OutputTokens,
+			"tool_calls", len(result.ToolCalls),
+			"finish_reason", result.FinishReason,
+		)
+
+		return result, nil
+	}
+}
+
+// ── private conversion helpers ────────────────────────────────────────────
+
+// anthropicRequest is the JSON body sent to the /messages endpoint.
+// System and Tools are left out of the encoded body when empty.
+// NOTE(review): the core request's Temperature (used by the OpenAI provider)
+// has no counterpart here — confirm whether that is intentional.
+type anthropicRequest struct {
+	Model     string             `json:"model"`
+	MaxTokens int                `json:"max_tokens"`
+	System    string             `json:"system,omitempty"`
+	Messages  []anthropicMessage `json:"messages"`
+	Tools     []anthropicTool    `json:"tools,omitempty"`
+}
+
+// anthropicMessage is one conversation turn in an anthropicRequest.
+// Content is pre-encoded JSON: toAnthropicMessage stores either a JSON string
+// (plain text) or a JSON array of content blocks.
+type anthropicMessage struct {
+	Role    string          `json:"role"`
+	Content json.RawMessage `json:"content"`
+}
+
+// anthropicTool describes one tool offered to the model; toAnthropicRequest
+// copies its fields from the core tool spec.
+type anthropicTool struct {
+	Name        string         `json:"name"`
+	Description string         `json:"description"`
+	InputSchema map[string]any `json:"input_schema"`
+}
+
+// anthropicContentBlock represents a block in a content array.
+// One struct covers the text, tool_use and tool_result shapes; Type selects
+// which of the remaining fields are meaningful. Every field other than Type is
+// tagged omitempty, so zero values (including an empty Input map) are left out
+// of the encoded JSON.
+type anthropicContentBlock struct {
+	Type string `json:"type"`
+
+	// text block
+	Text string `json:"text,omitempty"`
+
+	// tool_use block (in assistant responses)
+	ID    string         `json:"id,omitempty"`
+	Name  string         `json:"name,omitempty"`
+	Input map[string]any `json:"input,omitempty"`
+
+	// tool_result block (in user messages)
+	ToolUseID string `json:"tool_use_id,omitempty"`
+	Content   string `json:"content,omitempty"`
+}
+
+// anthropicResponse holds the fields of a /messages response that
+// fromAnthropicResponse reads: the content blocks, token usage and stop reason.
+type anthropicResponse struct {
+	Content []anthropicContentBlock `json:"content"`
+	Usage   struct {
+		InputTokens  int `json:"input_tokens"`
+		OutputTokens int `json:"output_tokens"`
+	} `json:"usage"`
+	StopReason string `json:"stop_reason"`
+}
+
+// toAnthropicRequest maps a core CompletionRequest onto the Anthropic request
+// body. Messages with the system role are skipped; the system text comes from
+// req.SystemPrompt instead. Tool specs are copied field by field.
+func toAnthropicRequest(req coretypes.CompletionRequest) anthropicRequest {
+	out := anthropicRequest{
+		Model:     req.Model,
+		MaxTokens: req.MaxTokens,
+		System:    req.SystemPrompt,
+		Messages:  make([]anthropicMessage, 0, len(req.Messages)),
+		Tools:     make([]anthropicTool, 0, len(req.Tools)),
+	}
+
+	for _, msg := range req.Messages {
+		if msg.Role != coretypes.RoleSystem {
+			out.Messages = append(out.Messages, toAnthropicMessage(msg))
+		}
+	}
+
+	for _, spec := range req.Tools {
+		out.Tools = append(out.Tools, anthropicTool{
+			Name:        spec.Name,
+			Description: spec.Description,
+			InputSchema: spec.InputSchema,
+		})
+	}
+
+	return out
+}
+
+// toAnthropicMessage converts a core Message to the Anthropic format.
+//
+// Three shapes are produced:
+//   - an assistant message carrying tool calls becomes a content array with an
+//     optional text block followed by one tool_use block per call;
+//   - a tool-role message becomes a user message whose content is a single
+//     tool_result block referencing m.ToolCallID;
+//   - anything else is sent with its content encoded as a plain JSON string.
+//
+// Every tool_use block is emitted with an "input" object. A call whose
+// Arguments are empty, "null" or not a JSON object gets an empty object, because
+// the block is encoded through a local struct without omitempty instead of
+// anthropicContentBlock, which would drop an empty input from the JSON.
+func toAnthropicMessage(m coretypes.Message) anthropicMessage {
+	// toolUse mirrors the tool_use fields of anthropicContentBlock but always
+	// encodes all of them.
+	type toolUse struct {
+		Type  string         `json:"type"`
+		ID    string         `json:"id"`
+		Name  string         `json:"name"`
+		Input map[string]any `json:"input"`
+	}
+
+	// Assistant message with tool calls → content array with text + tool_use blocks
+	if m.Role == coretypes.RoleAssistant && len(m.ToolCalls) > 0 {
+		blocks := make([]any, 0, len(m.ToolCalls)+1)
+		if m.Content != "" {
+			blocks = append(blocks, anthropicContentBlock{Type: "text", Text: m.Content})
+		}
+		for _, tc := range m.ToolCalls {
+			var input map[string]any
+			if err := json.Unmarshal([]byte(tc.Arguments), &input); err != nil || input == nil {
+				// Keep the conversation sendable: fall back to "no arguments"
+				// rather than omitting the field.
+				input = map[string]any{}
+			}
+			blocks = append(blocks, toolUse{
+				Type:  "tool_use",
+				ID:    tc.ID,
+				Name:  tc.Name,
+				Input: input,
+			})
+		}
+		// Error ignored: blocks hold only strings and maps decoded from JSON.
+		raw, _ := json.Marshal(blocks)
+		return anthropicMessage{Role: "assistant", Content: raw}
+	}
+
+	// Tool result message → user message with tool_result content array
+	if m.Role == coretypes.RoleTool {
+		blocks := []anthropicContentBlock{{
+			Type:      "tool_result",
+			ToolUseID: m.ToolCallID,
+			Content:   m.Content,
+		}}
+		// Error ignored: the block contains only string fields.
+		raw, _ := json.Marshal(blocks)
+		return anthropicMessage{Role: "user", Content: raw}
+	}
+
+	// Plain text message. Error ignored: encoding a string cannot fail.
+	raw, _ := json.Marshal(m.Content)
+	return anthropicMessage{Role: string(m.Role), Content: raw}
+}
+
+// fromAnthropicResponse decodes a /messages response body into a core
+// CompletionResponse. Text blocks are concatenated in order into Content, each
+// tool_use block becomes a ToolCall whose Arguments is the re-encoded input,
+// and other block types are ignored. TotalTokens is the sum of input and
+// output tokens.
+func fromAnthropicResponse(raw []byte) (coretypes.CompletionResponse, error) {
+	var decoded anthropicResponse
+	if err := json.Unmarshal(raw, &decoded); err != nil {
+		return coretypes.CompletionResponse{}, fmt.Errorf("unmarshal response: %w", err)
+	}
+
+	var (
+		text  bytes.Buffer
+		calls []coretypes.ToolCall
+	)
+	for _, block := range decoded.Content {
+		if block.Type == "text" {
+			text.WriteString(block.Text)
+			continue
+		}
+		if block.Type == "tool_use" {
+			args, _ := json.Marshal(block.Input)
+			calls = append(calls, coretypes.ToolCall{
+				ID:        block.ID,
+				Name:      block.Name,
+				Arguments: string(args),
+			})
+		}
+	}
+
+	in, out := decoded.Usage.InputTokens, decoded.Usage.OutputTokens
+	return coretypes.CompletionResponse{
+		Content:      text.String(),
+		ToolCalls:    calls,
+		FinishReason: decoded.StopReason,
+		Usage: coretypes.TokenUsage{
+			InputTokens:  in,
+			OutputTokens: out,
+			TotalTokens:  in + out,
+		},
+	}, nil
+}
@@ -0,0 +1,295 @@
+package llm
+
+import (
+	"bytes"
+	"context"
+	"encoding/json"
+	"fmt"
+	"log/slog"
+	"os"
+	"os/exec"
+	"strings"
+	"syscall"
+	"time"
+
+	"github.com/enmanuel/agents/internal/config"
+	coretypes "github.com/enmanuel/agents/pkg/llm"
+)
+
+// Fallbacks applied by NewClaudeCodeComplete when the config leaves a field unset.
+const (
+	// defaultClaudeBinary is the executable name used when cfg.Binary is empty.
+	defaultClaudeBinary  = "claude"
+	// defaultClaudeTimeout bounds a single invocation when cfg.Timeout is <= 0.
+	defaultClaudeTimeout = 5 * time.Minute
+)
+
+// claudeJSONOutput represents the JSON output from `claude -p --output-format json`.
+//
+// parseClaudeOutput reads IsError, Result, ContentBlock, Usage, NumTurns and
+// TotalCost; the remaining fields are decoded but not used in this file.
+type claudeJSONOutput struct {
+	Type         string          `json:"type"`
+	Subtype      string          `json:"subtype"`
+	CostUSD      float64         `json:"cost_usd"`
+	IsError      bool            `json:"is_error"`
+	Duration     float64         `json:"duration_api_ms"`
+	NumTurns     int             `json:"num_turns"`
+	Result       string          `json:"result"`
+	SessionID    string          `json:"session_id"`
+	TotalCost    float64         `json:"total_cost_usd"`
+	Usage        claudeUsage     `json:"usage"`
+	ContentBlock []claudeContent `json:"content"`
+}
+
+// claudeUsage is the "usage" object of the claude JSON output (token counts).
+type claudeUsage struct {
+	InputTokens  int `json:"input_tokens"`
+	OutputTokens int `json:"output_tokens"`
+}
+
+// claudeContent is one entry of the "content" array in the claude JSON output.
+// parseClaudeOutput only uses entries whose Type is "text".
+type claudeContent struct {
+	Type string `json:"type"`
+	Text string `json:"text"`
+}
+
+// NewClaudeCodeComplete creates a CompleteFunc that executes `claude -p` as a subprocess.
+//
+// cfg.Binary and cfg.Timeout fall back to defaultClaudeBinary and
+// defaultClaudeTimeout. The working directory is resolved once, at construction
+// time. Each call flattens req.Messages into a prompt that is written to the
+// subprocess's stdin, runs the binary in its own process group with
+// ANTHROPIC_API_KEY removed from the environment, and hands the captured
+// stdout/stderr to parseClaudeOutput.
+//
+// The subprocess is bounded in two ways: the per-call timeout kills the whole
+// process group, and cmd.WaitDelay stops Run from blocking forever when claude
+// has exited but a descendant still holds the output pipes open.
+func NewClaudeCodeComplete(cfg config.ClaudeCodeCfg, log *slog.Logger) coretypes.CompleteFunc {
+	// pipeGrace is how long Run may keep waiting for stdout/stderr to reach EOF
+	// after the process has exited or the context has been cancelled.
+	const pipeGrace = 5 * time.Second
+
+	binary := cfg.Binary
+	if binary == "" {
+		binary = defaultClaudeBinary
+	}
+
+	timeout := cfg.Timeout
+	if timeout <= 0 {
+		timeout = defaultClaudeTimeout
+	}
+
+	// Resolve working directory once at init time.
+	workDir := resolveWorkDir(cfg.WorkingDir, log)
+
+	return func(ctx context.Context, req coretypes.CompletionRequest) (coretypes.CompletionResponse, error) {
+		ctx, cancel := context.WithTimeout(ctx, timeout)
+		defer cancel()
+
+		args := buildClaudeArgs(cfg, req)
+
+		prompt := flattenMessages(req.Messages)
+
+		log.Debug("claude_code_exec",
+			"binary", binary,
+			"args", strings.Join(args, " "),
+			"prompt_len", len(prompt),
+			"working_dir", workDir,
+		)
+
+		cmd := exec.CommandContext(ctx, binary, args...)
+		if workDir != "" {
+			cmd.Dir = workDir
+		}
+		// Build clean env: inherit parent but remove ANTHROPIC_API_KEY
+		// so claude uses its own OAuth auth instead of a potentially invalid key.
+		cmd.Env = filterEnv(os.Environ(), "ANTHROPIC_API_KEY")
+		cmd.Stdin = strings.NewReader(prompt)
+
+		// Create a new process group so we can kill claude + all its children.
+		cmd.SysProcAttr = &syscall.SysProcAttr{Setpgid: true}
+		// Override the default cancel behavior: kill the entire process group
+		// instead of just the main process, preventing orphaned child processes.
+		cmd.Cancel = func() error {
+			if cmd.Process != nil {
+				pgid := cmd.Process.Pid
+				log.Info("killing claude-code process group", "pgid", pgid)
+				// Negative PID = kill entire process group
+				return syscall.Kill(-pgid, syscall.SIGKILL)
+			}
+			return nil
+		}
+		// Without a WaitDelay, Run waits for the output pipes to hit EOF, which
+		// never happens while an orphaned descendant keeps them open.
+		cmd.WaitDelay = pipeGrace
+
+		var stdout, stderr bytes.Buffer
+		cmd.Stdout = &stdout
+		cmd.Stderr = &stderr
+
+		start := time.Now()
+		err := cmd.Run()
+		elapsed := time.Since(start)
+
+		// Ensure the process group is fully dead after Run returns,
+		// even if cmd.Run() returned without triggering Cancel (normal exit).
+		if cmd.Process != nil {
+			_ = syscall.Kill(-cmd.Process.Pid, syscall.SIGKILL)
+		}
+
+		// ErrWaitDelay means claude itself exited successfully and only the
+		// pipes had to be closed forcibly, so the captured output is complete.
+		// Run returns the sentinel unwrapped, hence the direct comparison.
+		if err == exec.ErrWaitDelay {
+			log.Warn("claude_code_pipes_force_closed", "wait_delay_ms", pipeGrace.Milliseconds())
+			err = nil
+		}
+
+		log.Debug("claude_code_done",
+			"elapsed_ms", elapsed.Milliseconds(),
+			"stdout_len", stdout.Len(),
+			"stderr_len", stderr.Len(),
+			"exit_err", err,
+		)
+
+		return parseClaudeOutput(stdout.Bytes(), stderr.Bytes(), err, elapsed, log)
+	}
+}
+
+// resolveWorkDir picks the directory the claude subprocess will run in.
+//
+// A non-empty configured path is created if needed (a creation failure is only
+// logged) and returned unchanged. An empty path yields a freshly created
+// temporary directory so the subprocess does not inherit the launcher's CWD;
+// if that directory cannot be created, "" is returned and the caller leaves
+// cmd.Dir unset.
+func resolveWorkDir(configured string, log *slog.Logger) string {
+	if configured != "" {
+		// Ensure configured directory exists.
+		if err := os.MkdirAll(configured, 0o755); err != nil {
+			log.Error("claude-code: failed to create working dir", "dir", configured, "err", err)
+		}
+		return configured
+	}
+
+	scratch, err := os.MkdirTemp("", "claude-agent-*")
+	if err != nil {
+		log.Error("claude-code: failed to create temp working dir", "err", err)
+		return "" // Fall through — cmd.Dir will remain empty (inherits CWD).
+	}
+
+	log.Warn("claude-code working_dir is empty, using temporary directory",
+		"dir", scratch,
+	)
+	return scratch
+}
+
+// buildClaudeArgs assembles the argument list for a `claude --print` run.
+//
+// The list always starts with --print --output-format json. Optional flags are
+// appended in a fixed order: system prompt, tool restrictions, permission mode,
+// model, fallback model, session id, then one --add-dir per extra directory.
+// When cfg.DisableTools is set, `--tools ""` is passed and the allow/deny lists
+// are ignored.
+func buildClaudeArgs(cfg config.ClaudeCodeCfg, req coretypes.CompletionRequest) []string {
+	args := []string{"--print", "--output-format", "json"}
+
+	// option appends "flag value" only when value is non-empty.
+	option := func(flag, value string) {
+		if value != "" {
+			args = append(args, flag, value)
+		}
+	}
+	// list appends "flag v1 v2 ..." only when at least one value is present.
+	list := func(flag string, values []string) {
+		if len(values) > 0 {
+			args = append(args, flag)
+			args = append(args, values...)
+		}
+	}
+
+	option("--system-prompt", req.SystemPrompt)
+
+	switch {
+	case cfg.DisableTools:
+		args = append(args, "--tools", "")
+	default:
+		list("--allowedTools", cfg.AllowedTools)
+		list("--disallowedTools", cfg.DisallowedTools)
+	}
+
+	option("--permission-mode", cfg.PermissionMode)
+	option("--model", cfg.Model)
+	option("--fallback-model", cfg.FallbackModel)
+	option("--session-id", cfg.SessionID)
+
+	for i := range cfg.AddDirs {
+		args = append(args, "--add-dir", cfg.AddDirs[i])
+	}
+
+	return args
+}
+
+// flattenMessages renders a conversation as one text prompt for stdin.
+// User, assistant and tool messages each become a "<label>: <content>"
+// paragraph followed by a blank line; messages with any other role are skipped.
+func flattenMessages(msgs []coretypes.Message) string {
+	var out strings.Builder
+	for _, msg := range msgs {
+		var label string
+		switch msg.Role {
+		case coretypes.RoleUser:
+			label = "User: "
+		case coretypes.RoleAssistant:
+			label = "Assistant: "
+		case coretypes.RoleTool:
+			label = "Tool result: "
+		default:
+			continue
+		}
+		out.WriteString(label)
+		out.WriteString(msg.Content)
+		out.WriteString("\n\n")
+	}
+	return out.String()
+}
+
+// parseClaudeOutput parses the JSON output from `claude -p --output-format json`.
+//
+// stdout and stderr are the captured streams, execErr is the error returned by
+// running the process (nil on a clean exit) and elapsed is only logged.
+//
+// Outcomes:
+//   - process failed with no stdout: an error built from the trimmed stderr,
+//     or wrapping execErr when stderr is blank;
+//   - stdout is not valid JSON: stdout is returned as plain-text content;
+//   - JSON with is_error set: an error carrying the result text;
+//   - otherwise: content from "result", or from the text content blocks joined
+//     with newlines when "result" is empty, plus token usage.
+//
+// FinishReason is "error" whenever execErr is non-nil and "stop" otherwise, on
+// both the JSON path and the plain-text fallback.
+func parseClaudeOutput(
+	stdout, stderr []byte,
+	execErr error,
+	elapsed time.Duration,
+	log *slog.Logger,
+) (coretypes.CompletionResponse, error) {
+	// If the process failed and there's no stdout, report the error
+	if execErr != nil && len(stdout) == 0 {
+		if errMsg := strings.TrimSpace(string(stderr)); errMsg != "" {
+			return coretypes.CompletionResponse{}, fmt.Errorf("claude-code process failed: %s", errMsg)
+		}
+		// Wrap so callers can still match the cause (e.g. a context deadline).
+		return coretypes.CompletionResponse{}, fmt.Errorf("claude-code process failed: %w", execErr)
+	}
+
+	// A failed process that still produced output is reported as a partial result.
+	finishReason := "stop"
+	if execErr != nil {
+		finishReason = "error"
+	}
+
+	// Parse JSON output
+	var output claudeJSONOutput
+	if err := json.Unmarshal(stdout, &output); err != nil {
+		// Fall back to treating stdout as plain text
+		log.Warn("claude_code_json_parse_failed", "err", err, "stdout_len", len(stdout))
+		return coretypes.CompletionResponse{
+			Content:      strings.TrimSpace(string(stdout)),
+			FinishReason: finishReason,
+		}, nil
+	}
+
+	if output.IsError {
+		return coretypes.CompletionResponse{}, fmt.Errorf("claude-code error: %s", output.Result)
+	}
+
+	// Extract text from result field or content blocks
+	content := output.Result
+	if content == "" && len(output.ContentBlock) > 0 {
+		parts := make([]string, 0, len(output.ContentBlock))
+		for _, block := range output.ContentBlock {
+			if block.Type == "text" && block.Text != "" {
+				parts = append(parts, block.Text)
+			}
+		}
+		content = strings.Join(parts, "\n")
+	}
+
+	log.Info("claude_code_response",
+		"content_len", len(content),
+		"input_tokens", output.Usage.InputTokens,
+		"output_tokens", output.Usage.OutputTokens,
+		"num_turns", output.NumTurns,
+		"cost_usd", output.TotalCost,
+		"elapsed_ms", elapsed.Milliseconds(),
+	)
+
+	return coretypes.CompletionResponse{
+		Content: content,
+		Usage: coretypes.TokenUsage{
+			InputTokens:  output.Usage.InputTokens,
+			OutputTokens: output.Usage.OutputTokens,
+			TotalTokens:  output.Usage.InputTokens + output.Usage.OutputTokens,
+		},
+		FinishReason: finishReason,
+	}, nil
+}
+
+// filterEnv copies environ, leaving out every "KEY=value" entry whose KEY is
+// one of keys. Matching is on the full name followed by "=", so a key never
+// removes a longer variable that merely starts with it. The input slice is not
+// modified.
+func filterEnv(environ []string, keys ...string) []string {
+	kept := make([]string, 0, len(environ))
+entries:
+	for _, entry := range environ {
+		for _, key := range keys {
+			if strings.HasPrefix(entry, key+"=") {
+				continue entries
+			}
+		}
+		kept = append(kept, entry)
+	}
+	return kept
+}
@@ -0,0 +1,402 @@
+package llm
+
+import (
+	"encoding/json"
+	"errors"
+	"io"
+	"log/slog"
+	"os"
+	"path/filepath"
+	"strings"
+	"testing"
+	"time"
+
+	"github.com/enmanuel/agents/internal/config"
+	coretypes "github.com/enmanuel/agents/pkg/llm"
+)
+
+// discardLog is a logger whose output goes to io.Discard; the tests pass it
+// wherever a *slog.Logger is required.
+var discardLog = slog.New(slog.NewTextHandler(io.Discard, nil))
+
+// ── buildClaudeArgs ──────────────────────────────────────────────────────
+
+// TestBuildClaudeArgs_Minimal checks that a zero config and request yield
+// exactly the fixed prefix --print --output-format json.
+func TestBuildClaudeArgs_Minimal(t *testing.T) {
+	got := buildClaudeArgs(config.ClaudeCodeCfg{}, coretypes.CompletionRequest{})
+
+	// Must always start with --print --output-format json
+	want := []string{"--print", "--output-format", "json"}
+	if len(got) != len(want) {
+		t.Fatalf("got %v, want %v", got, want)
+	}
+	for i, w := range want {
+		if got[i] != w {
+			t.Errorf("args[%d] = %q, want %q", i, got[i], w)
+		}
+	}
+}
+
+// TestBuildClaudeArgs_AllOptions checks that every configured option shows up
+// as its flag immediately followed by the configured value.
+func TestBuildClaudeArgs_AllOptions(t *testing.T) {
+	args := buildClaudeArgs(
+		config.ClaudeCodeCfg{
+			Model:          "sonnet",
+			FallbackModel:  "haiku",
+			PermissionMode: "bypassPermissions",
+			AllowedTools:   []string{"Bash(git:*)", "Read"},
+			SessionID:      "abc-123",
+			AddDirs:        []string{"/tmp/extra"},
+		},
+		coretypes.CompletionRequest{SystemPrompt: "You are a helpful bot"},
+	)
+
+	expected := [][2]string{
+		{"--system-prompt", "You are a helpful bot"},
+		{"--model", "sonnet"},
+		{"--fallback-model", "haiku"},
+		{"--permission-mode", "bypassPermissions"},
+		{"--session-id", "abc-123"},
+		{"--add-dir", "/tmp/extra"},
+		{"--allowedTools", "Bash(git:*)"},
+	}
+	for _, pair := range expected {
+		assertContains(t, args, pair[0], pair[1])
+	}
+}
+
+// TestBuildClaudeArgs_DisableTools checks that DisableTools emits `--tools ""`
+// and suppresses the allow-list even when one is configured.
+func TestBuildClaudeArgs_DisableTools(t *testing.T) {
+	args := buildClaudeArgs(
+		config.ClaudeCodeCfg{
+			DisableTools: true,
+			AllowedTools: []string{"Bash"}, // should be ignored
+		},
+		coretypes.CompletionRequest{},
+	)
+
+	assertContains(t, args, "--tools", "")
+	// --allowedTools must NOT appear when disable_tools is set
+	for i := range args {
+		if args[i] == "--allowedTools" {
+			t.Error("--allowedTools should not appear when DisableTools=true")
+		}
+	}
+}
+
+// TestBuildClaudeArgs_DisallowedTools checks that the deny-list is passed after
+// --disallowedTools.
+func TestBuildClaudeArgs_DisallowedTools(t *testing.T) {
+	args := buildClaudeArgs(
+		config.ClaudeCodeCfg{DisallowedTools: []string{"Edit", "Write"}},
+		coretypes.CompletionRequest{},
+	)
+	assertContains(t, args, "--disallowedTools", "Edit")
+}
+
+// ── flattenMessages ──────────────────────────────────────────────────────
+
+// TestFlattenMessages_Empty checks that no messages produce an empty prompt.
+func TestFlattenMessages_Empty(t *testing.T) {
+	if got := flattenMessages(nil); got != "" {
+		t.Errorf("expected empty, got %q", got)
+	}
+}
+
+// TestFlattenMessages_MultiRole checks that user, assistant and tool messages
+// each appear in the prompt with their role label.
+func TestFlattenMessages_MultiRole(t *testing.T) {
+	prompt := flattenMessages([]coretypes.Message{
+		{Role: coretypes.RoleUser, Content: "hello"},
+		{Role: coretypes.RoleAssistant, Content: "hi there"},
+		{Role: coretypes.RoleTool, Content: `{"time":"12:00"}`},
+		{Role: coretypes.RoleUser, Content: "thanks"},
+	})
+
+	for _, fragment := range []string{
+		"User: hello",
+		"Assistant: hi there",
+		`Tool result: {"time":"12:00"}`,
+		"User: thanks",
+	} {
+		if !contains(prompt, fragment) {
+			t.Errorf("missing %q in:\n%s", fragment, prompt)
+		}
+	}
+}
+
+// TestFlattenMessages_SkipsSystem checks that system-role messages are left
+// out of the prompt while the other messages are kept.
+func TestFlattenMessages_SkipsSystem(t *testing.T) {
+	prompt := flattenMessages([]coretypes.Message{
+		{Role: coretypes.RoleSystem, Content: "system prompt"},
+		{Role: coretypes.RoleUser, Content: "hello"},
+	})
+
+	if contains(prompt, "system prompt") {
+		t.Error("system messages should not appear in flattened output")
+	}
+	if !contains(prompt, "User: hello") {
+		t.Error("user message missing")
+	}
+}
+
+// ── parseClaudeOutput ────────────────────────────────────────────────────
+
+func TestParseClaudeOutput_Success(t *testing.T) {
+	output := claudeJSONOutput{
+		Type:      "result",
+		Subtype:   "success",
+		IsError:   false,
+		NumTurns:  1,
+		Result:    "Hello! I'm Claude.",
+		TotalCost: 0.025,
+		Usage:     claudeUsage{InputTokens: 10, OutputTokens: 50},
+	}
+	stdout, _ := json.Marshal(output)
+
+	resp, err := parseClaudeOutput(stdout, nil, nil, 2*time.Second, discardLog)
+	if err != nil {
+		t.Fatalf("unexpected error: %v", err)
+	}
+	if resp.Content != "Hello! I'm Claude." {
+		t.Errorf("content = %q, want %q", resp.Content, "Hello! I'm Claude.")
+	}
+	if resp.Usage.InputTokens != 10 {
+		t.Errorf("input tokens = %d, want 10", resp.Usage.InputTokens)
+	}
+	if resp.Usage.OutputTokens != 50 {
+		t.Errorf("output tokens = %d, want 50", resp.Usage.OutputTokens)
+	}
+	if resp.Usage.TotalTokens != 60 {
+		t.Errorf("total tokens = %d, want 60", resp.Usage.TotalTokens)
+	}
+	if resp.FinishReason != "stop" {
+		t.Errorf("finish reason = %q, want %q", resp.FinishReason, "stop")
+	}
+}
+
+// TestParseClaudeOutput_ErrorResponse checks that is_error=true becomes a Go
+// error carrying the result text.
+func TestParseClaudeOutput_ErrorResponse(t *testing.T) {
+	stdout, _ := json.Marshal(claudeJSONOutput{
+		IsError: true,
+		Result:  "Invalid API key",
+	})
+
+	_, err := parseClaudeOutput(stdout, nil, nil, time.Second, discardLog)
+	if err == nil {
+		t.Fatal("expected error for IsError=true")
+	}
+	if msg := err.Error(); !contains(msg, "Invalid API key") {
+		t.Errorf("error = %q, should contain 'Invalid API key'", msg)
+	}
+}
+
+// TestParseClaudeOutput_ProcessFailedNoStdout checks that a failed process
+// with empty stdout reports the stderr text in the error.
+func TestParseClaudeOutput_ProcessFailedNoStdout(t *testing.T) {
+	stderr := []byte("unknown option\n")
+
+	_, err := parseClaudeOutput(nil, stderr, errors.New("exit 1"), time.Second, discardLog)
+	if err == nil {
+		t.Fatal("expected error when process fails with no stdout")
+	}
+	if msg := err.Error(); !contains(msg, "unknown option") {
+		t.Errorf("error = %q, should contain stderr message", msg)
+	}
+}
+
+// TestParseClaudeOutput_ProcessFailedNoStderr checks that, with both streams
+// empty, the error falls back to the exec error's text.
+func TestParseClaudeOutput_ProcessFailedNoStderr(t *testing.T) {
+	runErr := errors.New("exit 1")
+
+	_, err := parseClaudeOutput(nil, nil, runErr, time.Second, discardLog)
+	if err == nil {
+		t.Fatal("expected error")
+	}
+	if msg := err.Error(); !contains(msg, "exit 1") {
+		t.Errorf("error = %q, should contain exec error", msg)
+	}
+}
+
+func TestParseClaudeOutput_FallbackPlainText(t *testing.T) {
+	// Non-JSON stdout should be treated as plain text
+	resp, err := parseClaudeOutput([]byte("just plain text\n"), nil, nil, time.Second, discardLog)
+	if err != nil {
+		t.Fatalf("unexpected error: %v", err)
+	}
+	if resp.Content != "just plain text" {
+		t.Errorf("content = %q, want %q", resp.Content, "just plain text")
+	}
+}
+
+// TestParseClaudeOutput_ContentBlocks checks that, with an empty result
+// field, the text content blocks are joined with newlines.
+func TestParseClaudeOutput_ContentBlocks(t *testing.T) {
+	stdout, _ := json.Marshal(claudeJSONOutput{
+		Result: "", // empty result, content in blocks
+		ContentBlock: []claudeContent{
+			{Type: "text", Text: "First part."},
+			{Type: "text", Text: "Second part."},
+		},
+		Usage: claudeUsage{InputTokens: 5, OutputTokens: 20},
+	})
+
+	resp, err := parseClaudeOutput(stdout, nil, nil, time.Second, discardLog)
+	if err != nil {
+		t.Fatalf("unexpected error: %v", err)
+	}
+	if got := resp.Content; got != "First part.\nSecond part." {
+		t.Errorf("content = %q, want joined blocks", got)
+	}
+}
+
+// TestParseClaudeOutput_ExecErrWithStdout checks that valid JSON from a failed
+// process is still parsed, with the finish reason set to "error".
+func TestParseClaudeOutput_ExecErrWithStdout(t *testing.T) {
+	// Process failed but produced valid JSON output — should parse and set finish_reason=error
+	stdout, _ := json.Marshal(claudeJSONOutput{
+		Result: "partial answer",
+		Usage:  claudeUsage{InputTokens: 3, OutputTokens: 10},
+	})
+
+	resp, err := parseClaudeOutput(stdout, nil, errors.New("timeout"), time.Second, discardLog)
+	if err != nil {
+		t.Fatalf("unexpected error: %v", err)
+	}
+	if got := resp.FinishReason; got != "error" {
+		t.Errorf("finish reason = %q, want %q", got, "error")
+	}
+	if got := resp.Content; got != "partial answer" {
+		t.Errorf("content = %q", got)
+	}
+}
+
+// ── filterEnv ────────────────────────────────────────────────────────────
+
+// TestFilterEnv_RemovesSingleKey checks that exactly the named variable is
+// dropped and the other entries survive.
+func TestFilterEnv_RemovesSingleKey(t *testing.T) {
+	got := filterEnv([]string{
+		"HOME=/home/user",
+		"ANTHROPIC_API_KEY=sk-secret",
+		"PATH=/usr/bin",
+	}, "ANTHROPIC_API_KEY")
+
+	if n := len(got); n != 2 {
+		t.Fatalf("expected 2 entries, got %d: %v", n, got)
+	}
+	for i := range got {
+		if contains(got[i], "ANTHROPIC_API_KEY") {
+			t.Errorf("ANTHROPIC_API_KEY should have been removed: %v", got)
+		}
+	}
+}
+
+// TestFilterEnv_RemovesMultipleKeys checks that several keys can be filtered
+// in one call.
+func TestFilterEnv_RemovesMultipleKeys(t *testing.T) {
+	got := filterEnv(
+		[]string{
+			"HOME=/home/user",
+			"ANTHROPIC_API_KEY=sk-secret",
+			"OPENAI_API_KEY=sk-openai",
+			"PATH=/usr/bin",
+		},
+		"ANTHROPIC_API_KEY", "OPENAI_API_KEY",
+	)
+
+	if n := len(got); n != 2 {
+		t.Fatalf("expected 2 entries, got %d: %v", n, got)
+	}
+}
+
+// TestFilterEnv_NoMatchKeepsAll checks that filtering an absent key keeps
+// every entry.
+func TestFilterEnv_NoMatchKeepsAll(t *testing.T) {
+	got := filterEnv([]string{"HOME=/home/user", "PATH=/usr/bin"}, "NONEXISTENT")
+
+	if n := len(got); n != 2 {
+		t.Fatalf("expected 2, got %d", n)
+	}
+}
+
+// TestFilterEnv_PrefixSafety checks that a key does not remove a longer
+// variable name that merely starts with it.
+func TestFilterEnv_PrefixSafety(t *testing.T) {
+	// ANTHROPIC_API_KEY_V2 should NOT be removed when filtering ANTHROPIC_API_KEY
+	got := filterEnv([]string{
+		"ANTHROPIC_API_KEY=secret",
+		"ANTHROPIC_API_KEY_V2=other",
+	}, "ANTHROPIC_API_KEY")
+
+	if n := len(got); n != 1 {
+		t.Fatalf("expected 1, got %d: %v", n, got)
+	}
+	if kept := got[0]; kept != "ANTHROPIC_API_KEY_V2=other" {
+		t.Errorf("wrong entry kept: %q", kept)
+	}
+}
+
+// ── resolveWorkDir ──────────────────────────────────────────────────────
+
+// TestResolveWorkDir_EmptyCreatesTempDir checks that an empty setting yields
+// an existing temporary directory named with the claude-agent- prefix.
+func TestResolveWorkDir_EmptyCreatesTempDir(t *testing.T) {
+	tmp := resolveWorkDir("", discardLog)
+	if tmp == "" {
+		t.Fatal("expected a temp directory, got empty string")
+	}
+	defer os.RemoveAll(tmp)
+
+	if !strings.Contains(tmp, "claude-agent-") {
+		t.Errorf("temp dir %q should contain 'claude-agent-' prefix", tmp)
+	}
+
+	stat, err := os.Stat(tmp)
+	if err != nil {
+		t.Fatalf("temp dir should exist: %v", err)
+	}
+	if !stat.IsDir() {
+		t.Error("temp dir should be a directory")
+	}
+}
+
+// TestResolveWorkDir_ConfiguredValueUsed checks that a configured path is
+// returned unchanged and created when it does not exist yet.
+func TestResolveWorkDir_ConfiguredValueUsed(t *testing.T) {
+	target := filepath.Join(t.TempDir(), "custom-workdir")
+
+	resolved := resolveWorkDir(target, discardLog)
+	if resolved != target {
+		t.Errorf("got %q, want %q", resolved, target)
+	}
+
+	stat, err := os.Stat(resolved)
+	if err != nil {
+		t.Fatalf("configured dir should be created: %v", err)
+	}
+	if !stat.IsDir() {
+		t.Error("configured dir should be a directory")
+	}
+}
+
+// TestResolveWorkDir_ConfiguredAlreadyExists checks that an existing
+// configured directory is returned as-is.
+func TestResolveWorkDir_ConfiguredAlreadyExists(t *testing.T) {
+	existing := t.TempDir() // already exists
+
+	if resolved := resolveWorkDir(existing, discardLog); resolved != existing {
+		t.Errorf("got %q, want %q", resolved, existing)
+	}
+}
+
+// ── helpers ──────────────────────────────────────────────────────────────
+
+// contains reports whether substr occurs within s. An empty substr is
+// contained in every string, including the empty one.
+func contains(s, substr string) bool {
+	return strings.Contains(s, substr)
+}
+
+// stringContains reports whether sub occurs within s.
+func stringContains(s, sub string) bool {
+	return strings.Contains(s, sub)
+}
+
+// assertContains fails the test unless flag appears in args immediately
+// followed by value. An empty value is matched like any other, so
+// assertContains(t, args, "--tools", "") checks for an explicit empty argument.
+func assertContains(t *testing.T, args []string, flag, value string) {
+	t.Helper()
+	for i := 0; i+1 < len(args); i++ {
+		if args[i] == flag && args[i+1] == value {
+			return
+		}
+	}
+	t.Errorf("args %v missing %s %q", args, flag, value)
+}
@@ -0,0 +1,51 @@
+package llm
+
+import (
+	"context"
+	"fmt"
+	"log/slog"
+
+	"github.com/enmanuel/agents/internal/config"
+	coretypes "github.com/enmanuel/agents/pkg/llm"
+)
+
+// FromConfig builds a CompleteFunc from an LLMProviderCfg.
+//
+// Supported providers are "anthropic", "openai", "ollama" and "claude-code".
+// "ollama" reuses the OpenAI-compatible client with the OLLAMA_API_KEY
+// variable and defaults the base URL to http://localhost:11434/v1. Any other
+// provider name is an error.
+func FromConfig(cfg config.LLMProviderCfg, log *slog.Logger) (coretypes.CompleteFunc, error) {
+	log.Info("llm_provider_init", "provider", cfg.Provider, "model", cfg.Model)
+
+	switch provider := cfg.Provider; provider {
+	case "anthropic":
+		return NewAnthropicComplete(cfg.APIKeyEnv, cfg.BaseURL, log), nil
+
+	case "openai":
+		return NewOpenAIComplete(cfg.APIKeyEnv, cfg.BaseURL, log), nil
+
+	case "ollama":
+		endpoint := "http://localhost:11434/v1"
+		if cfg.BaseURL != "" {
+			endpoint = cfg.BaseURL
+		}
+		return NewOpenAIComplete("OLLAMA_API_KEY", endpoint, log), nil
+
+	case "claude-code":
+		return NewClaudeCodeComplete(cfg.ClaudeCode, log), nil
+
+	default:
+		return nil, fmt.Errorf("unknown LLM provider: %s", provider)
+	}
+}
+
+// WithFallback wraps primary with a fallback CompleteFunc.
+//
+// The returned function calls primary first and returns its response on
+// success. On error it logs a warning, overrides the request's Model and
+// MaxTokens with the non-zero values from fallbackCfg, and returns whatever
+// fallback produces; the primary error is not returned.
+func WithFallback(primary, fallback coretypes.CompleteFunc, fallbackCfg config.LLMProviderCfg, log *slog.Logger) coretypes.CompleteFunc {
+	return func(ctx context.Context, req coretypes.CompletionRequest) (coretypes.CompletionResponse, error) {
+		resp, primaryErr := primary(ctx, req)
+		if primaryErr == nil {
+			return resp, nil
+		}
+		log.Warn("llm_fallback_triggered", "primary_err", primaryErr)
+
+		// Override request fields with fallback config values
+		if model := fallbackCfg.Model; model != "" {
+			req.Model = model
+		}
+		if limit := fallbackCfg.MaxTokens; limit > 0 {
+			req.MaxTokens = limit
+		}
+		return fallback(ctx, req)
+	}
+}
@@ -0,0 +1,169 @@
+package llm
+
+import (
+	"context"
+	"encoding/json"
+	"fmt"
+	"log/slog"
+	"os"
+	"time"
+
+	openai "github.com/sashabaranov/go-openai"
+
+	coretypes "github.com/enmanuel/agents/pkg/llm"
+	"github.com/enmanuel/agents/shell/logger"
+)
+
+// NewOpenAIComplete returns a CompleteFunc backed by the OpenAI-compatible API.
+// Works with OpenAI, Ollama, vLLM, LMStudio — just change baseURL.
+//
+// apiKeyEnv names the environment variable holding the API key; it is read on
+// every call and, when empty, the placeholder key "ollama" is used. A
+// non-empty baseURL overrides the client's default endpoint. Only the first
+// choice of the response is used; a response without choices is an error.
+func NewOpenAIComplete(apiKeyEnv, baseURL string, log *slog.Logger) coretypes.CompleteFunc {
+	return func(ctx context.Context, req coretypes.CompletionRequest) (coretypes.CompletionResponse, error) {
+		key := os.Getenv(apiKeyEnv)
+		if key == "" {
+			key = "ollama" // Ollama doesn't require a real key
+		}
+
+		clientCfg := openai.DefaultConfig(key)
+		if baseURL != "" {
+			clientCfg.BaseURL = baseURL
+		}
+		client := openai.NewClientWithConfig(clientCfg)
+
+		// The system prompt, when present, goes first, followed by the history.
+		history := make([]openai.ChatCompletionMessage, 0, len(req.Messages)+1)
+		if req.SystemPrompt != "" {
+			history = append(history, openai.ChatCompletionMessage{
+				Role:    openai.ChatMessageRoleSystem,
+				Content: req.SystemPrompt,
+			})
+		}
+		for _, msg := range req.Messages {
+			history = append(history, toOpenAIMessage(msg))
+		}
+
+		chatReq := openai.ChatCompletionRequest{
+			Model:       req.Model,
+			Messages:    history,
+			MaxTokens:   req.MaxTokens,
+			Temperature: float32(req.Temperature),
+		}
+		// Add tools if present
+		if len(req.Tools) > 0 {
+			chatReq.Tools = toOpenAITools(req.Tools)
+		}
+
+		log.Info("llm_request",
+			"provider", "openai",
+			"model", req.Model,
+			"messages", len(req.Messages),
+			"tools", len(req.Tools),
+		)
+
+		started := time.Now()
+		resp, err := client.CreateChatCompletion(ctx, chatReq)
+		ms := time.Since(started).Milliseconds()
+		if err != nil {
+			log.Error("llm_error", "provider", "openai", logger.FieldDurationMS, ms, "err", err)
+			return coretypes.CompletionResponse{}, fmt.Errorf("openai completion: %w", err)
+		}
+
+		if len(resp.Choices) == 0 {
+			log.Error("llm_error", "provider", "openai", logger.FieldDurationMS, ms, "err", "empty choices")
+			return coretypes.CompletionResponse{}, fmt.Errorf("openai: empty choices")
+		}
+
+		first := resp.Choices[0]
+		finish := string(first.FinishReason)
+
+		var calls []coretypes.ToolCall
+		for _, tc := range first.Message.ToolCalls {
+			calls = append(calls, coretypes.ToolCall{
+				ID:        tc.ID,
+				Name:      tc.Function.Name,
+				Arguments: tc.Function.Arguments,
+			})
+		}
+
+		log.Info("llm_response",
+			"provider", "openai",
+			"model", req.Model,
+			logger.FieldDurationMS, ms,
+			logger.FieldTokensUsed, resp.Usage.TotalTokens,
+			"input_tokens", resp.Usage.PromptTokens,
+			"output_tokens", resp.Usage.CompletionTokens,
+			"tool_calls", len(calls),
+			"finish_reason", finish,
+		)
+
+		return coretypes.CompletionResponse{
+			Content:      first.Message.Content,
+			ToolCalls:    calls,
+			FinishReason: finish,
+			Usage: coretypes.TokenUsage{
+				InputTokens:  resp.Usage.PromptTokens,
+				OutputTokens: resp.Usage.CompletionTokens,
+				TotalTokens:  resp.Usage.TotalTokens,
+			},
+		}, nil
+	}
+}
+
+// toOpenAIMessage converts a core Message to an OpenAI ChatCompletionMessage.
+func toOpenAIMessage(m coretypes.Message) openai.ChatCompletionMessage {
+	role := openai.ChatMessageRoleUser
+	switch m.Role {
+	case coretypes.RoleAssistant:
+		role = openai.ChatMessageRoleAssistant
+	case coretypes.RoleSystem:
+		role = openai.ChatMessageRoleSystem
+	case coretypes.RoleTool:
+		role = openai.ChatMessageRoleTool
+	}
+
+	msg := openai.ChatCompletionMessage{
+		Role:       role,
+		Content:    m.Content,
+		ToolCallID: m.ToolCallID,
+	}
+
+	// Assistant messages with tool calls
+	if m.Role == coretypes.RoleAssistant && len(m.ToolCalls) > 0 {
+		msg.ToolCalls = make([]openai.ToolCall, len(m.ToolCalls))
+		for i, tc := range m.ToolCalls {
+			msg.ToolCalls[i] = openai.ToolCall{
+				ID:   tc.ID,
+				Type: openai.ToolTypeFunction,
+				Function: openai.FunctionCall{
+					Name:      tc.Name,
+					Arguments: tc.Arguments,
+				},
+			}
+		}
+	}
+
+	return msg
+}
+
+// toOpenAITools converts core ToolSpecs to OpenAI Tool format.
+// Each spec becomes a function-type tool whose parameters are the spec's input
+// schema encoded as raw JSON.
+func toOpenAITools(specs []coretypes.ToolSpec) []openai.Tool {
+	out := make([]openai.Tool, 0, len(specs))
+	for _, spec := range specs {
+		def := &openai.FunctionDefinition{
+			Name:        spec.Name,
+			Description: spec.Description,
+			Parameters:  json.RawMessage(marshalSchema(spec.InputSchema)),
+		}
+		out = append(out, openai.Tool{Type: openai.ToolTypeFunction, Function: def})
+	}
+	return out
+}
+
+// marshalSchema marshals a JSON schema map to bytes. Falls back to empty object.
+//
+// The fallback "{}" is returned both when encoding fails and when schema is
+// nil, which would otherwise encode as the JSON literal null.
+func marshalSchema(schema map[string]any) []byte {
+	if schema == nil {
+		return []byte("{}")
+	}
+	b, err := json.Marshal(schema)
+	if err != nil {
+		return []byte("{}")
+	}
+	return b
+}