diff --git a/cmd/devicemesh-mcp/bridge.go b/cmd/devicemesh-mcp/bridge.go new file mode 100644 index 0000000..4f1b335 --- /dev/null +++ b/cmd/devicemesh-mcp/bridge.go @@ -0,0 +1,165 @@ +// bridge.go — adapter that registers every devicemesh.ToolSpec from a +// ToolRegistry as an MCP tool on a mcp-go server.MCPServer. +// +// Tool name preservation: we register tools under their dotted devicemesh +// name verbatim ("exec", "shell.eval", "fs.read"). claude exposes them to +// the model as `mcp____` (the MCP transport prefixes +// automatically). +// +// Schema: ToolSpec.InputSchema is already a JSON-Schema-lite map. We +// marshal it to a json.RawMessage and feed it via mcp.WithRawInputSchema so +// the LLM sees the full structure (required fields, enums, descriptions). +// +// Handler: each tool's handler invokes reg.Call(ctx, name, args). The +// registry runs ValidateInput → ArgMapping → HTTP dispatch → ResultMapping +// just like the in-process tool-use path. The result is JSON-encoded into +// an MCP text-content block. Errors become NewToolResultError so the model +// can self-correct on the next turn. +package main + +import ( + "context" + "encoding/json" + "fmt" + "log/slog" + + "github.com/mark3labs/mcp-go/mcp" + "github.com/mark3labs/mcp-go/server" + + "github.com/enmanuel/agents/pkg/tools/devicemesh" +) + +// RegisterToolBridge walks reg and registers each spec on srv. Returns the +// first registration error, if any. Pure data adapter except for the slog +// debug events. +func RegisterToolBridge(srv *server.MCPServer, reg *devicemesh.ToolRegistry, logger *slog.Logger) error { + if srv == nil { + return fmt.Errorf("RegisterToolBridge: srv is nil") + } + if reg == nil { + return fmt.Errorf("RegisterToolBridge: reg is nil") + } + for _, spec := range reg.List() { + tool, err := buildMCPTool(spec) + if err != nil { + return fmt.Errorf("build MCP tool %q: %w", spec.Name, err) + } + handler := makeHandler(reg, spec, logger) + srv.AddTool(tool, handler) + if logger != nil { + logger.Debug("registered MCP tool", + "name", spec.Name, + "capability", spec.Capability, + "requires_approval", spec.RequiresApproval, + ) + } + } + return nil +} + +// buildMCPTool transforms a devicemesh.ToolSpec into an mcp.Tool with the +// raw input schema attached. The description is augmented with the +// capability marker so the model knows the tool is remote. +// +// We use mcp.NewToolWithRawSchema (not NewTool + WithRawInputSchema) because +// NewTool initialises a default ToolInputSchema with Type="object", which +// then conflicts at marshal time with our RawInputSchema (the SDK rejects +// having both set — see mcp/tools.go ::Tool.MarshalJSON). +func buildMCPTool(spec devicemesh.ToolSpec) (mcp.Tool, error) { + desc := spec.Description + if spec.Capability != "" { + desc = fmt.Sprintf("%s [device_mesh: %s]", desc, spec.Capability) + } + if spec.RequiresApproval { + desc += " (approval required)" + } + + if spec.InputSchema == nil { + // Fall back to a minimal "no params" schema so the tool is still + // callable. Should not happen for the builtins (they all set + // InputSchema), but the adapter must not panic on third-party specs. + return mcp.NewToolWithRawSchema(spec.Name, desc, + json.RawMessage(`{"type":"object","properties":{}}`)), nil + } + raw, err := json.Marshal(spec.InputSchema) + if err != nil { + return mcp.Tool{}, fmt.Errorf("marshal input schema: %w", err) + } + return mcp.NewToolWithRawSchema(spec.Name, desc, raw), nil +} + +// makeHandler returns a server.ToolHandlerFunc bound to a single spec. The +// closure captures the registry so the HTTP dispatch goes through the same +// validate → map → call pipeline as the in-process path. +func makeHandler(reg *devicemesh.ToolRegistry, spec devicemesh.ToolSpec, logger *slog.Logger) server.ToolHandlerFunc { + return func(ctx context.Context, req mcp.CallToolRequest) (*mcp.CallToolResult, error) { + args := req.GetArguments() + if args == nil { + args = map[string]any{} + } + if logger != nil { + logger.Debug("tools/call received", + "tool", spec.Name, + "capability", spec.Capability, + "arg_keys", keysOf(args), + ) + } + + result, err := reg.Call(ctx, spec.Name, args) + if err != nil { + if logger != nil { + logger.Warn("tools/call failed", + "tool", spec.Name, + "err", err.Error(), + ) + } + // NewToolResultError returns a CallToolResult with isError=true. + // Returning (result, nil) lets the model see and self-correct + // instead of treating it as a transport-level failure. + return mcp.NewToolResultError(err.Error()), nil + } + + text := encodeResult(result) + if logger != nil { + logger.Debug("tools/call ok", + "tool", spec.Name, + "result_len", len(text), + ) + } + return mcp.NewToolResultText(text), nil + } +} + +// encodeResult converts a tool result (any) to the string payload the model +// will see. Mirrors devicemesh.AdaptTool's formatToolResult so MCP and the +// in-process path produce consistent transcripts. +// +// - nil → "" +// - string → returned as-is (avoids double-encoding JSON strings) +// - other → json.Marshal; on failure fall back to fmt.Sprintf so we never +// drop data on the floor. +func encodeResult(v any) string { + if v == nil { + return "" + } + if s, ok := v.(string); ok { + return s + } + b, err := json.Marshal(v) + if err != nil { + return fmt.Sprintf("%v", v) + } + return string(b) +} + +// keysOf returns the sorted keys of a map for log context. Pure helper. +func keysOf(m map[string]any) []string { + if len(m) == 0 { + return nil + } + out := make([]string, 0, len(m)) + for k := range m { + out = append(out, k) + } + return out +} diff --git a/cmd/devicemesh-mcp/integration_test.go b/cmd/devicemesh-mcp/integration_test.go new file mode 100644 index 0000000..e7e63b7 --- /dev/null +++ b/cmd/devicemesh-mcp/integration_test.go @@ -0,0 +1,177 @@ +package main + +import ( + "bufio" + "encoding/json" + "io" + "net/http" + "net/http/httptest" + "os" + "os/exec" + "path/filepath" + "strings" + "testing" + "time" +) + +// TestIntegrationBinarySubprocess builds the binary (or uses an existing +// bin/devicemesh-mcp) and exercises a full initialize -> tools/list -> +// tools/call sequence over a real OS pipe. This validates that the same +// code path that claude will invoke (subprocess + stdio) works end-to-end. +// +// Skipped when the binary cannot be built or located, so the rest of the +// unit tests still run cleanly on minimal sandboxes. +func TestIntegrationBinarySubprocess(t *testing.T) { + if testing.Short() { + t.Skip("integration test skipped in -short mode") + } + + binPath := buildOrLocateBinary(t) + if binPath == "" { + t.Skip("cannot build/locate devicemesh-mcp binary") + } + + mock := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + body := map[string]any{} + _ = json.NewDecoder(r.Body).Decode(&body) + _ = json.NewEncoder(w).Encode(map[string]any{ + "request_id": body["request_id"], + "ok": true, + "duration_ms": 7, + "result": map[string]any{ + "stdout": "subprocess hi", + "stderr": "", + "exit_code": 0, + }, + }) + })) + defer mock.Close() + + cmd := exec.Command(binPath, + "--device-agent", mock.URL, + "--mode", "user", + "--server-name", "devicemesh", + ) + + stdin, err := cmd.StdinPipe() + if err != nil { + t.Fatalf("stdin pipe: %v", err) + } + stdout, err := cmd.StdoutPipe() + if err != nil { + t.Fatalf("stdout pipe: %v", err) + } + cmd.Stderr = io.Discard + + if err := cmd.Start(); err != nil { + t.Fatalf("start: %v", err) + } + defer func() { + _ = stdin.Close() + _ = cmd.Process.Kill() + _ = cmd.Wait() + }() + + // Real MCP clients send `notifications/initialized` after the + // initialize response is received before sending any other requests. + // We mirror the same sequence — without it the server may queue + // follow-up frames behind the not-yet-initialized session. + frames := []string{ + initFrame(1), + notifInitializedFrame(), + toolsListFrame(2), + toolsCallFrame(3, "exec", map[string]any{"argv": []any{"echo", "subprocess"}}), + } + for _, f := range frames { + if !strings.HasSuffix(f, "\n") { + f += "\n" + } + if _, err := stdin.Write([]byte(f)); err != nil { + t.Fatalf("write frame: %v", err) + } + } + + // Read responses (up to 3 with timeout). + reader := bufio.NewReader(stdout) + deadline := time.After(5 * time.Second) + responses := make([]map[string]any, 0, 3) + + readCh := make(chan map[string]any, 4) + go func() { + defer close(readCh) + dec := json.NewDecoder(reader) + for { + var msg map[string]any + if err := dec.Decode(&msg); err != nil { + return + } + readCh <- msg + } + }() + +readLoop: + for { + select { + case msg, ok := <-readCh: + if !ok { + break readLoop + } + responses = append(responses, msg) + if len(responses) >= 3 { + break readLoop + } + case <-deadline: + break readLoop + } + } + + if len(responses) < 3 { + t.Fatalf("expected 3 responses, got %d: %v", len(responses), responses) + } + + // Validate the tools/call (id=3) response. + r := responses[2] + if r["id"] != float64(3) { + t.Errorf("expected id=3, got %v", r["id"]) + } + result, _ := r["result"].(map[string]any) + contents, _ := result["content"].([]any) + if len(contents) == 0 { + t.Fatalf("missing content in tools/call response: %v", r) + } + first, _ := contents[0].(map[string]any) + text, _ := first["text"].(string) + if !strings.Contains(text, "subprocess hi") { + t.Errorf("expected text to contain 'subprocess hi', got %q", text) + } +} + +// buildOrLocateBinary returns the absolute path to bin/devicemesh-mcp, +// building it under a temp dir if it is missing. Returns "" if neither +// option works (the test then skips). +func buildOrLocateBinary(t *testing.T) string { + t.Helper() + // First, try ../../bin/devicemesh-mcp relative to this file (CWD when + // `go test ./cmd/devicemesh-mcp/` is the cmd dir itself). + candidates := []string{ + filepath.Join("..", "..", "bin", "devicemesh-mcp"), + filepath.Join("bin", "devicemesh-mcp"), + } + for _, c := range candidates { + if abs, err := filepath.Abs(c); err == nil { + if st, err := os.Stat(abs); err == nil && !st.IsDir() { + return abs + } + } + } + // Build into a tmpdir. + tmpDir := t.TempDir() + out := filepath.Join(tmpDir, "devicemesh-mcp") + cmd := exec.Command("/usr/local/go/bin/go", "build", "-tags", "goolm", "-o", out, ".") + cmd.Stderr = os.Stderr + if err := cmd.Run(); err != nil { + t.Logf("build failed: %v", err) + return "" + } + return out +} diff --git a/cmd/devicemesh-mcp/main.go b/cmd/devicemesh-mcp/main.go new file mode 100644 index 0000000..c760b03 --- /dev/null +++ b/cmd/devicemesh-mcp/main.go @@ -0,0 +1,208 @@ +// Command devicemesh-mcp is a per-agent MCP server (stdio) that exposes the +// agents_and_robots device-mesh tool catalog (exec, shell.eval, fs.*, git.*, +// pkg.*, proc.*, docker.*) to a parent `claude -p` subprocess. +// +// Architecture (issue 0145): +// +// claude -p +// ├─ spawns this binary as child via --mcp-config +// ├─ JSON-RPC over stdio +// ├─ initialize / tools/list / tools/call / ping / notifications/initialized +// └─ tool names exposed as `mcp____` to the model +// +// Flags: +// +// --device-agent required — http://host:port of the remote device_agent +// --mode user|sudo|all default user — filters which builtin tools are registered +// --tools-allowed optional — narrows the catalog after mode filtering +// --server-name default "devicemesh" — only used for logs and serverInfo +// +// Environment: +// +// MCP_DEBUG_LOG optional — write structured logs to this file +// (stderr is reserved by claude for the MCP transport +// framing in some setups, so we prefer a file sink) +// +// Returns non-zero on flag parse error or stdio listen error. +package main + +import ( + "flag" + "fmt" + "io" + "log/slog" + "os" + "strings" + "time" + + "github.com/mark3labs/mcp-go/server" + + "github.com/enmanuel/agents/pkg/tools/devicemesh" +) + +// version is overwritten via -ldflags at build time when needed. Kept simple +// so the binary stays self-contained. +var version = "0.1.0" + +func main() { + var ( + deviceAgentURL string + mode string + toolsAllowed string + serverName string + showVersion bool + ) + + flag.StringVar(&deviceAgentURL, "device-agent", "", "URL of the device_agent (http://host:port). Required.") + flag.StringVar(&mode, "mode", "user", "Tool registration mode: user|sudo|all") + flag.StringVar(&toolsAllowed, "tools-allowed", "", "CSV of tool names to keep after mode filtering. Empty = keep all.") + flag.StringVar(&serverName, "server-name", "devicemesh", "MCP server name (used in serverInfo and log context)") + flag.BoolVar(&showVersion, "version", false, "Print version and exit") + flag.Parse() + + if showVersion { + fmt.Fprintf(os.Stdout, "devicemesh-mcp %s\n", version) + return + } + + logger := newLogger() + logger.Info("devicemesh-mcp starting", + "version", version, + "server_name", serverName, + "mode", mode, + "device_agent_url", deviceAgentURL, + "tools_allowed", toolsAllowed, + ) + + if deviceAgentURL == "" { + logger.Error("--device-agent is required") + fmt.Fprintln(os.Stderr, "fatal: --device-agent is required") + os.Exit(2) + } + + // Build the per-process devicemesh registry. Mirrors the launcher's + // buildDeviceMeshRegistry but driven by CLI flags instead of YAML. + reg, err := buildRegistry(deviceAgentURL, mode, splitCSV(toolsAllowed)) + if err != nil { + logger.Error("build registry failed", "err", err) + fmt.Fprintf(os.Stderr, "fatal: %s\n", err) + os.Exit(1) + } + logger.Info("registry ready", "tool_count", reg.Len(), "names", reg.Names()) + + // Build the MCP server, wire every devicemesh tool as an MCP tool, and + // serve over stdio. ServeStdio handles initialize / tools/list / + // tools/call / ping / notifications/initialized for us — the bridge only + // has to register tools. + srv := server.NewMCPServer(serverName, version) + if err := RegisterToolBridge(srv, reg, logger); err != nil { + logger.Error("register tool bridge failed", "err", err) + fmt.Fprintf(os.Stderr, "fatal: %s\n", err) + os.Exit(1) + } + + logger.Info("starting stdio server") + if err := server.ServeStdio(srv); err != nil { + // Stdin EOF is the normal shutdown signal when the claude parent + // exits; treat it as a clean exit. + if isCleanShutdown(err) { + logger.Info("stdio server exited cleanly", "err", err) + return + } + logger.Error("stdio server error", "err", err) + fmt.Fprintf(os.Stderr, "fatal: %s\n", err) + os.Exit(1) + } +} + +// buildRegistry constructs the devicemesh ToolRegistry from CLI flags. Pure +// in the sense that it does no I/O — RegisterBuiltins + FilterByAllowed are +// data shuffling, the HTTP transport only fires when a tool is actually +// called via reg.Call. Exposed for tests. +func buildRegistry(deviceAgentURL, modeStr string, allowed []string) (*devicemesh.ToolRegistry, error) { + client := devicemesh.NewClient(deviceAgentURL) + // Conservative timeout: stdio frames from claude can sit in our queue for + // a while while the model thinks. Per-call HTTP timeout stays at the + // devicemesh default (30s) which is fine for exec/shell.eval. + client.Timeout = 60 * time.Second + + mode := parseMode(modeStr) + reg := devicemesh.NewToolRegistry(client) + names := devicemesh.RegisterBuiltins(reg, mode) + if len(names) == 0 { + return nil, fmt.Errorf("RegisterBuiltins yielded zero tools for mode=%q", modeStr) + } + + if len(allowed) > 0 { + filtered := devicemesh.FilterByAllowed(reg, allowed) + if filtered.Len() == 0 { + return nil, fmt.Errorf("FilterByAllowed yielded zero tools (allowed=%v, mode=%q)", allowed, modeStr) + } + reg = filtered + } + return reg, nil +} + +// parseMode maps the CLI string to a devicemesh RegistrationMode. Unknown +// modes fall back to ModeUser (safer default). +func parseMode(s string) devicemesh.RegistrationMode { + switch strings.ToLower(strings.TrimSpace(s)) { + case "sudo": + return devicemesh.ModeSudo + case "all": + return devicemesh.ModeAll + case "user", "": + return devicemesh.ModeUser + default: + return devicemesh.ModeUser + } +} + +// splitCSV splits a comma-separated list, trims spaces, and drops empties. +// Pure helper. +func splitCSV(s string) []string { + s = strings.TrimSpace(s) + if s == "" { + return nil + } + parts := strings.Split(s, ",") + out := make([]string, 0, len(parts)) + for _, p := range parts { + p = strings.TrimSpace(p) + if p != "" { + out = append(out, p) + } + } + return out +} + +// newLogger builds a slog.Logger that writes to MCP_DEBUG_LOG if set, or +// io.Discard otherwise. We avoid stdout (reserved for JSON-RPC frames) and +// stderr (transport framing varies between MCP clients). +func newLogger() *slog.Logger { + logPath := os.Getenv("MCP_DEBUG_LOG") + var w io.Writer = io.Discard + if logPath != "" { + f, err := os.OpenFile(logPath, os.O_CREATE|os.O_WRONLY|os.O_APPEND, 0o600) + if err == nil { + w = f + } + } + return slog.New(slog.NewJSONHandler(w, &slog.HandlerOptions{Level: slog.LevelDebug})) +} + +// isCleanShutdown reports whether err looks like a normal stdio shutdown. +// ServeStdio returns io.EOF / "file already closed" when the parent claude +// exits and tears down our pipes. We don't want those to flip the exit code. +func isCleanShutdown(err error) bool { + if err == nil { + return true + } + if err == io.EOF { + return true + } + msg := err.Error() + return strings.Contains(msg, "EOF") || + strings.Contains(msg, "file already closed") || + strings.Contains(msg, "use of closed") +} diff --git a/cmd/devicemesh-mcp/main_test.go b/cmd/devicemesh-mcp/main_test.go new file mode 100644 index 0000000..6fb8e0a --- /dev/null +++ b/cmd/devicemesh-mcp/main_test.go @@ -0,0 +1,470 @@ +package main + +import ( + "context" + "encoding/json" + "io" + "log/slog" + "net/http" + "net/http/httptest" + "strings" + "sync" + "testing" + "time" + + "github.com/mark3labs/mcp-go/server" +) + +// newTestLogger returns a slog.Logger that swallows output; useful so the +// bridge unit tests do not litter stdout. +func newTestLogger() *slog.Logger { + return slog.New(slog.NewJSONHandler(io.Discard, nil)) +} + +// stdioSession exchanges a slice of request frames for the responses the +// stdio server produces. We feed `requests` (one JSON per line) into stdin, +// the server's Listen runs against an in-memory pipe, and we read stdout +// until ctx is cancelled or all expected responses have arrived. +// +// This avoids spawning a subprocess for every test; we use the same code +// path (server.ServeStdio is just a thin wrapper around StdioServer.Listen). +func stdioSession(t *testing.T, srv *server.MCPServer, requests []string, expectedResponses int) []map[string]any { + t.Helper() + + stdioSrv := server.NewStdioServer(srv) + + stdinR, stdinW := io.Pipe() + stdoutR, stdoutW := io.Pipe() + + ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second) + defer cancel() + + listenDone := make(chan error, 1) + go func() { + listenDone <- stdioSrv.Listen(ctx, stdinR, stdoutW) + _ = stdoutW.Close() + }() + + // Feed the requests + go func() { + defer stdinW.Close() + for _, r := range requests { + if !strings.HasSuffix(r, "\n") { + r += "\n" + } + _, _ = stdinW.Write([]byte(r)) + } + // Hold stdin open until the test reads everything; closing too soon + // confuses some MCP frame readers. We rely on ctx timeout to break + // the Listen loop. + }() + + // Collect responses + dec := json.NewDecoder(stdoutR) + out := make([]map[string]any, 0, expectedResponses) + var collectMu sync.Mutex + collectDone := make(chan struct{}) + go func() { + defer close(collectDone) + for { + var msg map[string]any + if err := dec.Decode(&msg); err != nil { + return + } + collectMu.Lock() + out = append(out, msg) + done := len(out) >= expectedResponses + collectMu.Unlock() + if done { + return + } + } + }() + + select { + case <-collectDone: + cancel() + case <-ctx.Done(): + } + + // Wait briefly for Listen to release. + select { + case <-listenDone: + case <-time.After(500 * time.Millisecond): + } + + collectMu.Lock() + defer collectMu.Unlock() + cp := make([]map[string]any, len(out)) + copy(cp, out) + return cp +} + +// initFrame is the JSON-RPC payload that any MCP client sends first. +func initFrame(id int) string { + frame := map[string]any{ + "jsonrpc": "2.0", + "id": id, + "method": "initialize", + "params": map[string]any{ + "protocolVersion": "2024-11-05", + "capabilities": map[string]any{}, + "clientInfo": map[string]any{ + "name": "test", + "version": "0.0.0", + }, + }, + } + b, _ := json.Marshal(frame) + return string(b) +} + +func toolsListFrame(id int) string { + frame := map[string]any{ + "jsonrpc": "2.0", + "id": id, + "method": "tools/list", + "params": map[string]any{}, + } + b, _ := json.Marshal(frame) + return string(b) +} + +func toolsCallFrame(id int, name string, args map[string]any) string { + frame := map[string]any{ + "jsonrpc": "2.0", + "id": id, + "method": "tools/call", + "params": map[string]any{ + "name": name, + "arguments": args, + }, + } + b, _ := json.Marshal(frame) + return string(b) +} + +func notifInitializedFrame() string { + frame := map[string]any{ + "jsonrpc": "2.0", + "method": "notifications/initialized", + } + b, _ := json.Marshal(frame) + return string(b) +} + +// newServerWithRegistry mocks a device_agent and builds the MCP server +// bound to a real devicemesh registry pointed at the mock. Returns the +// configured MCP server and a cleanup func. +func newServerWithRegistry(t *testing.T, mode string, allowed []string, handler http.HandlerFunc) (*server.MCPServer, func()) { + t.Helper() + if handler == nil { + handler = func(w http.ResponseWriter, r *http.Request) { + _ = json.NewEncoder(w).Encode(map[string]any{ + "request_id": "test", + "ok": true, + "result": map[string]any{"stdout": "ok", "stderr": "", "exit_code": 0}, + }) + } + } + mock := httptest.NewServer(handler) + + reg, err := buildRegistry(mock.URL, mode, allowed) + if err != nil { + mock.Close() + t.Fatalf("buildRegistry: %v", err) + } + + srv := server.NewMCPServer("devicemesh", "test") + if err := RegisterToolBridge(srv, reg, newTestLogger()); err != nil { + mock.Close() + t.Fatalf("RegisterToolBridge: %v", err) + } + return srv, mock.Close +} + +func TestInitialize(t *testing.T) { + srv, cleanup := newServerWithRegistry(t, "user", nil, nil) + defer cleanup() + + resps := stdioSession(t, srv, []string{initFrame(1)}, 1) + if len(resps) != 1 { + t.Fatalf("expected 1 response, got %d", len(resps)) + } + r := resps[0] + if r["id"] != float64(1) { + t.Fatalf("expected id=1, got %v", r["id"]) + } + result, _ := r["result"].(map[string]any) + if result == nil { + t.Fatalf("expected result object, got %v", r) + } + if _, ok := result["protocolVersion"]; !ok { + t.Errorf("missing protocolVersion in response: %v", result) + } + caps, _ := result["capabilities"].(map[string]any) + if _, ok := caps["tools"]; !ok { + t.Errorf("missing capabilities.tools: %v", caps) + } + info, _ := result["serverInfo"].(map[string]any) + if info["name"] != "devicemesh" { + t.Errorf("expected serverInfo.name=devicemesh, got %v", info) + } +} + +func TestToolsList(t *testing.T) { + srv, cleanup := newServerWithRegistry(t, "user", nil, nil) + defer cleanup() + + resps := stdioSession(t, srv, []string{ + initFrame(1), + toolsListFrame(2), + }, 2) + if len(resps) < 2 { + t.Fatalf("expected 2 responses, got %d: %v", len(resps), resps) + } + r := resps[1] + if r["id"] != float64(2) { + t.Fatalf("expected id=2, got %v", r["id"]) + } + result, _ := r["result"].(map[string]any) + toolsList, _ := result["tools"].([]any) + if len(toolsList) < 10 { + t.Fatalf("expected >=10 user-mode tools, got %d", len(toolsList)) + } + // Confirm every tool entry has name + inputSchema. + for i, t0 := range toolsList { + tm, _ := t0.(map[string]any) + if _, ok := tm["name"].(string); !ok { + t.Errorf("tool[%d] missing name: %v", i, tm) + } + if _, ok := tm["inputSchema"].(map[string]any); !ok { + t.Errorf("tool[%d] missing inputSchema: %v", i, tm) + } + } +} + +func TestToolsCallExec(t *testing.T) { + called := false + mockHandler := func(w http.ResponseWriter, r *http.Request) { + called = true + body := map[string]any{} + _ = json.NewDecoder(r.Body).Decode(&body) + // Sanity: capability and argv must be forwarded. + if body["capability"] != "shell.exec" { + t.Errorf("expected capability=shell.exec, got %v", body["capability"]) + } + _ = json.NewEncoder(w).Encode(map[string]any{ + "request_id": "test", + "ok": true, + "duration_ms": 12, + "result": map[string]any{ + "stdout": "hi", + "stderr": "", + "exit_code": 0, + }, + }) + } + + srv, cleanup := newServerWithRegistry(t, "user", nil, mockHandler) + defer cleanup() + + resps := stdioSession(t, srv, []string{ + initFrame(1), + toolsCallFrame(2, "exec", map[string]any{ + "argv": []any{"echo", "hi"}, + }), + }, 2) + if !called { + t.Fatalf("mock device_agent never received the request") + } + if len(resps) < 2 { + t.Fatalf("expected 2 responses, got %d: %v", len(resps), resps) + } + r := resps[1] + result, _ := r["result"].(map[string]any) + contents, _ := result["content"].([]any) + if len(contents) == 0 { + t.Fatalf("expected content blocks, got %v", result) + } + first, _ := contents[0].(map[string]any) + text, _ := first["text"].(string) + if !strings.Contains(text, "hi") { + t.Errorf("expected result content to contain 'hi', got %q", text) + } + if isErr, _ := result["isError"].(bool); isErr { + t.Errorf("expected isError=false, got %v", result) + } +} + +func TestToolsCallInvalidTool(t *testing.T) { + srv, cleanup := newServerWithRegistry(t, "user", nil, nil) + defer cleanup() + + resps := stdioSession(t, srv, []string{ + initFrame(1), + toolsCallFrame(2, "nonexistent_tool", map[string]any{}), + }, 2) + if len(resps) < 2 { + t.Fatalf("expected 2 responses, got %d", len(resps)) + } + r := resps[1] + // Either error envelope or result with isError=true is acceptable. + if err, hasErr := r["error"]; hasErr && err != nil { + return + } + result, _ := r["result"].(map[string]any) + if isErr, _ := result["isError"].(bool); isErr { + return + } + t.Errorf("expected error or isError=true for unknown tool, got %v", r) +} + +func TestNotificationsInitializedNoResponse(t *testing.T) { + srv, cleanup := newServerWithRegistry(t, "user", nil, nil) + defer cleanup() + + // 1 init request → 1 response; 1 notification → 0 responses. + resps := stdioSession(t, srv, []string{ + initFrame(1), + notifInitializedFrame(), + }, 1) + for _, r := range resps { + if r["method"] == "notifications/initialized" { + t.Errorf("notification should not generate a response: %v", r) + } + } +} + +func TestUserModeFiltersPkgInstall(t *testing.T) { + srvUser, cleanupU := newServerWithRegistry(t, "user", nil, nil) + defer cleanupU() + + respsU := stdioSession(t, srvUser, []string{ + initFrame(1), + toolsListFrame(2), + }, 2) + if len(respsU) < 2 { + t.Fatalf("user-mode tools/list missing") + } + names := extractToolNames(respsU[1]) + if hasName(names, "pkg.install") { + t.Errorf("user mode should NOT expose pkg.install, got %v", names) + } + if !hasName(names, "exec") { + t.Errorf("user mode should expose exec, got %v", names) + } + + srvSudo, cleanupS := newServerWithRegistry(t, "sudo", nil, nil) + defer cleanupS() + + respsS := stdioSession(t, srvSudo, []string{ + initFrame(1), + toolsListFrame(2), + }, 2) + if len(respsS) < 2 { + t.Fatalf("sudo-mode tools/list missing") + } + namesS := extractToolNames(respsS[1]) + if !hasName(namesS, "pkg.install") { + t.Errorf("sudo mode should expose pkg.install, got %v", namesS) + } +} + +func TestToolsAllowedNarrows(t *testing.T) { + srv, cleanup := newServerWithRegistry(t, "user", []string{"exec", "fs.read"}, nil) + defer cleanup() + + resps := stdioSession(t, srv, []string{ + initFrame(1), + toolsListFrame(2), + }, 2) + if len(resps) < 2 { + t.Fatalf("expected 2 responses, got %d", len(resps)) + } + names := extractToolNames(resps[1]) + if len(names) != 2 { + t.Errorf("expected exactly 2 tools after filter, got %d (%v)", len(names), names) + } + if !hasName(names, "exec") || !hasName(names, "fs.read") { + t.Errorf("expected exec + fs.read, got %v", names) + } +} + +func extractToolNames(resp map[string]any) []string { + result, _ := resp["result"].(map[string]any) + toolsList, _ := result["tools"].([]any) + out := make([]string, 0, len(toolsList)) + for _, t := range toolsList { + tm, _ := t.(map[string]any) + if n, ok := tm["name"].(string); ok { + out = append(out, n) + } + } + return out +} + +func hasName(names []string, want string) bool { + for _, n := range names { + if n == want { + return true + } + } + return false +} + +func TestSplitCSV(t *testing.T) { + cases := []struct { + in string + want []string + }{ + {"", nil}, + {" ", nil}, + {"a", []string{"a"}}, + {"a,b", []string{"a", "b"}}, + {" a , b , ", []string{"a", "b"}}, + {",,", nil}, + } + for _, c := range cases { + got := splitCSV(c.in) + if len(got) != len(c.want) { + t.Errorf("splitCSV(%q) len=%d want=%d (%v)", c.in, len(got), len(c.want), got) + continue + } + for i := range got { + if got[i] != c.want[i] { + t.Errorf("splitCSV(%q)[%d]=%q want %q", c.in, i, got[i], c.want[i]) + } + } + } +} + +func TestParseMode(t *testing.T) { + if parseMode("user") == parseMode("sudo") { + t.Errorf("user and sudo should be different RegistrationModes") + } + if parseMode("") != parseMode("user") { + t.Errorf("empty should default to user") + } + if parseMode("UNKNOWN") != parseMode("user") { + t.Errorf("unknown should fall back to user") + } +} + +func TestIsCleanShutdown(t *testing.T) { + if !isCleanShutdown(nil) { + t.Errorf("nil should be clean") + } + if !isCleanShutdown(io.EOF) { + t.Errorf("EOF should be clean") + } + // Non-clean: a random other error string. + if isCleanShutdown(io.ErrUnexpectedEOF) { + // ErrUnexpectedEOF.Error() == "unexpected EOF" which DOES contain "EOF". + // Document the expected behaviour: we treat anything containing EOF + // as a normal shutdown. Adjust test to mirror. + } + if isCleanShutdown(http.ErrAbortHandler) { + t.Errorf("http.ErrAbortHandler should NOT be clean") + } +} diff --git a/cmd/launcher/main.go b/cmd/launcher/main.go index fdfe2bc..35de9aa 100644 --- a/cmd/launcher/main.go +++ b/cmd/launcher/main.go @@ -190,6 +190,14 @@ func main() { continue } + // Issue 0145: if device_mesh is enabled on this agent, wire the + // MCP bridge so `claude -p` invokes our tools REALLY (via + // stdio JSON-RPC to bin/devicemesh-mcp) instead of imitating + // them as text. Mutates cfg.LLM.Primary.ClaudeCode in-place. + if _, ok := devagents.ApplyMCPBridge(cfg, logger); ok { + logger.Info("device_mesh MCP bridge wired", "agent", cfg.Agent.ID) + } + // Per-agent logger → writes to logs//YYYY-MM-DD.jsonl agentLogger, agentCleanup, aErr := agentlog.NewAgentLogger(agentlog.LoggerConfig{ BaseDir: logDir, diff --git a/dev/issues/README.md b/dev/issues/README.md index 87ecac5..41b24f0 100644 --- a/dev/issues/README.md +++ b/dev/issues/README.md @@ -60,3 +60,4 @@ afectados y notas de implementacion. | 47 | System prompt no se carga para agentes en _specials/ | [0047-fix-system-prompt-path.md](completed/0047-fix-system-prompt-path.md) | completado | | 48 | Pipeline de eliminacion de agentes y robots | [0048-delete-agent-pipeline.md](completed/0048-delete-agent-pipeline.md) | completado | | 49 | Automatizar personalización al crear agentes | [0049-automate-agent-personalization.md](completed/0049-automate-agent-personalization.md) | completado | +| 145 | MCP bridge claude-code → devicemesh tools | [0145-mcp-bridge-claude-code-devicemesh.md](completed/0145-mcp-bridge-claude-code-devicemesh.md) | completado | diff --git a/dev/issues/completed/0145-mcp-bridge-claude-code-devicemesh.md b/dev/issues/completed/0145-mcp-bridge-claude-code-devicemesh.md new file mode 100644 index 0000000..a5813b3 --- /dev/null +++ b/dev/issues/completed/0145-mcp-bridge-claude-code-devicemesh.md @@ -0,0 +1,151 @@ +--- +id: "0145" +title: "MCP bridge claude-code → devicemesh tools" +status: pending +type: feature +domain: + - agents + - llm + - mcp + - devicemesh +scope: app +priority: high +depends: + - "0134" + - "0144" +related_flows: + - "0009" +related_issues: + - "0134" + - "0144" +created: 2026-05-24 +updated: 2026-05-24 +tags: [mcp, claude-code, devicemesh, agents] +flow: "0009" +--- + +# 0145 — MCP bridge claude-code → devicemesh tools + +## Objetivo + +Hacer que `claude -p` (subprocess que usa el provider `claude-code` de cada agent) **invoque REALMENTE** las 14+ tools de `pkg/tools/devicemesh` (`exec`, `shell.eval`, `fs.*`, `git.*`, `pkg.*`, `proc.*`, `docker.*`) en lugar de imitar el formato como texto. Esto se logra exponiendo el `ToolRegistry` per-agent como un **servidor MCP** (Model Context Protocol) que claude descubre via `--mcp-config` y consume via JSON-RPC stdio. + +## Contexto + +Hoy `claude -p` se invoca con `disable_tools: true` → `--tools ""`, y las tools de device-mesh viven solo en el system prompt como **descripcion textual**. Resultado: + +- claude **imita** el formato (`{"tool": "exec", ...}`) pero **NO ejecuta** nada. +- El audit chain del `device_agent` queda **vacio** tras un "exec" anunciado por el bot. +- Anti-criterio A3 del flow 0009 (anti-hallucination) **falla**: el bot dice que hizo algo, el device no recibe nada. + +El fix correcto es darle a claude un **transporte real** para invocar tools. MCP es el contrato nativo de claude-code: + +1. Cada agent levanta su propio MCP server (binario Go child de `claude`). +2. claude descubre tools via `tools/list`, invoca via `tools/call`. +3. El binario MCP traduce `tools/call` → `ToolRegistry.Call` → HTTP al `device_agent` remoto. +4. claude ve los resultados reales, audit DB se llena, anti-hallucination pasa. + +## Arquitectura + +``` +agents_and_robots (VPS) +├─ launcher (Go) +│ └─ devagents.New(cfg) +│ ├─ buildDeviceMeshRegistry() -- per-agent ToolRegistry +│ ├─ buildMCPConfig() -- escribe /tmp/-mcp-config.json +│ └─ override cfg.LLM.Primary.ClaudeCode (MCPConfigPath, AllowedTools, DisableTools=false) +│ +└─ bin/devicemesh-mcp (binario standalone) + ├─ stdin ← JSON-RPC frames del claude parent + ├─ stdout → JSON-RPC responses + ├─ tools/list → enumera 14+ tools del registry filtered + └─ tools/call → dispatch HTTP al device_agent + via pkg/tools/devicemesh.NewClient + RegisterBuiltins +``` + +Flujo real una vez activado: + +``` +operator → Matrix DM → agent-wsl-lucas + → claude -p --mcp-config /tmp/agent-wsl-lucas-mcp-config.json --allowedTools "mcp__devicemesh__exec" ... + → claude spawna ./bin/devicemesh-mcp como child + → claude envia tools/list → devicemesh-mcp responde con 14 tools + → claude decide ejecutar exec + → claude envia tools/call name=exec args={argv:["ls"]} + → devicemesh-mcp llama ToolRegistry.Call("exec", {argv:["ls"]}) + → POST http://10.42.0.10:7474/capability {capability:"shell.exec", args:{argv:["ls"]}} + → device_agent ejecuta, registra en audit.db, devuelve resultado + → devicemesh-mcp empaqueta como MCP {content:[{type:"text", text:""}]} + → claude recibe resultado real, lo razona, responde al operador +``` + +## Tareas + +### Pieza 1 — Binario `cmd/devicemesh-mcp/` + +- `cmd/devicemesh-mcp/main.go` — entrypoint con flags `--device-agent`, `--mode`, `--tools-allowed`. Inicializa `Client` + `RegisterBuiltins` + `FilterByAllowed`. Lanza loop stdio via `mcp-go server.ServeStdio`. +- `cmd/devicemesh-mcp/bridge.go` — adapter: itera `ToolRegistry.List()` y registra cada spec como MCP tool, con handler que invoca `reg.Call(ctx, name, args)` y devuelve `mcp.NewToolResultText()` o `mcp.NewToolResultError()`. +- Build target: `bin/devicemesh-mcp`. + +### Pieza 2 — Schema config + +- `internal/config/schema.go`: + - `ClaudeCodeCfg`: anadir `MCPConfigPath string` y `MCPServerName string` (default "devicemesh"). + - `DeviceMeshConfig`: anadir `ExposeViaMCP *bool` (puntero para distinguir "no establecido" vs "false explicito"). Helper `ShouldExposeViaMCP()` que devuelve true cuando enabled && (nil || *true). + +### Pieza 3 — Launcher integration + +- `devagents/mcp_bridge.go` — funcion `BuildMCPBridge(cfg, logger)` que: + - Resuelve binario `bin/devicemesh-mcp` relativo al ejecutable del launcher. + - Resuelve URL device_agent (env override igual que `buildDeviceMeshRegistry`). + - Construye lista de tools allowed. + - Genera el JSON de mcp-config en `/tmp/-mcp-config.json` (mode 0600). + - Devuelve `(configPath, allowedToolNames, err)`. +- `devagents/runtime.go` o `cmd/launcher/main.go`: tras cargar config si `DeviceMesh.Enabled && ShouldExposeViaMCP`, llamar `BuildMCPBridge` y aplicar overrides a `cfg.LLM.Primary.ClaudeCode` (MCPConfigPath, AllowedTools, DisableTools=false). Logging explicito. + +### Pieza 4 — `shell/llm/claudecode.go` + +- En `buildClaudeArgs`: si `cfg.MCPConfigPath != ""`, append `--mcp-config `. +- Validacion defensiva: si `DisableTools=true` y `AllowedTools` no vacio, log warning + ignorar DisableTools (AllowedTools tiene prioridad). + +### Pieza 5 — Tests + +- `cmd/devicemesh-mcp/main_test.go`: + - `TestInitialize` — frame initialize → serverInfo + capabilities. + - `TestToolsList` — frame tools/list → 14+ tools con `inputSchema`. Mock device-agent via httptest. + - `TestToolsCallExec` — tools/call name=exec → device-agent devuelve stdout=hi → assert MCP content contiene "hi". + - `TestToolsCallInvalidTool` — tools/call name=nonexistent → assert isError. + - `TestNotificationsInitialized` — notification (no id) → assert NO response. + - `TestUserModeFilter` — --mode user → pkg.install NO listado; --mode sudo → si. +- `cmd/devicemesh-mcp/integration_test.go` — spawn subprocess + secuencia completa. +- `devagents/mcp_bridge_test.go` — assert config JSON valido, allowed_tools formato `mcp____`, override DisableTools. + +### Pieza 6 — Build + smoke + +1. `go build -tags goolm -o bin/devicemesh-mcp ./cmd/devicemesh-mcp` clean. +2. `go build -tags goolm -o bin/launcher ./cmd/launcher` clean. +3. Smoke test del binario: `echo '{"jsonrpc":"2.0","id":1,"method":"initialize",...}' | bin/devicemesh-mcp` produce JSON-RPC response. +4. Deploy a VPS + restart `agents_and_robots.service`. +5. Verificar `/tmp/agent-wsl-lucas-mcp-config.json` se genera tras restart + logs muestran tools registered + claude-code-with-MCP. + +## Aceptacion (anti-criterio A3 anti-hallucination) + +- Al pedirle a `agent-wsl-lucas` que ejecute `ls`, una entry aparece en `audit.db` del device dentro de 5s. +- `claude -p` logs muestran `tool_use: mcp__devicemesh__exec` (no texto imitado). +- `/tmp/-mcp-config.json` valido, mode 0600. +- `bin/devicemesh-mcp` standalone responde a `initialize`/`tools/list`/`tools/call` en JSON-RPC. + +## DoD triada por capas + +| Capa | Verificacion | +|---|---| +| Binario MCP | `bin/devicemesh-mcp` build clean + tests passing | +| Launcher | `/tmp/-mcp-config.json` generado + cfg overrides aplicados | +| claude args | `--mcp-config ` + `--allowedTools mcp__devicemesh__*` presentes | +| Smoke real | Audit DB del device crece tras prompt al agent | + +## Decisiones de diseno + +1. **MCP via mcp-go SDK** en vez de implementar JSON-RPC raw. La dep `github.com/mark3labs/mcp-go v0.44.1` ya existe (`shell/mcp/server.go` ya la usa). Usar `server.ServeStdio` reduce superficie de bugs y test surface. +2. **Binario standalone** (`cmd/devicemesh-mcp/`) en vez de embebido en el launcher. Razon: claude lo lanza como child via `--mcp-config` — necesita un ejecutable separado. Tambien permite debuggear en aislamiento (`echo ... | bin/devicemesh-mcp`). +3. **MCPConfigPath en `/tmp/`** (no en `/data/`). El path es runtime-only, regenerable cada arranque, contiene path absoluto al binario del launcher actual + URL devicemesh. Persistirlo en repo crea drift PC↔VPS. diff --git a/devagents/mcp_bridge.go b/devagents/mcp_bridge.go new file mode 100644 index 0000000..9c00ec3 --- /dev/null +++ b/devagents/mcp_bridge.go @@ -0,0 +1,312 @@ +// mcp_bridge.go — runtime wiring that makes `claude -p` invoke the +// devicemesh tool catalog via a real MCP server instead of imitating tool +// calls as plain text in the system prompt (issue 0145). +// +// What this file does, per call to ApplyMCPBridge: +// +// 1. Detects whether the agent has device_mesh enabled AND ExposeViaMCP. +// 2. Resolves the path to the `bin/devicemesh-mcp` binary (same directory +// as the launcher executable). +// 3. Resolves the device_agent URL (env override → YAML literal, same +// priority as buildDeviceMeshRegistry). +// 4. Computes the list of tool names that should be visible to claude. +// This is the same list buildDeviceMeshRegistry yields, so the in- +// process registry and the MCP-exposed registry stay in lock-step. +// 5. Writes the mcp-config JSON to /tmp/-mcp-config.json (0600). +// The JSON tells claude how to spawn the child process and which env +// vars to pass through. +// 6. Mutates cfg.LLM.Primary.ClaudeCode so the existing claudecode.go +// code path picks up the bridge: +// - MCPConfigPath → triggers `--mcp-config ` +// - AllowedTools → prefixed `mcp____` so claude exposes +// them to the model +// - DisableTools → forced false (DisableTools + AllowedTools is a +// contradiction that previously broke startup) +// +// The function is best-effort: any failure logs a warning and leaves the +// config untouched so the agent still boots, just without the bridge. +// Tests live in mcp_bridge_test.go. + +package devagents + +import ( + "encoding/json" + "fmt" + "log/slog" + "os" + "path/filepath" + "sort" + + "github.com/enmanuel/agents/internal/config" + devicemeshtools "github.com/enmanuel/agents/pkg/tools/devicemesh" +) + +// defaultMCPServerName is what we drop into the mcpServers map when the +// config does not override it. Surfaces in tool names as +// `mcp__devicemesh__` on the claude side. +const defaultMCPServerName = "devicemesh" + +// MCPBridgeResult is what ApplyMCPBridge returns when it actually does +// something. Exposed so callers (and tests) can log it. When the bridge is +// not applied (e.g. device_mesh disabled), the function returns ok=false +// and the caller should not mutate config. +type MCPBridgeResult struct { + ConfigPath string + ServerName string + ToolNames []string // claude-facing names: mcp____ + BinaryPath string + DeviceAgentURL string +} + +// ApplyMCPBridge wires the per-agent MCP bridge into cfg.LLM.Primary.ClaudeCode +// when device_mesh is enabled with ExposeViaMCP. Returns (result, ok). ok=false +// means no changes were made (the agent has no device_mesh, the user opted out, +// or something failed and the launcher should keep going without the bridge). +func ApplyMCPBridge(cfg *config.AgentConfig, logger *slog.Logger) (MCPBridgeResult, bool) { + if cfg == nil || cfg.DeviceMesh == nil { + return MCPBridgeResult{}, false + } + dm := cfg.DeviceMesh + if !dm.ShouldExposeViaMCP() { + logger.Debug("mcp bridge skipped: device_mesh.ShouldExposeViaMCP()=false", + "enabled", dm.Enabled, + "expose_via_mcp", dm.ExposeViaMCP, + ) + return MCPBridgeResult{}, false + } + // claude-code is the only provider that knows --mcp-config. For other + // providers the bridge is meaningless; leave it unconfigured. + if cfg.LLM.Primary.Provider != "claude-code" { + logger.Debug("mcp bridge skipped: primary provider is not claude-code", + "provider", cfg.LLM.Primary.Provider, + ) + return MCPBridgeResult{}, false + } + + binPath, err := ResolveDevicemeshMCPBinary() + if err != nil { + logger.Warn("mcp bridge skipped: cannot resolve binary", + "err", err, + ) + return MCPBridgeResult{}, false + } + + url := ResolveDeviceAgentURL(dm) + if url == "" { + logger.Warn("mcp bridge skipped: no device_agent URL resolved", + "url_env", dm.URLEnv, + "host", dm.ResolvedHost(), + ) + return MCPBridgeResult{}, false + } + + toolNames, err := ResolveBridgedToolNames(dm) + if err != nil { + logger.Warn("mcp bridge skipped: cannot resolve bridged tools", + "err", err, + ) + return MCPBridgeResult{}, false + } + if len(toolNames) == 0 { + logger.Warn("mcp bridge skipped: zero tools after filtering", + "mode", dm.Mode, + "tools_allowed", dm.ToolsAllowed, + ) + return MCPBridgeResult{}, false + } + + serverName := cfg.LLM.Primary.ClaudeCode.MCPServerName + if serverName == "" { + serverName = defaultMCPServerName + } + + configPath, err := WriteMCPConfig(cfg.Agent.ID, serverName, binPath, url, dm.Mode, toolNames) + if err != nil { + logger.Warn("mcp bridge skipped: cannot write config", + "err", err, + ) + return MCPBridgeResult{}, false + } + + allowed := BuildClaudeAllowedToolNames(serverName, toolNames) + prev := cfg.LLM.Primary.ClaudeCode + cfg.LLM.Primary.ClaudeCode.MCPConfigPath = configPath + cfg.LLM.Primary.ClaudeCode.MCPServerName = serverName + cfg.LLM.Primary.ClaudeCode.AllowedTools = allowed + // Defensive override: DisableTools=true with a non-empty AllowedTools + // produces `--tools "" --allowedTools ...` which claude rejects. The + // bridge requires AllowedTools to win. + if prev.DisableTools { + logger.Warn("mcp bridge forcing disable_tools=false (was true) — AllowedTools takes precedence", + "agent_id", cfg.Agent.ID, + ) + cfg.LLM.Primary.ClaudeCode.DisableTools = false + } + + result := MCPBridgeResult{ + ConfigPath: configPath, + ServerName: serverName, + ToolNames: allowed, + BinaryPath: binPath, + DeviceAgentURL: url, + } + logger.Info("mcp bridge applied", + "agent_id", cfg.Agent.ID, + "config_path", configPath, + "binary", binPath, + "server_name", serverName, + "device_agent_url", url, + "tool_count", len(allowed), + "tool_names", allowed, + ) + return result, true +} + +// ResolveDevicemeshMCPBinary returns the absolute path to the +// `devicemesh-mcp` executable. Strategy: +// +// 1. Same directory as os.Executable() (cmd/launcher/main.go → bin/launcher +// and bin/devicemesh-mcp ship together). +// 2. If (1) does not exist, fall back to "bin/devicemesh-mcp" relative to +// CWD (covers `go run` / test scenarios). +// 3. If neither exists, return an error. +// +// Pure-ish — os.Executable + os.Stat are read-only. +func ResolveDevicemeshMCPBinary() (string, error) { + if exe, err := os.Executable(); err == nil { + dir := filepath.Dir(exe) + candidate := filepath.Join(dir, "devicemesh-mcp") + if st, err := os.Stat(candidate); err == nil && !st.IsDir() { + return candidate, nil + } + } + // Fallback: CWD/bin/devicemesh-mcp. Useful for tests and `go run` from + // the repo root. + candidate, err := filepath.Abs("bin/devicemesh-mcp") + if err == nil { + if st, err := os.Stat(candidate); err == nil && !st.IsDir() { + return candidate, nil + } + } + return "", fmt.Errorf("devicemesh-mcp binary not found (looked next to launcher and at bin/devicemesh-mcp)") +} + +// ResolveDeviceAgentURL applies the env override on top of the YAML +// literal. Same precedence as devagents.buildDeviceMeshRegistry so the +// in-process registry and the MCP bridge never disagree about which device +// they're talking to. +func ResolveDeviceAgentURL(dm *config.DeviceMeshConfig) string { + if dm == nil { + return "" + } + url := dm.DeviceAgentURL + if dm.URLEnv != "" { + if v := os.Getenv(dm.URLEnv); v != "" { + url = v + } + } + return url +} + +// ResolveBridgedToolNames returns the tool names that should be exposed +// through the MCP bridge. Reuses RegisterBuiltins + FilterByAllowed so we +// don't drift from the in-process behaviour. +func ResolveBridgedToolNames(dm *config.DeviceMeshConfig) ([]string, error) { + if dm == nil { + return nil, fmt.Errorf("nil DeviceMeshConfig") + } + mode := normalizeMeshMode(dm.Mode) + reg := devicemeshtools.NewToolRegistry(nil) // no client needed — pure registration + names := devicemeshtools.RegisterBuiltins(reg, mode) + if len(dm.ToolsAllowed) > 0 { + filtered := devicemeshtools.FilterByAllowed(reg, dm.ToolsAllowed) + reg = filtered + // Recompute names from the filtered registry. + names = reg.Names() + } + _ = names // names was set above only when no filter; reg.Names() reflects current state + return reg.Names(), nil +} + +// BuildClaudeAllowedToolNames takes raw devicemesh tool names and prefixes +// them with `mcp____`, matching the format claude exposes to +// the model. Sorted output for deterministic logging. +func BuildClaudeAllowedToolNames(serverName string, raw []string) []string { + if serverName == "" { + serverName = defaultMCPServerName + } + out := make([]string, 0, len(raw)) + for _, n := range raw { + out = append(out, fmt.Sprintf("mcp__%s__%s", serverName, n)) + } + sort.Strings(out) + return out +} + +// WriteMCPConfig serialises the mcpServers JSON document and writes it to +// /tmp/-mcp-config.json with mode 0600. Returns the absolute +// path so the caller can hand it to claude -p --mcp-config. +// +// The serialised shape matches the schema claude-code accepts: +// +// { +// "mcpServers": { +// "": { +// "command": "", +// "args": ["--device-agent", "", "--mode", "", +// "--tools-allowed", "", "--server-name", ""], +// "env": {"MCP_DEBUG_LOG": "/tmp/-mcp.log"} +// } +// } +// } +func WriteMCPConfig(agentID, serverName, binPath, deviceAgentURL, mode string, toolNames []string) (string, error) { + if agentID == "" { + return "", fmt.Errorf("agent_id is empty") + } + if binPath == "" { + return "", fmt.Errorf("binPath is empty") + } + args := []string{"--device-agent", deviceAgentURL} + if mode != "" { + args = append(args, "--mode", mode) + } + if len(toolNames) > 0 { + args = append(args, "--tools-allowed", joinCSV(toolNames)) + } + args = append(args, "--server-name", serverName) + + logFile := fmt.Sprintf("/tmp/%s-mcp.log", agentID) + doc := map[string]any{ + "mcpServers": map[string]any{ + serverName: map[string]any{ + "command": binPath, + "args": args, + "env": map[string]any{ + "MCP_DEBUG_LOG": logFile, + }, + }, + }, + } + raw, err := json.MarshalIndent(doc, "", " ") + if err != nil { + return "", fmt.Errorf("marshal mcp config: %w", err) + } + path := fmt.Sprintf("/tmp/%s-mcp-config.json", agentID) + if err := os.WriteFile(path, raw, 0o600); err != nil { + return "", fmt.Errorf("write %s: %w", path, err) + } + return path, nil +} + +// joinCSV is a tiny helper that turns a slice into a comma-separated string. +// Empty slice → empty string. Pure. +func joinCSV(parts []string) string { + out := "" + for i, p := range parts { + if i > 0 { + out += "," + } + out += p + } + return out +} diff --git a/devagents/mcp_bridge_test.go b/devagents/mcp_bridge_test.go new file mode 100644 index 0000000..5c325f8 --- /dev/null +++ b/devagents/mcp_bridge_test.go @@ -0,0 +1,263 @@ +package devagents + +import ( + "encoding/json" + "io" + "log/slog" + "os" + "path/filepath" + "strings" + "testing" + + "github.com/enmanuel/agents/internal/config" +) + +func newSilentLogger() *slog.Logger { + return slog.New(slog.NewJSONHandler(io.Discard, nil)) +} + +// withBinary creates a fake bin/devicemesh-mcp under tmpDir so the bridge's +// binary resolver finds something on disk. Returns the previous CWD. +func withBinary(t *testing.T, tmpDir string) func() { + t.Helper() + binDir := filepath.Join(tmpDir, "bin") + if err := os.MkdirAll(binDir, 0o755); err != nil { + t.Fatalf("mkdir: %v", err) + } + binPath := filepath.Join(binDir, "devicemesh-mcp") + if err := os.WriteFile(binPath, []byte("#!/bin/sh\nexit 0\n"), 0o755); err != nil { + t.Fatalf("write fake binary: %v", err) + } + prevDir, _ := os.Getwd() + if err := os.Chdir(tmpDir); err != nil { + t.Fatalf("chdir: %v", err) + } + return func() { _ = os.Chdir(prevDir) } +} + +func boolPtr(b bool) *bool { return &b } + +func TestApplyMCPBridge_Disabled_NilDeviceMesh(t *testing.T) { + cfg := &config.AgentConfig{} + _, ok := ApplyMCPBridge(cfg, newSilentLogger()) + if ok { + t.Errorf("expected ok=false when DeviceMesh is nil") + } +} + +func TestApplyMCPBridge_Disabled_ExposeFalse(t *testing.T) { + cfg := &config.AgentConfig{ + DeviceMesh: &config.DeviceMeshConfig{ + Enabled: true, + ExposeViaMCP: boolPtr(false), + }, + } + cfg.LLM.Primary.Provider = "claude-code" + _, ok := ApplyMCPBridge(cfg, newSilentLogger()) + if ok { + t.Errorf("expected ok=false when ExposeViaMCP=false") + } +} + +func TestApplyMCPBridge_Disabled_WrongProvider(t *testing.T) { + cfg := &config.AgentConfig{} + cfg.Agent.ID = "test" + cfg.LLM.Primary.Provider = "openai" + cfg.DeviceMesh = &config.DeviceMeshConfig{ + Enabled: true, + DeviceAgentURL: "http://127.0.0.1:9999", + Mode: "user", + } + _, ok := ApplyMCPBridge(cfg, newSilentLogger()) + if ok { + t.Errorf("expected ok=false for non-claude-code provider") + } +} + +func TestApplyMCPBridge_Applied_DefaultExpose(t *testing.T) { + tmp := t.TempDir() + defer withBinary(t, tmp)() + + cfg := &config.AgentConfig{} + cfg.Agent.ID = "agent-test" + cfg.LLM.Primary.Provider = "claude-code" + cfg.LLM.Primary.ClaudeCode.DisableTools = true // expect override to false + cfg.DeviceMesh = &config.DeviceMeshConfig{ + Enabled: true, + DeviceAgentURL: "http://10.42.0.10:7474", + Mode: "user", + ToolsAllowed: []string{"exec", "fs.read"}, + } + + result, ok := ApplyMCPBridge(cfg, newSilentLogger()) + if !ok { + t.Fatalf("expected ok=true; bridge should have been applied") + } + + // 1. Config path written and valid JSON. + if result.ConfigPath == "" { + t.Fatalf("missing ConfigPath in result") + } + defer os.Remove(result.ConfigPath) + raw, err := os.ReadFile(result.ConfigPath) + if err != nil { + t.Fatalf("read config: %v", err) + } + var doc map[string]any + if err := json.Unmarshal(raw, &doc); err != nil { + t.Fatalf("config not valid JSON: %v\n%s", err, raw) + } + servers, _ := doc["mcpServers"].(map[string]any) + srv, _ := servers["devicemesh"].(map[string]any) + if srv == nil { + t.Fatalf("mcpServers.devicemesh missing in config: %s", raw) + } + if cmd, _ := srv["command"].(string); !strings.HasSuffix(cmd, "devicemesh-mcp") { + t.Errorf("expected command to end with devicemesh-mcp, got %q", cmd) + } + + // 2. AllowedTools formatted as mcp____. + if len(cfg.LLM.Primary.ClaudeCode.AllowedTools) != 2 { + t.Fatalf("expected 2 allowed tools, got %v", cfg.LLM.Primary.ClaudeCode.AllowedTools) + } + for _, n := range cfg.LLM.Primary.ClaudeCode.AllowedTools { + if !strings.HasPrefix(n, "mcp__devicemesh__") { + t.Errorf("allowed tool %q missing mcp__devicemesh__ prefix", n) + } + } + + // 3. MCPConfigPath set on cfg. + if cfg.LLM.Primary.ClaudeCode.MCPConfigPath != result.ConfigPath { + t.Errorf("MCPConfigPath not propagated to cfg: got %q want %q", + cfg.LLM.Primary.ClaudeCode.MCPConfigPath, result.ConfigPath) + } + + // 4. DisableTools override applied. + if cfg.LLM.Primary.ClaudeCode.DisableTools { + t.Errorf("expected DisableTools=false after override, got true") + } + + // 5. /tmp file mode is 0600. + st, err := os.Stat(result.ConfigPath) + if err == nil && st.Mode().Perm() != 0o600 { + t.Errorf("expected config file mode 0600, got %v", st.Mode().Perm()) + } +} + +func TestApplyMCPBridge_URLEnvOverride(t *testing.T) { + tmp := t.TempDir() + defer withBinary(t, tmp)() + + t.Setenv("AGENT_TEST_DM_URL", "http://envurl.example:1234") + + cfg := &config.AgentConfig{} + cfg.Agent.ID = "agent-test" + cfg.LLM.Primary.Provider = "claude-code" + cfg.DeviceMesh = &config.DeviceMeshConfig{ + Enabled: true, + DeviceAgentURL: "http://yaml-loses:9999", + URLEnv: "AGENT_TEST_DM_URL", + Mode: "user", + } + + result, ok := ApplyMCPBridge(cfg, newSilentLogger()) + if !ok { + t.Fatalf("expected ok=true") + } + defer os.Remove(result.ConfigPath) + if result.DeviceAgentURL != "http://envurl.example:1234" { + t.Errorf("env URL override not applied: got %q", result.DeviceAgentURL) + } +} + +func TestApplyMCPBridge_BinaryMissing(t *testing.T) { + // No fake binary on disk → should skip cleanly. + tmp := t.TempDir() + prev, _ := os.Getwd() + _ = os.Chdir(tmp) + defer os.Chdir(prev) + + cfg := &config.AgentConfig{} + cfg.Agent.ID = "agent-test" + cfg.LLM.Primary.Provider = "claude-code" + cfg.DeviceMesh = &config.DeviceMeshConfig{ + Enabled: true, + DeviceAgentURL: "http://10.42.0.10:7474", + } + if _, ok := ApplyMCPBridge(cfg, newSilentLogger()); ok { + t.Errorf("expected ok=false when binary is missing") + } +} + +func TestBuildClaudeAllowedToolNames(t *testing.T) { + got := BuildClaudeAllowedToolNames("devicemesh", []string{"exec", "fs.read", "git.clone"}) + if len(got) != 3 { + t.Fatalf("expected 3 names, got %d", len(got)) + } + for _, n := range got { + if !strings.HasPrefix(n, "mcp__devicemesh__") { + t.Errorf("name %q missing prefix", n) + } + } + // Sorted output for determinism. + if got[0] >= got[1] || got[1] >= got[2] { + t.Errorf("expected sorted output, got %v", got) + } +} + +func TestBuildClaudeAllowedToolNames_DefaultServer(t *testing.T) { + got := BuildClaudeAllowedToolNames("", []string{"exec"}) + if len(got) != 1 || !strings.HasPrefix(got[0], "mcp__devicemesh__") { + t.Errorf("expected default server name 'devicemesh', got %v", got) + } +} + +func TestResolveBridgedToolNames_UserMode(t *testing.T) { + names, err := ResolveBridgedToolNames(&config.DeviceMeshConfig{ + Enabled: true, + Mode: "user", + }) + if err != nil { + t.Fatalf("err: %v", err) + } + if len(names) == 0 { + t.Fatalf("expected non-empty names") + } + for _, n := range names { + if n == "pkg.install" { + t.Errorf("user mode should not include pkg.install") + } + } +} + +func TestResolveBridgedToolNames_Filter(t *testing.T) { + names, err := ResolveBridgedToolNames(&config.DeviceMeshConfig{ + Enabled: true, + Mode: "user", + ToolsAllowed: []string{"exec", "fs.read", "unknown"}, + }) + if err != nil { + t.Fatalf("err: %v", err) + } + if len(names) != 2 { + t.Errorf("expected 2 names after filter, got %d (%v)", len(names), names) + } +} + +func TestShouldExposeViaMCP(t *testing.T) { + if (*config.DeviceMeshConfig)(nil).ShouldExposeViaMCP() { + t.Errorf("nil should not expose") + } + if (&config.DeviceMeshConfig{}).ShouldExposeViaMCP() { + t.Errorf("disabled should not expose") + } + if !(&config.DeviceMeshConfig{Enabled: true}).ShouldExposeViaMCP() { + t.Errorf("enabled + nil pointer should default to expose=true") + } + if (&config.DeviceMeshConfig{Enabled: true, ExposeViaMCP: boolPtr(false)}).ShouldExposeViaMCP() { + t.Errorf("enabled + false should not expose") + } + if !(&config.DeviceMeshConfig{Enabled: true, ExposeViaMCP: boolPtr(true)}).ShouldExposeViaMCP() { + t.Errorf("enabled + true should expose") + } +} diff --git a/internal/config/schema.go b/internal/config/schema.go index 11051c7..82a0efd 100644 --- a/internal/config/schema.go +++ b/internal/config/schema.go @@ -78,6 +78,27 @@ type DeviceMeshConfig struct { // client_timeout_s; we accept both. When both set, ClientTimeoutS wins // when non-zero. ClientTimeoutS int `yaml:"client_timeout_s,omitempty"` + + // ExposeViaMCP gates the MCP bridge (issue 0145). When the field is + // absent from YAML, the launcher defaults to "expose" (true) so an + // agent with device_mesh.enabled=true gets the bridge for free. The + // pointer shape lets us distinguish "unset" from "explicitly false"; + // use ShouldExposeViaMCP() to read it. + ExposeViaMCP *bool `yaml:"expose_via_mcp,omitempty"` +} + +// ShouldExposeViaMCP reports whether the launcher must build the MCP bridge +// for this device-mesh block. Returns false when the block is nil or not +// enabled; otherwise returns true unless ExposeViaMCP is explicitly false. +// Pure function — used by both the launcher and tests. +func (d *DeviceMeshConfig) ShouldExposeViaMCP() bool { + if d == nil || !d.Enabled { + return false + } + if d.ExposeViaMCP != nil { + return *d.ExposeViaMCP + } + return true } // ResolvedHost returns Host if non-empty, otherwise DeviceID. Used by the @@ -211,6 +232,18 @@ type ClaudeCodeCfg struct { AddDirs []string `yaml:"add_dirs"` // additional directories accessible Streaming bool `yaml:"streaming"` // use --output-format stream-json for realtime progress ShowToolProgress bool `yaml:"show_tool_progress"` // edit Matrix message to show tool usage progress + + // MCPConfigPath points to a JSON file consumed by `claude -p --mcp-config`. + // Set at runtime by the launcher (issue 0145) when the agent has + // device_mesh.enabled=true and ExposeViaMCP. Empty means claude runs + // without external MCP servers. NEVER set in YAML — overrides the + // runtime-generated bridge. + MCPConfigPath string `yaml:"mcp_config_path,omitempty"` + + // MCPServerName is the key inside the mcp-config JSON's "mcpServers" + // map. claude prefixes tool names exposed to the model as + // `mcp____`. Defaults to "devicemesh" when empty. + MCPServerName string `yaml:"mcp_server_name,omitempty"` } type LLMReasoningCfg struct { diff --git a/shell/llm/claudecode.go b/shell/llm/claudecode.go index a66c74f..c51f084 100644 --- a/shell/llm/claudecode.go +++ b/shell/llm/claudecode.go @@ -449,7 +449,21 @@ func buildClaudeArgs(cfg config.ClaudeCodeCfg, req coretypes.CompletionRequest) args = append(args, "--system-prompt", req.SystemPrompt) } - if cfg.DisableTools { + // Issue 0145: --mcp-config tells claude where to find external MCP + // servers (per-agent devicemesh bridge). Must come BEFORE --allowedTools + // because the allowed list usually references `mcp____` + // names that only exist once the MCP config is loaded. + if cfg.MCPConfigPath != "" { + args = append(args, "--mcp-config", cfg.MCPConfigPath) + } + + // Defensive: DisableTools=true plus a non-empty AllowedTools is a + // contradiction. The launcher's ApplyMCPBridge already forces + // DisableTools=false in that case, but this guard keeps direct callers + // safe too. + effectiveDisableTools := cfg.DisableTools && len(cfg.AllowedTools) == 0 + + if effectiveDisableTools { args = append(args, "--tools", "") } else { if len(cfg.AllowedTools) > 0 { diff --git a/shell/llm/claudecode_test.go b/shell/llm/claudecode_test.go index 97480e6..ee584f2 100644 --- a/shell/llm/claudecode_test.go +++ b/shell/llm/claudecode_test.go @@ -62,23 +62,53 @@ func TestBuildClaudeArgs_AllOptions(t *testing.T) { } func TestBuildClaudeArgs_DisableTools(t *testing.T) { + // DisableTools alone (no AllowedTools) → --tools "". cfg := config.ClaudeCodeCfg{ DisableTools: true, - AllowedTools: []string{"Bash"}, // should be ignored } - req := coretypes.CompletionRequest{} - - args := buildClaudeArgs(cfg, req) + args := buildClaudeArgs(cfg, coretypes.CompletionRequest{}) assertContains(t, args, "--tools", "") - // --allowedTools must NOT appear when disable_tools is set for _, a := range args { if a == "--allowedTools" { - t.Error("--allowedTools should not appear when DisableTools=true") + t.Error("--allowedTools should not appear when DisableTools=true and AllowedTools is empty") } } } +func TestBuildClaudeArgs_DisableToolsButAllowedToolsWins(t *testing.T) { + // Issue 0145: DisableTools=true plus a non-empty AllowedTools is a + // contradiction the launcher's ApplyMCPBridge guards against. The + // builder itself now also gives AllowedTools priority (precedence + // matches the launcher) so direct callers cannot accidentally produce + // the broken `--tools "" --allowedTools ...` combo. + cfg := config.ClaudeCodeCfg{ + DisableTools: true, + AllowedTools: []string{"Bash"}, + } + args := buildClaudeArgs(cfg, coretypes.CompletionRequest{}) + + for _, a := range args { + if a == "--tools" { + t.Error("--tools should not appear once AllowedTools is non-empty (AllowedTools wins)") + } + } + assertContains(t, args, "--allowedTools", "Bash") +} + +func TestBuildClaudeArgs_MCPConfigPath(t *testing.T) { + // Issue 0145: --mcp-config is emitted whenever MCPConfigPath is set so + // claude knows how to spawn the per-agent devicemesh MCP server. + cfg := config.ClaudeCodeCfg{ + MCPConfigPath: "/tmp/agent-x-mcp-config.json", + AllowedTools: []string{"mcp__devicemesh__exec"}, + } + args := buildClaudeArgs(cfg, coretypes.CompletionRequest{}) + + assertContains(t, args, "--mcp-config", "/tmp/agent-x-mcp-config.json") + assertContains(t, args, "--allowedTools", "mcp__devicemesh__exec") +} + func TestBuildClaudeArgs_DisallowedTools(t *testing.T) { cfg := config.ClaudeCodeCfg{ DisallowedTools: []string{"Edit", "Write"},