feat(0145-1): binario devicemesh-mcp + issue doc

Añade el binario standalone cmd/devicemesh-mcp/ que expone vía JSON-RPC sobre stdio el catálogo de devicemesh tools (exec, shell.eval, fs.*, git.*, pkg.*, proc.*, docker.*) al claude -p parent. Arquitectura issue 0145: - main.go: flags (--device-agent, --mode, --tools-allowed, --server-name), inicializa devicemesh.Client + RegisterBuiltins + FilterByAllowed, lanza server.ServeStdio del SDK mark3labs/mcp-go (ya dep). - bridge.go: registra cada ToolSpec como mcp.Tool con WithRawInputSchema + handler que invoca ToolRegistry.Call (validate->map->HTTP->map). Resultado serializado a NewToolResultText, errores como NewToolResultError para que el modelo se autocorrija. Razón: hoy claude -p ve nuestras tool names solo como TEXTO en el system prompt y las imita sin ejecutar. Con --mcp-config apuntando a este binario, claude las descubre vía tools/list e invoca vía tools/call REALMENTE. Smoke OK: initialize frame produce {capabilities:{tools:{listChanged:true}}, serverInfo:{name:"devicemesh",version:"0.1.0"}}. Issue doc 0145 incluido con aceptación A3 anti-hallucination + DoD triada. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-24 18:26:22 +02:00
parent 47bcf9d583
commit 15596df7e4
3 changed files with 516 additions and 0 deletions
@@ -0,0 +1,157 @@
+// bridge.go — adapter that registers every devicemesh.ToolSpec from a
+// ToolRegistry as an MCP tool on a mcp-go server.MCPServer.
+//
+// Tool name preservation: we register tools under their dotted devicemesh
+// name verbatim ("exec", "shell.eval", "fs.read"). claude exposes them to
+// the model as `mcp__<server_name>__<tool_name>` (the MCP transport prefixes
+// automatically).
+//
+// Schema: ToolSpec.InputSchema is already a JSON-Schema-lite map. We
+// marshal it to a json.RawMessage and feed it via mcp.WithRawInputSchema so
+// the LLM sees the full structure (required fields, enums, descriptions).
+//
+// Handler: each tool's handler invokes reg.Call(ctx, name, args). The
+// registry runs ValidateInput → ArgMapping → HTTP dispatch → ResultMapping
+// just like the in-process tool-use path. The result is JSON-encoded into
+// an MCP text-content block. Errors become NewToolResultError so the model
+// can self-correct on the next turn.
+package main
+
+import (
+	"context"
+	"encoding/json"
+	"fmt"
+	"log/slog"
+	"sort"
+
+	"github.com/mark3labs/mcp-go/mcp"
+	"github.com/mark3labs/mcp-go/server"
+
+	"github.com/enmanuel/agents/pkg/tools/devicemesh"
+)
+
+// RegisterToolBridge adds one MCP tool to srv for every spec returned by
+// reg.List(), pairing each tool definition with a handler that dispatches
+// through reg.
+//
+// It returns an error when srv or reg is nil, or as soon as a spec cannot be
+// converted into an MCP tool; specs registered before the failure stay
+// registered. logger may be nil, in which case no debug events are emitted.
+func RegisterToolBridge(srv *server.MCPServer, reg *devicemesh.ToolRegistry, logger *slog.Logger) error {
+	switch {
+	case srv == nil:
+		return fmt.Errorf("RegisterToolBridge: srv is nil")
+	case reg == nil:
+		return fmt.Errorf("RegisterToolBridge: reg is nil")
+	}
+
+	for _, s := range reg.List() {
+		mcpTool, buildErr := buildMCPTool(s)
+		if buildErr != nil {
+			return fmt.Errorf("build MCP tool %q: %w", s.Name, buildErr)
+		}
+		srv.AddTool(mcpTool, makeHandler(reg, s, logger))
+
+		// Logging is optional; skip straight to the next spec without it.
+		if logger == nil {
+			continue
+		}
+		logger.Debug("registered MCP tool",
+			"name", s.Name,
+			"capability", s.Capability,
+			"requires_approval", s.RequiresApproval,
+		)
+	}
+	return nil
+}
+
+// buildMCPTool converts a devicemesh.ToolSpec into an mcp.Tool.
+//
+// The tool description starts from spec.Description; a non-empty capability
+// is appended as "[device_mesh: <capability>]" and specs that require
+// approval get a trailing " (approval required)" marker. When the spec
+// carries an input schema it is JSON-encoded and attached as the raw input
+// schema; a marshalling failure is returned wrapped, with a zero mcp.Tool.
+func buildMCPTool(spec devicemesh.ToolSpec) (mcp.Tool, error) {
+	description := spec.Description
+	if spec.Capability != "" {
+		description = fmt.Sprintf("%s [device_mesh: %s]", description, spec.Capability)
+	}
+	if spec.RequiresApproval {
+		description += " (approval required)"
+	}
+	options := []mcp.ToolOption{mcp.WithDescription(description)}
+
+	// No schema declared: the description is the only option to apply.
+	if spec.InputSchema == nil {
+		return mcp.NewTool(spec.Name, options...), nil
+	}
+
+	schemaJSON, err := json.Marshal(spec.InputSchema)
+	if err != nil {
+		return mcp.Tool{}, fmt.Errorf("marshal input schema: %w", err)
+	}
+	options = append(options, mcp.WithRawInputSchema(schemaJSON))
+	return mcp.NewTool(spec.Name, options...), nil
+}
+
+// makeHandler builds the server.ToolHandlerFunc for a single spec. The
+// returned closure forwards the request arguments to reg.Call under
+// spec.Name and turns the outcome into an MCP result:
+//
+//   - success → text result holding encodeResult(result)
+//   - failure → error result holding err.Error()
+//
+// The handler itself always returns a nil error, so a failing tool is
+// reported inside the result rather than as a handler-level error. logger
+// may be nil, which disables the debug/warn events.
+func makeHandler(reg *devicemesh.ToolRegistry, spec devicemesh.ToolSpec, logger *slog.Logger) server.ToolHandlerFunc {
+	handle := func(ctx context.Context, req mcp.CallToolRequest) (*mcp.CallToolResult, error) {
+		// Hand the registry an empty map rather than nil when the request
+		// carries no arguments.
+		callArgs := req.GetArguments()
+		if callArgs == nil {
+			callArgs = map[string]any{}
+		}
+		if logger != nil {
+			logger.Debug("tools/call received",
+				"tool", spec.Name,
+				"capability", spec.Capability,
+				"arg_keys", keysOf(callArgs),
+			)
+		}
+
+		out, callErr := reg.Call(ctx, spec.Name, callArgs)
+		if callErr == nil {
+			payload := encodeResult(out)
+			if logger != nil {
+				logger.Debug("tools/call ok",
+					"tool", spec.Name,
+					"result_len", len(payload),
+				)
+			}
+			return mcp.NewToolResultText(payload), nil
+		}
+
+		if logger != nil {
+			logger.Warn("tools/call failed",
+				"tool", spec.Name,
+				"err", callErr.Error(),
+			)
+		}
+		// The failure travels inside the result (with a nil handler error)
+		// so the model can read the message and retry with corrected input.
+		return mcp.NewToolResultError(callErr.Error()), nil
+	}
+	return handle
+}
+
+// encodeResult converts a tool result (any) to the string payload the model
+// will see. Mirrors devicemesh.AdaptTool's formatToolResult so MCP and the
+// in-process path produce consistent transcripts.
+//
+//   - nil    → ""
+//   - string → returned as-is (avoids double-encoding JSON strings)
+//   - other  → json.Marshal; on failure fall back to fmt.Sprintf so we never
+//     drop data on the floor.
+func encodeResult(v any) string {
+	if v == nil {
+		return ""
+	}
+	if s, ok := v.(string); ok {
+		return s
+	}
+	b, err := json.Marshal(v)
+	if err != nil {
+		return fmt.Sprintf("%v", v)
+	}
+	return string(b)
+}
+
+// keysOf returns the keys of m in ascending order, for use as log context.
+// Go randomizes map iteration order, so the keys are sorted explicitly to
+// keep the logged value deterministic from one call to the next.
+//
+// It returns nil for a nil or empty map. Pure helper.
+func keysOf(m map[string]any) []string {
+	if len(m) == 0 {
+		return nil
+	}
+	out := make([]string, 0, len(m))
+	for k := range m {
+		out = append(out, k)
+	}
+	sort.Strings(out)
+	return out
+}
@@ -0,0 +1,208 @@
+// Command devicemesh-mcp is a per-agent MCP server (stdio) that exposes the
+// agents_and_robots device-mesh tool catalog (exec, shell.eval, fs.*, git.*,
+// pkg.*, proc.*, docker.*) to a parent `claude -p` subprocess.
+//
+// Architecture (issue 0145):
+//
+//	claude -p
+//	  ├─ spawns this binary as child via --mcp-config
+//	  ├─ JSON-RPC over stdio
+//	  ├─ initialize / tools/list / tools/call / ping / notifications/initialized
+//	  └─ tool names exposed as `mcp__<server_name>__<tool_name>` to the model
+//
+// Flags:
+//
+//	--device-agent <URL>      required — http://host:port of the remote device_agent
+//	--mode user|sudo|all      default user — filters which builtin tools are registered
+//	--tools-allowed <csv>     optional — narrows the catalog after mode filtering
+//	--server-name <name>      default "devicemesh" — only used for logs and serverInfo
+//
+// Environment:
+//
+//	MCP_DEBUG_LOG <path>      optional — write structured logs to this file
+//	                          (stderr is reserved by claude for the MCP transport
+//	                          framing in some setups, so we prefer a file sink)
+//
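+// Exit status: 2 when flag parsing fails or --device-agent is missing; 1 when
+// building the registry, registering the tool bridge, or serving stdio fails
+// with anything other than a normal shutdown.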
+package main
+
+import (
+	"flag"
+	"fmt"
+	"io"
+	"log/slog"
+	"os"
+	"strings"
+	"time"
+
+	"github.com/mark3labs/mcp-go/server"
+
+	"github.com/enmanuel/agents/pkg/tools/devicemesh"
+)
+
+// version is the build version string. It is printed by --version, included
+// in the startup log event, and passed to server.NewMCPServer. The default
+// can be replaced at build time via the linker, e.g.
+// -ldflags "-X main.version=<value>"; no other version source is consulted,
+// which keeps the binary self-contained.
+var version = "0.1.0"
+
+// main parses the CLI flags, builds the devicemesh tool registry, registers
+// every tool on a new MCP server and then serves JSON-RPC over stdio until
+// the server returns.
+//
+// Exit status: 0 for --version or a clean shutdown, 2 when --device-agent is
+// missing, 1 when the registry, the tool bridge, or the stdio server fails.
+func main() {
+	var (
+		deviceAgentURL string
+		mode           string
+		toolsAllowed   string
+		serverName     string
+		showVersion    bool
+	)
+
+	flag.StringVar(&deviceAgentURL, "device-agent", "", "URL of the device_agent (http://host:port). Required.")
+	flag.StringVar(&mode, "mode", "user", "Tool registration mode: user|sudo|all")
+	flag.StringVar(&toolsAllowed, "tools-allowed", "", "CSV of tool names to keep after mode filtering. Empty = keep all.")
+	flag.StringVar(&serverName, "server-name", "devicemesh", "MCP server name (used in serverInfo and log context)")
+	flag.BoolVar(&showVersion, "version", false, "Print version and exit")
+	flag.Parse()
+
+	// --version short-circuits before the logger is created or any flag is
+	// validated.
+	if showVersion {
+		fmt.Fprintf(os.Stdout, "devicemesh-mcp %s\n", version)
+		return
+	}
+
+	// The logger is created before flag validation so that validation
+	// failures are recorded in the debug log as well as on stderr.
+	logger := newLogger()
+	logger.Info("devicemesh-mcp starting",
+		"version", version,
+		"server_name", serverName,
+		"mode", mode,
+		"device_agent_url", deviceAgentURL,
+		"tools_allowed", toolsAllowed,
+	)
+
+	// Missing required flag: usage-style failure, exit status 2.
+	if deviceAgentURL == "" {
+		logger.Error("--device-agent is required")
+		fmt.Fprintln(os.Stderr, "fatal: --device-agent is required")
+		os.Exit(2)
+	}
+
+	// Build the per-process devicemesh registry. Mirrors the launcher's
+	// buildDeviceMeshRegistry but driven by CLI flags instead of YAML.
+	reg, err := buildRegistry(deviceAgentURL, mode, splitCSV(toolsAllowed))
+	if err != nil {
+		logger.Error("build registry failed", "err", err)
+		fmt.Fprintf(os.Stderr, "fatal: %s\n", err)
+		os.Exit(1)
+	}
+	logger.Info("registry ready", "tool_count", reg.Len(), "names", reg.Names())
+
+	// Build the MCP server, wire every devicemesh tool as an MCP tool, and
+	// serve over stdio. ServeStdio handles initialize / tools/list /
+	// tools/call / ping / notifications/initialized for us — the bridge only
+	// has to register tools.
+	srv := server.NewMCPServer(serverName, version)
+	if err := RegisterToolBridge(srv, reg, logger); err != nil {
+		logger.Error("register tool bridge failed", "err", err)
+		fmt.Fprintf(os.Stderr, "fatal: %s\n", err)
+		os.Exit(1)
+	}
+
+	// ServeStdio blocks until the server stops; a nil error falls through to
+	// a normal return (exit status 0).
+	logger.Info("starting stdio server")
+	if err := server.ServeStdio(srv); err != nil {
+		// Stdin EOF is the normal shutdown signal when the claude parent
+		// exits; treat it as a clean exit.
+		if isCleanShutdown(err) {
+			logger.Info("stdio server exited cleanly", "err", err)
+			return
+		}
+		logger.Error("stdio server error", "err", err)
+		fmt.Fprintf(os.Stderr, "fatal: %s\n", err)
+		os.Exit(1)
+	}
+}
+
+// buildRegistry assembles the devicemesh ToolRegistry described by the CLI
+// flags: a client for deviceAgentURL, the builtin tools for the parsed mode,
+// and — when allowed is non-empty — only the subset kept by FilterByAllowed.
+//
+// It returns an error when the mode registers no builtins or when the
+// allow-list filters every tool out. No tool is invoked here; dispatch only
+// happens later through the registry's Call method. Presumably the
+// constructors perform no network I/O either — confirm against the
+// devicemesh package. Kept as a separate function so tests can call it.
+func buildRegistry(deviceAgentURL, modeStr string, allowed []string) (*devicemesh.ToolRegistry, error) {
+	httpClient := devicemesh.NewClient(deviceAgentURL)
+	// Conservative timeout: stdio frames from claude can sit in our queue for
+	// a while while the model thinks.
+	// NOTE(review): the earlier comment here said the per-call HTTP timeout
+	// stays at the devicemesh default (30s), yet this assignment sets 60s —
+	// confirm which timeout this field actually controls.
+	httpClient.Timeout = 60 * time.Second
+
+	registry := devicemesh.NewToolRegistry(httpClient)
+	registered := devicemesh.RegisterBuiltins(registry, parseMode(modeStr))
+	if len(registered) == 0 {
+		return nil, fmt.Errorf("RegisterBuiltins yielded zero tools for mode=%q", modeStr)
+	}
+
+	// Without an allow-list the mode-filtered catalog is used as-is.
+	if len(allowed) == 0 {
+		return registry, nil
+	}
+
+	narrowed := devicemesh.FilterByAllowed(registry, allowed)
+	if narrowed.Len() == 0 {
+		return nil, fmt.Errorf("FilterByAllowed yielded zero tools (allowed=%v, mode=%q)", allowed, modeStr)
+	}
+	return narrowed, nil
+}
+
+// parseMode translates the --mode flag value into a devicemesh
+// RegistrationMode. Matching ignores case and surrounding whitespace:
+// "sudo" and "all" select their respective modes, and every other value —
+// including "user", the empty string, and unrecognised input — yields
+// ModeUser, the most restrictive choice.
+func parseMode(s string) devicemesh.RegistrationMode {
+	normalized := strings.ToLower(strings.TrimSpace(s))
+	if normalized == "sudo" {
+		return devicemesh.ModeSudo
+	}
+	if normalized == "all" {
+		return devicemesh.ModeAll
+	}
+	return devicemesh.ModeUser
+}
+
+// splitCSV breaks a comma-separated list into its items, trimming the
+// whitespace around each one and discarding items that end up empty.
+//
+// A blank input returns nil. An input made only of separators (for example
+// ",") returns an empty, non-nil slice. Pure helper.
+func splitCSV(s string) []string {
+	trimmed := strings.TrimSpace(s)
+	if trimmed == "" {
+		return nil
+	}
+
+	fields := strings.Split(trimmed, ",")
+	// Filter in place: kept shares fields' backing array and never outruns
+	// the read position, so no element is overwritten before it is read.
+	kept := fields[:0]
+	for _, raw := range fields {
+		if item := strings.TrimSpace(raw); item != "" {
+			kept = append(kept, item)
+		}
+	}
+	return kept
+}
+
+// newLogger builds a slog.Logger that writes to MCP_DEBUG_LOG if set, or
+// io.Discard otherwise. We avoid stdout (reserved for JSON-RPC frames) and
+// stderr (transport framing varies between MCP clients).
+func newLogger() *slog.Logger {
+	logPath := os.Getenv("MCP_DEBUG_LOG")
+	var w io.Writer = io.Discard
+	if logPath != "" {
+		f, err := os.OpenFile(logPath, os.O_CREATE|os.O_WRONLY|os.O_APPEND, 0o600)
+		if err == nil {
+			w = f
+		}
+	}
+	return slog.New(slog.NewJSONHandler(w, &slog.HandlerOptions{Level: slog.LevelDebug}))
+}
+
+// isCleanShutdown reports whether err represents an ordinary end of the
+// stdio session rather than a real failure, so that main can exit with
+// status 0 when the parent process goes away and closes our pipes.
+//
+// A nil error and io.EOF both count as clean, as does any error whose
+// message contains "EOF", "file already closed" or "use of closed".
+func isCleanShutdown(err error) bool {
+	if err == nil || err == io.EOF {
+		return true
+	}
+
+	text := err.Error()
+	for _, marker := range []string{"EOF", "file already closed", "use of closed"} {
+		if strings.Contains(text, marker) {
+			return true
+		}
+	}
+	return false
+}