feat(0145-2,3,4): schema + launcher wiring + claude --mcp-config arg

Pieza 2 — schema (internal/config/schema.go): - DeviceMeshConfig.ExposeViaMCP *bool: pointer para distinguir "no establecido" vs "false explicito". Helper ShouldExposeViaMCP() devuelve true cuando enabled && (nil || *true). - ClaudeCodeCfg.MCPConfigPath y MCPServerName: poblados en runtime por la launcher, NUNCA por YAML. Pieza 3 — launcher wiring (devagents/mcp_bridge.go + cmd/launcher/main.go): - ApplyMCPBridge(cfg, logger): si DeviceMesh.ShouldExposeViaMCP() y provider=claude-code, resuelve binario devicemesh-mcp (junto al launcher), URL device_agent (env override > YAML), lista tools allowed (RegisterBuiltins + FilterByAllowed igual que registry_build.go), y escribe /tmp/<agent_id>-mcp-config.json (0600). - Aplica overrides a cfg.LLM.Primary.ClaudeCode: MCPConfigPath, AllowedTools (formato mcp__<server>__<tool>), DisableTools=false defensivo. - Launcher main.go llama ApplyMCPBridge inmediatamente despues de config.Load, ANTES de devagents.New (que es donde se construye el CompleteFunc del provider). Pieza 4 — claude args (shell/llm/claudecode.go): - buildClaudeArgs ahora emite "--mcp-config <path>" cuando cfg.MCPConfigPath no esta vacio. - Guard defensivo: DisableTools=true + AllowedTools no vacio ahora produce solo --allowedTools (efectivamente ignora DisableTools). El launcher ya lo previene en ApplyMCPBridge, pero esto protege a callers directos. Build limpio con goolm. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-24 18:28:34 +02:00
parent 15596df7e4
commit b92a350023
4 changed files with 368 additions and 1 deletions
@@ -190,6 +190,14 @@ func main() {
 					continue
 				}

+				// Issue 0145: if device_mesh is enabled on this agent, wire the
+				// MCP bridge so `claude -p` invokes our tools REALLY (via
+				// stdio JSON-RPC to bin/devicemesh-mcp) instead of imitating
+				// them as text. Mutates cfg.LLM.Primary.ClaudeCode in-place.
+				if _, ok := devagents.ApplyMCPBridge(cfg, logger); ok {
+					logger.Info("device_mesh MCP bridge wired", "agent", cfg.Agent.ID)
+				}
+
 				// Per-agent logger → writes to logs/<agent-id>/YYYY-MM-DD.jsonl
 				agentLogger, agentCleanup, aErr := agentlog.NewAgentLogger(agentlog.LoggerConfig{
 					BaseDir: logDir,
@@ -0,0 +1,312 @@
+// mcp_bridge.go — runtime wiring that makes `claude -p` invoke the
+// devicemesh tool catalog via a real MCP server instead of imitating tool
+// calls as plain text in the system prompt (issue 0145).
+//
+// What this file does, per call to ApplyMCPBridge:
+//
+//  1. Detects whether the agent has device_mesh enabled AND ExposeViaMCP.
+//  2. Resolves the path to the `bin/devicemesh-mcp` binary (same directory
+//     as the launcher executable).
+//  3. Resolves the device_agent URL (env override → YAML literal, same
+//     priority as buildDeviceMeshRegistry).
+//  4. Computes the list of tool names that should be visible to claude.
+//     This is the same list buildDeviceMeshRegistry yields, so the in-
+//     process registry and the MCP-exposed registry stay in lock-step.
+//  5. Writes the mcp-config JSON to /tmp/<agent_id>-mcp-config.json (0600).
+//     The JSON tells claude how to spawn the child process and which env
+//     vars to pass through.
+//  6. Mutates cfg.LLM.Primary.ClaudeCode so the existing claudecode.go
+//     code path picks up the bridge:
+//       - MCPConfigPath  → triggers `--mcp-config <path>`
+//       - AllowedTools   → prefixed `mcp__<server>__<tool>` so claude exposes
+//         them to the model
+//       - DisableTools   → forced false (DisableTools + AllowedTools is a
+//         contradiction that previously broke startup)
+//
+// The function is best-effort: any failure logs a warning and leaves the
+// config untouched so the agent still boots, just without the bridge.
+// Tests live in mcp_bridge_test.go.
+
+package devagents
+
+import (
+	"encoding/json"
+	"fmt"
+	"log/slog"
+	"os"
+	"path/filepath"
+	"sort"
+
+	"github.com/enmanuel/agents/internal/config"
+	devicemeshtools "github.com/enmanuel/agents/pkg/tools/devicemesh"
+)
+
+// defaultMCPServerName is what we drop into the mcpServers map when the
+// config does not override it. Surfaces in tool names as
+// `mcp__devicemesh__<tool>` on the claude side.
+const defaultMCPServerName = "devicemesh"
+
+// MCPBridgeResult is what ApplyMCPBridge returns when it actually does
+// something. Exposed so callers (and tests) can log it. When the bridge is
+// not applied (e.g. device_mesh disabled), the function returns ok=false
+// and the caller should not mutate config.
+type MCPBridgeResult struct {
+	ConfigPath   string
+	ServerName   string
+	ToolNames    []string // claude-facing names: mcp__<server>__<tool>
+	BinaryPath   string
+	DeviceAgentURL string
+}
+
+// ApplyMCPBridge wires the per-agent MCP bridge into cfg.LLM.Primary.ClaudeCode
+// when device_mesh is enabled with ExposeViaMCP. Returns (result, ok). ok=false
+// means no changes were made (the agent has no device_mesh, the user opted out,
+// or something failed and the launcher should keep going without the bridge).
+func ApplyMCPBridge(cfg *config.AgentConfig, logger *slog.Logger) (MCPBridgeResult, bool) {
+	if cfg == nil || cfg.DeviceMesh == nil {
+		return MCPBridgeResult{}, false
+	}
+	dm := cfg.DeviceMesh
+	if !dm.ShouldExposeViaMCP() {
+		logger.Debug("mcp bridge skipped: device_mesh.ShouldExposeViaMCP()=false",
+			"enabled", dm.Enabled,
+			"expose_via_mcp", dm.ExposeViaMCP,
+		)
+		return MCPBridgeResult{}, false
+	}
+	// claude-code is the only provider that knows --mcp-config. For other
+	// providers the bridge is meaningless; leave it unconfigured.
+	if cfg.LLM.Primary.Provider != "claude-code" {
+		logger.Debug("mcp bridge skipped: primary provider is not claude-code",
+			"provider", cfg.LLM.Primary.Provider,
+		)
+		return MCPBridgeResult{}, false
+	}
+
+	binPath, err := ResolveDevicemeshMCPBinary()
+	if err != nil {
+		logger.Warn("mcp bridge skipped: cannot resolve binary",
+			"err", err,
+		)
+		return MCPBridgeResult{}, false
+	}
+
+	url := ResolveDeviceAgentURL(dm)
+	if url == "" {
+		logger.Warn("mcp bridge skipped: no device_agent URL resolved",
+			"url_env", dm.URLEnv,
+			"host", dm.ResolvedHost(),
+		)
+		return MCPBridgeResult{}, false
+	}
+
+	toolNames, err := ResolveBridgedToolNames(dm)
+	if err != nil {
+		logger.Warn("mcp bridge skipped: cannot resolve bridged tools",
+			"err", err,
+		)
+		return MCPBridgeResult{}, false
+	}
+	if len(toolNames) == 0 {
+		logger.Warn("mcp bridge skipped: zero tools after filtering",
+			"mode", dm.Mode,
+			"tools_allowed", dm.ToolsAllowed,
+		)
+		return MCPBridgeResult{}, false
+	}
+
+	serverName := cfg.LLM.Primary.ClaudeCode.MCPServerName
+	if serverName == "" {
+		serverName = defaultMCPServerName
+	}
+
+	configPath, err := WriteMCPConfig(cfg.Agent.ID, serverName, binPath, url, dm.Mode, toolNames)
+	if err != nil {
+		logger.Warn("mcp bridge skipped: cannot write config",
+			"err", err,
+		)
+		return MCPBridgeResult{}, false
+	}
+
+	allowed := BuildClaudeAllowedToolNames(serverName, toolNames)
+	prev := cfg.LLM.Primary.ClaudeCode
+	cfg.LLM.Primary.ClaudeCode.MCPConfigPath = configPath
+	cfg.LLM.Primary.ClaudeCode.MCPServerName = serverName
+	cfg.LLM.Primary.ClaudeCode.AllowedTools = allowed
+	// Defensive override: DisableTools=true with a non-empty AllowedTools
+	// produces `--tools "" --allowedTools ...` which claude rejects. The
+	// bridge requires AllowedTools to win.
+	if prev.DisableTools {
+		logger.Warn("mcp bridge forcing disable_tools=false (was true) — AllowedTools takes precedence",
+			"agent_id", cfg.Agent.ID,
+		)
+		cfg.LLM.Primary.ClaudeCode.DisableTools = false
+	}
+
+	result := MCPBridgeResult{
+		ConfigPath:     configPath,
+		ServerName:     serverName,
+		ToolNames:      allowed,
+		BinaryPath:     binPath,
+		DeviceAgentURL: url,
+	}
+	logger.Info("mcp bridge applied",
+		"agent_id", cfg.Agent.ID,
+		"config_path", configPath,
+		"binary", binPath,
+		"server_name", serverName,
+		"device_agent_url", url,
+		"tool_count", len(allowed),
+		"tool_names", allowed,
+	)
+	return result, true
+}
+
+// ResolveDevicemeshMCPBinary returns the absolute path to the
+// `devicemesh-mcp` executable. Strategy:
+//
+//  1. Same directory as os.Executable() (cmd/launcher/main.go → bin/launcher
+//     and bin/devicemesh-mcp ship together).
+//  2. If (1) does not exist, fall back to "bin/devicemesh-mcp" relative to
+//     CWD (covers `go run` / test scenarios).
+//  3. If neither exists, return an error.
+//
+// Pure-ish — os.Executable + os.Stat are read-only.
+func ResolveDevicemeshMCPBinary() (string, error) {
+	if exe, err := os.Executable(); err == nil {
+		dir := filepath.Dir(exe)
+		candidate := filepath.Join(dir, "devicemesh-mcp")
+		if st, err := os.Stat(candidate); err == nil && !st.IsDir() {
+			return candidate, nil
+		}
+	}
+	// Fallback: CWD/bin/devicemesh-mcp. Useful for tests and `go run` from
+	// the repo root.
+	candidate, err := filepath.Abs("bin/devicemesh-mcp")
+	if err == nil {
+		if st, err := os.Stat(candidate); err == nil && !st.IsDir() {
+			return candidate, nil
+		}
+	}
+	return "", fmt.Errorf("devicemesh-mcp binary not found (looked next to launcher and at bin/devicemesh-mcp)")
+}
+
+// ResolveDeviceAgentURL applies the env override on top of the YAML
+// literal. Same precedence as devagents.buildDeviceMeshRegistry so the
+// in-process registry and the MCP bridge never disagree about which device
+// they're talking to.
+func ResolveDeviceAgentURL(dm *config.DeviceMeshConfig) string {
+	if dm == nil {
+		return ""
+	}
+	url := dm.DeviceAgentURL
+	if dm.URLEnv != "" {
+		if v := os.Getenv(dm.URLEnv); v != "" {
+			url = v
+		}
+	}
+	return url
+}
+
+// ResolveBridgedToolNames returns the tool names that should be exposed
+// through the MCP bridge. Reuses RegisterBuiltins + FilterByAllowed so we
+// don't drift from the in-process behaviour.
+func ResolveBridgedToolNames(dm *config.DeviceMeshConfig) ([]string, error) {
+	if dm == nil {
+		return nil, fmt.Errorf("nil DeviceMeshConfig")
+	}
+	mode := normalizeMeshMode(dm.Mode)
+	reg := devicemeshtools.NewToolRegistry(nil) // no client needed — pure registration
+	names := devicemeshtools.RegisterBuiltins(reg, mode)
+	if len(dm.ToolsAllowed) > 0 {
+		filtered := devicemeshtools.FilterByAllowed(reg, dm.ToolsAllowed)
+		reg = filtered
+		// Recompute names from the filtered registry.
+		names = reg.Names()
+	}
+	_ = names // names was set above only when no filter; reg.Names() reflects current state
+	return reg.Names(), nil
+}
+
+// BuildClaudeAllowedToolNames takes raw devicemesh tool names and prefixes
+// them with `mcp__<server_name>__`, matching the format claude exposes to
+// the model. Sorted output for deterministic logging.
+func BuildClaudeAllowedToolNames(serverName string, raw []string) []string {
+	if serverName == "" {
+		serverName = defaultMCPServerName
+	}
+	out := make([]string, 0, len(raw))
+	for _, n := range raw {
+		out = append(out, fmt.Sprintf("mcp__%s__%s", serverName, n))
+	}
+	sort.Strings(out)
+	return out
+}
+
+// WriteMCPConfig serialises the mcpServers JSON document and writes it to
+// /tmp/<agent_id>-mcp-config.json with mode 0600. Returns the absolute
+// path so the caller can hand it to claude -p --mcp-config.
+//
+// The serialised shape matches the schema claude-code accepts:
+//
+//	{
+//	  "mcpServers": {
+//	    "<server_name>": {
+//	      "command": "<binary path>",
+//	      "args": ["--device-agent", "<url>", "--mode", "<mode>",
+//	               "--tools-allowed", "<csv>", "--server-name", "<name>"],
+//	      "env": {"MCP_DEBUG_LOG": "/tmp/<agent_id>-mcp.log"}
+//	    }
+//	  }
+//	}
+func WriteMCPConfig(agentID, serverName, binPath, deviceAgentURL, mode string, toolNames []string) (string, error) {
+	if agentID == "" {
+		return "", fmt.Errorf("agent_id is empty")
+	}
+	if binPath == "" {
+		return "", fmt.Errorf("binPath is empty")
+	}
+	args := []string{"--device-agent", deviceAgentURL}
+	if mode != "" {
+		args = append(args, "--mode", mode)
+	}
+	if len(toolNames) > 0 {
+		args = append(args, "--tools-allowed", joinCSV(toolNames))
+	}
+	args = append(args, "--server-name", serverName)
+
+	logFile := fmt.Sprintf("/tmp/%s-mcp.log", agentID)
+	doc := map[string]any{
+		"mcpServers": map[string]any{
+			serverName: map[string]any{
+				"command": binPath,
+				"args":    args,
+				"env": map[string]any{
+					"MCP_DEBUG_LOG": logFile,
+				},
+			},
+		},
+	}
+	raw, err := json.MarshalIndent(doc, "", "  ")
+	if err != nil {
+		return "", fmt.Errorf("marshal mcp config: %w", err)
+	}
+	path := fmt.Sprintf("/tmp/%s-mcp-config.json", agentID)
+	if err := os.WriteFile(path, raw, 0o600); err != nil {
+		return "", fmt.Errorf("write %s: %w", path, err)
+	}
+	return path, nil
+}
+
+// joinCSV is a tiny helper that turns a slice into a comma-separated string.
+// Empty slice → empty string. Pure.
+func joinCSV(parts []string) string {
+	out := ""
+	for i, p := range parts {
+		if i > 0 {
+			out += ","
+		}
+		out += p
+	}
+	return out
+}
@@ -78,6 +78,27 @@ type DeviceMeshConfig struct {
 	// client_timeout_s; we accept both. When both set, ClientTimeoutS wins
 	// when non-zero.
 	ClientTimeoutS int `yaml:"client_timeout_s,omitempty"`
+
+	// ExposeViaMCP gates the MCP bridge (issue 0145). When the field is
+	// absent from YAML, the launcher defaults to "expose" (true) so an
+	// agent with device_mesh.enabled=true gets the bridge for free. The
+	// pointer shape lets us distinguish "unset" from "explicitly false";
+	// use ShouldExposeViaMCP() to read it.
+	ExposeViaMCP *bool `yaml:"expose_via_mcp,omitempty"`
+}
+
+// ShouldExposeViaMCP reports whether the launcher must build the MCP bridge
+// for this device-mesh block. Returns false when the block is nil or not
+// enabled; otherwise returns true unless ExposeViaMCP is explicitly false.
+// Pure function — used by both the launcher and tests.
+func (d *DeviceMeshConfig) ShouldExposeViaMCP() bool {
+	if d == nil || !d.Enabled {
+		return false
+	}
+	if d.ExposeViaMCP != nil {
+		return *d.ExposeViaMCP
+	}
+	return true
 }

 // ResolvedHost returns Host if non-empty, otherwise DeviceID. Used by the
@@ -211,6 +232,18 @@ type ClaudeCodeCfg struct {
 	AddDirs          []string      `yaml:"add_dirs"`            // additional directories accessible
 	Streaming        bool          `yaml:"streaming"`           // use --output-format stream-json for realtime progress
 	ShowToolProgress bool          `yaml:"show_tool_progress"`  // edit Matrix message to show tool usage progress
+
+	// MCPConfigPath points to a JSON file consumed by `claude -p --mcp-config`.
+	// Set at runtime by the launcher (issue 0145) when the agent has
+	// device_mesh.enabled=true and ExposeViaMCP. Empty means claude runs
+	// without external MCP servers. NEVER set in YAML — overrides the
+	// runtime-generated bridge.
+	MCPConfigPath string `yaml:"mcp_config_path,omitempty"`
+
+	// MCPServerName is the key inside the mcp-config JSON's "mcpServers"
+	// map. claude prefixes tool names exposed to the model as
+	// `mcp__<MCPServerName>__<tool>`. Defaults to "devicemesh" when empty.
+	MCPServerName string `yaml:"mcp_server_name,omitempty"`
 }

 type LLMReasoningCfg struct {
@@ -449,7 +449,21 @@ func buildClaudeArgs(cfg config.ClaudeCodeCfg, req coretypes.CompletionRequest)
 		args = append(args, "--system-prompt", req.SystemPrompt)
 	}

-	if cfg.DisableTools {
+	// Issue 0145: --mcp-config tells claude where to find external MCP
+	// servers (per-agent devicemesh bridge). Must come BEFORE --allowedTools
+	// because the allowed list usually references `mcp__<server>__<tool>`
+	// names that only exist once the MCP config is loaded.
+	if cfg.MCPConfigPath != "" {
+		args = append(args, "--mcp-config", cfg.MCPConfigPath)
+	}
+
+	// Defensive: DisableTools=true plus a non-empty AllowedTools is a
+	// contradiction. The launcher's ApplyMCPBridge already forces
+	// DisableTools=false in that case, but this guard keeps direct callers
+	// safe too.
+	effectiveDisableTools := cfg.DisableTools && len(cfg.AllowedTools) == 0
+
+	if effectiveDisableTools {
 		args = append(args, "--tools", "")
 	} else {
 		if len(cfg.AllowedTools) > 0 {