diff --git a/cmd/launcher/main.go b/cmd/launcher/main.go index fdfe2bc..35de9aa 100644 --- a/cmd/launcher/main.go +++ b/cmd/launcher/main.go @@ -190,6 +190,14 @@ func main() { continue } + // Issue 0145: if device_mesh is enabled on this agent, wire the + // MCP bridge so `claude -p` invokes our tools REALLY (via + // stdio JSON-RPC to bin/devicemesh-mcp) instead of imitating + // them as text. Mutates cfg.LLM.Primary.ClaudeCode in-place. + if _, ok := devagents.ApplyMCPBridge(cfg, logger); ok { + logger.Info("device_mesh MCP bridge wired", "agent", cfg.Agent.ID) + } + // Per-agent logger → writes to logs//YYYY-MM-DD.jsonl agentLogger, agentCleanup, aErr := agentlog.NewAgentLogger(agentlog.LoggerConfig{ BaseDir: logDir, diff --git a/devagents/mcp_bridge.go b/devagents/mcp_bridge.go new file mode 100644 index 0000000..9c00ec3 --- /dev/null +++ b/devagents/mcp_bridge.go @@ -0,0 +1,312 @@ +// mcp_bridge.go — runtime wiring that makes `claude -p` invoke the +// devicemesh tool catalog via a real MCP server instead of imitating tool +// calls as plain text in the system prompt (issue 0145). +// +// What this file does, per call to ApplyMCPBridge: +// +// 1. Detects whether the agent has device_mesh enabled AND ExposeViaMCP. +// 2. Resolves the path to the `bin/devicemesh-mcp` binary (same directory +// as the launcher executable). +// 3. Resolves the device_agent URL (env override → YAML literal, same +// priority as buildDeviceMeshRegistry). +// 4. Computes the list of tool names that should be visible to claude. +// This is the same list buildDeviceMeshRegistry yields, so the in- +// process registry and the MCP-exposed registry stay in lock-step. +// 5. Writes the mcp-config JSON to /tmp/-mcp-config.json (0600). +// The JSON tells claude how to spawn the child process and which env +// vars to pass through. +// 6. Mutates cfg.LLM.Primary.ClaudeCode so the existing claudecode.go +// code path picks up the bridge: +// - MCPConfigPath → triggers `--mcp-config ` +// - AllowedTools → prefixed `mcp____` so claude exposes +// them to the model +// - DisableTools → forced false (DisableTools + AllowedTools is a +// contradiction that previously broke startup) +// +// The function is best-effort: any failure logs a warning and leaves the +// config untouched so the agent still boots, just without the bridge. +// Tests live in mcp_bridge_test.go. + +package devagents + +import ( + "encoding/json" + "fmt" + "log/slog" + "os" + "path/filepath" + "sort" + + "github.com/enmanuel/agents/internal/config" + devicemeshtools "github.com/enmanuel/agents/pkg/tools/devicemesh" +) + +// defaultMCPServerName is what we drop into the mcpServers map when the +// config does not override it. Surfaces in tool names as +// `mcp__devicemesh__` on the claude side. +const defaultMCPServerName = "devicemesh" + +// MCPBridgeResult is what ApplyMCPBridge returns when it actually does +// something. Exposed so callers (and tests) can log it. When the bridge is +// not applied (e.g. device_mesh disabled), the function returns ok=false +// and the caller should not mutate config. +type MCPBridgeResult struct { + ConfigPath string + ServerName string + ToolNames []string // claude-facing names: mcp____ + BinaryPath string + DeviceAgentURL string +} + +// ApplyMCPBridge wires the per-agent MCP bridge into cfg.LLM.Primary.ClaudeCode +// when device_mesh is enabled with ExposeViaMCP. Returns (result, ok). ok=false +// means no changes were made (the agent has no device_mesh, the user opted out, +// or something failed and the launcher should keep going without the bridge). +func ApplyMCPBridge(cfg *config.AgentConfig, logger *slog.Logger) (MCPBridgeResult, bool) { + if cfg == nil || cfg.DeviceMesh == nil { + return MCPBridgeResult{}, false + } + dm := cfg.DeviceMesh + if !dm.ShouldExposeViaMCP() { + logger.Debug("mcp bridge skipped: device_mesh.ShouldExposeViaMCP()=false", + "enabled", dm.Enabled, + "expose_via_mcp", dm.ExposeViaMCP, + ) + return MCPBridgeResult{}, false + } + // claude-code is the only provider that knows --mcp-config. For other + // providers the bridge is meaningless; leave it unconfigured. + if cfg.LLM.Primary.Provider != "claude-code" { + logger.Debug("mcp bridge skipped: primary provider is not claude-code", + "provider", cfg.LLM.Primary.Provider, + ) + return MCPBridgeResult{}, false + } + + binPath, err := ResolveDevicemeshMCPBinary() + if err != nil { + logger.Warn("mcp bridge skipped: cannot resolve binary", + "err", err, + ) + return MCPBridgeResult{}, false + } + + url := ResolveDeviceAgentURL(dm) + if url == "" { + logger.Warn("mcp bridge skipped: no device_agent URL resolved", + "url_env", dm.URLEnv, + "host", dm.ResolvedHost(), + ) + return MCPBridgeResult{}, false + } + + toolNames, err := ResolveBridgedToolNames(dm) + if err != nil { + logger.Warn("mcp bridge skipped: cannot resolve bridged tools", + "err", err, + ) + return MCPBridgeResult{}, false + } + if len(toolNames) == 0 { + logger.Warn("mcp bridge skipped: zero tools after filtering", + "mode", dm.Mode, + "tools_allowed", dm.ToolsAllowed, + ) + return MCPBridgeResult{}, false + } + + serverName := cfg.LLM.Primary.ClaudeCode.MCPServerName + if serverName == "" { + serverName = defaultMCPServerName + } + + configPath, err := WriteMCPConfig(cfg.Agent.ID, serverName, binPath, url, dm.Mode, toolNames) + if err != nil { + logger.Warn("mcp bridge skipped: cannot write config", + "err", err, + ) + return MCPBridgeResult{}, false + } + + allowed := BuildClaudeAllowedToolNames(serverName, toolNames) + prev := cfg.LLM.Primary.ClaudeCode + cfg.LLM.Primary.ClaudeCode.MCPConfigPath = configPath + cfg.LLM.Primary.ClaudeCode.MCPServerName = serverName + cfg.LLM.Primary.ClaudeCode.AllowedTools = allowed + // Defensive override: DisableTools=true with a non-empty AllowedTools + // produces `--tools "" --allowedTools ...` which claude rejects. The + // bridge requires AllowedTools to win. + if prev.DisableTools { + logger.Warn("mcp bridge forcing disable_tools=false (was true) — AllowedTools takes precedence", + "agent_id", cfg.Agent.ID, + ) + cfg.LLM.Primary.ClaudeCode.DisableTools = false + } + + result := MCPBridgeResult{ + ConfigPath: configPath, + ServerName: serverName, + ToolNames: allowed, + BinaryPath: binPath, + DeviceAgentURL: url, + } + logger.Info("mcp bridge applied", + "agent_id", cfg.Agent.ID, + "config_path", configPath, + "binary", binPath, + "server_name", serverName, + "device_agent_url", url, + "tool_count", len(allowed), + "tool_names", allowed, + ) + return result, true +} + +// ResolveDevicemeshMCPBinary returns the absolute path to the +// `devicemesh-mcp` executable. Strategy: +// +// 1. Same directory as os.Executable() (cmd/launcher/main.go → bin/launcher +// and bin/devicemesh-mcp ship together). +// 2. If (1) does not exist, fall back to "bin/devicemesh-mcp" relative to +// CWD (covers `go run` / test scenarios). +// 3. If neither exists, return an error. +// +// Pure-ish — os.Executable + os.Stat are read-only. +func ResolveDevicemeshMCPBinary() (string, error) { + if exe, err := os.Executable(); err == nil { + dir := filepath.Dir(exe) + candidate := filepath.Join(dir, "devicemesh-mcp") + if st, err := os.Stat(candidate); err == nil && !st.IsDir() { + return candidate, nil + } + } + // Fallback: CWD/bin/devicemesh-mcp. Useful for tests and `go run` from + // the repo root. + candidate, err := filepath.Abs("bin/devicemesh-mcp") + if err == nil { + if st, err := os.Stat(candidate); err == nil && !st.IsDir() { + return candidate, nil + } + } + return "", fmt.Errorf("devicemesh-mcp binary not found (looked next to launcher and at bin/devicemesh-mcp)") +} + +// ResolveDeviceAgentURL applies the env override on top of the YAML +// literal. Same precedence as devagents.buildDeviceMeshRegistry so the +// in-process registry and the MCP bridge never disagree about which device +// they're talking to. +func ResolveDeviceAgentURL(dm *config.DeviceMeshConfig) string { + if dm == nil { + return "" + } + url := dm.DeviceAgentURL + if dm.URLEnv != "" { + if v := os.Getenv(dm.URLEnv); v != "" { + url = v + } + } + return url +} + +// ResolveBridgedToolNames returns the tool names that should be exposed +// through the MCP bridge. Reuses RegisterBuiltins + FilterByAllowed so we +// don't drift from the in-process behaviour. +func ResolveBridgedToolNames(dm *config.DeviceMeshConfig) ([]string, error) { + if dm == nil { + return nil, fmt.Errorf("nil DeviceMeshConfig") + } + mode := normalizeMeshMode(dm.Mode) + reg := devicemeshtools.NewToolRegistry(nil) // no client needed — pure registration + names := devicemeshtools.RegisterBuiltins(reg, mode) + if len(dm.ToolsAllowed) > 0 { + filtered := devicemeshtools.FilterByAllowed(reg, dm.ToolsAllowed) + reg = filtered + // Recompute names from the filtered registry. + names = reg.Names() + } + _ = names // names was set above only when no filter; reg.Names() reflects current state + return reg.Names(), nil +} + +// BuildClaudeAllowedToolNames takes raw devicemesh tool names and prefixes +// them with `mcp____`, matching the format claude exposes to +// the model. Sorted output for deterministic logging. +func BuildClaudeAllowedToolNames(serverName string, raw []string) []string { + if serverName == "" { + serverName = defaultMCPServerName + } + out := make([]string, 0, len(raw)) + for _, n := range raw { + out = append(out, fmt.Sprintf("mcp__%s__%s", serverName, n)) + } + sort.Strings(out) + return out +} + +// WriteMCPConfig serialises the mcpServers JSON document and writes it to +// /tmp/-mcp-config.json with mode 0600. Returns the absolute +// path so the caller can hand it to claude -p --mcp-config. +// +// The serialised shape matches the schema claude-code accepts: +// +// { +// "mcpServers": { +// "": { +// "command": "", +// "args": ["--device-agent", "", "--mode", "", +// "--tools-allowed", "", "--server-name", ""], +// "env": {"MCP_DEBUG_LOG": "/tmp/-mcp.log"} +// } +// } +// } +func WriteMCPConfig(agentID, serverName, binPath, deviceAgentURL, mode string, toolNames []string) (string, error) { + if agentID == "" { + return "", fmt.Errorf("agent_id is empty") + } + if binPath == "" { + return "", fmt.Errorf("binPath is empty") + } + args := []string{"--device-agent", deviceAgentURL} + if mode != "" { + args = append(args, "--mode", mode) + } + if len(toolNames) > 0 { + args = append(args, "--tools-allowed", joinCSV(toolNames)) + } + args = append(args, "--server-name", serverName) + + logFile := fmt.Sprintf("/tmp/%s-mcp.log", agentID) + doc := map[string]any{ + "mcpServers": map[string]any{ + serverName: map[string]any{ + "command": binPath, + "args": args, + "env": map[string]any{ + "MCP_DEBUG_LOG": logFile, + }, + }, + }, + } + raw, err := json.MarshalIndent(doc, "", " ") + if err != nil { + return "", fmt.Errorf("marshal mcp config: %w", err) + } + path := fmt.Sprintf("/tmp/%s-mcp-config.json", agentID) + if err := os.WriteFile(path, raw, 0o600); err != nil { + return "", fmt.Errorf("write %s: %w", path, err) + } + return path, nil +} + +// joinCSV is a tiny helper that turns a slice into a comma-separated string. +// Empty slice → empty string. Pure. +func joinCSV(parts []string) string { + out := "" + for i, p := range parts { + if i > 0 { + out += "," + } + out += p + } + return out +} diff --git a/internal/config/schema.go b/internal/config/schema.go index 11051c7..82a0efd 100644 --- a/internal/config/schema.go +++ b/internal/config/schema.go @@ -78,6 +78,27 @@ type DeviceMeshConfig struct { // client_timeout_s; we accept both. When both set, ClientTimeoutS wins // when non-zero. ClientTimeoutS int `yaml:"client_timeout_s,omitempty"` + + // ExposeViaMCP gates the MCP bridge (issue 0145). When the field is + // absent from YAML, the launcher defaults to "expose" (true) so an + // agent with device_mesh.enabled=true gets the bridge for free. The + // pointer shape lets us distinguish "unset" from "explicitly false"; + // use ShouldExposeViaMCP() to read it. + ExposeViaMCP *bool `yaml:"expose_via_mcp,omitempty"` +} + +// ShouldExposeViaMCP reports whether the launcher must build the MCP bridge +// for this device-mesh block. Returns false when the block is nil or not +// enabled; otherwise returns true unless ExposeViaMCP is explicitly false. +// Pure function — used by both the launcher and tests. +func (d *DeviceMeshConfig) ShouldExposeViaMCP() bool { + if d == nil || !d.Enabled { + return false + } + if d.ExposeViaMCP != nil { + return *d.ExposeViaMCP + } + return true } // ResolvedHost returns Host if non-empty, otherwise DeviceID. Used by the @@ -211,6 +232,18 @@ type ClaudeCodeCfg struct { AddDirs []string `yaml:"add_dirs"` // additional directories accessible Streaming bool `yaml:"streaming"` // use --output-format stream-json for realtime progress ShowToolProgress bool `yaml:"show_tool_progress"` // edit Matrix message to show tool usage progress + + // MCPConfigPath points to a JSON file consumed by `claude -p --mcp-config`. + // Set at runtime by the launcher (issue 0145) when the agent has + // device_mesh.enabled=true and ExposeViaMCP. Empty means claude runs + // without external MCP servers. NEVER set in YAML — overrides the + // runtime-generated bridge. + MCPConfigPath string `yaml:"mcp_config_path,omitempty"` + + // MCPServerName is the key inside the mcp-config JSON's "mcpServers" + // map. claude prefixes tool names exposed to the model as + // `mcp____`. Defaults to "devicemesh" when empty. + MCPServerName string `yaml:"mcp_server_name,omitempty"` } type LLMReasoningCfg struct { diff --git a/shell/llm/claudecode.go b/shell/llm/claudecode.go index a66c74f..c51f084 100644 --- a/shell/llm/claudecode.go +++ b/shell/llm/claudecode.go @@ -449,7 +449,21 @@ func buildClaudeArgs(cfg config.ClaudeCodeCfg, req coretypes.CompletionRequest) args = append(args, "--system-prompt", req.SystemPrompt) } - if cfg.DisableTools { + // Issue 0145: --mcp-config tells claude where to find external MCP + // servers (per-agent devicemesh bridge). Must come BEFORE --allowedTools + // because the allowed list usually references `mcp____` + // names that only exist once the MCP config is loaded. + if cfg.MCPConfigPath != "" { + args = append(args, "--mcp-config", cfg.MCPConfigPath) + } + + // Defensive: DisableTools=true plus a non-empty AllowedTools is a + // contradiction. The launcher's ApplyMCPBridge already forces + // DisableTools=false in that case, but this guard keeps direct callers + // safe too. + effectiveDisableTools := cfg.DisableTools && len(cfg.AllowedTools) == 0 + + if effectiveDisableTools { args = append(args, "--tools", "") } else { if len(cfg.AllowedTools) > 0 {