feat(0144a): tool registry framework para device-mesh

Anade pkg/tools/devicemesh con Client HTTP al device_agent + ToolRegistry con 16 tools standard (exec, fs.*, git.*, docker.*, proc.*, pkg.*, shell.eval). RegisterBuiltins filtra por mode user/sudo via RequiresApproval flag. Hook al pkg/decision con ActionKindDeviceMesh + DeviceMeshAction. Runner soporta dispatch via NewRunnerWithDeviceMesh (back-compat NewRunner). Tests: 25 nuevos en devicemesh + 4 en runner. Build clean.
2026-05-24 14:07:13 +02:00
parent 71b3b2bca9
commit bcd246bf85
14 changed files with 3080 additions and 3 deletions
@@ -0,0 +1,24 @@
+// devicemesh.go: pure data type for "call a device mesh tool" actions.
+//
+// The runtime decides which agent has which tool registry (user vs sudo).
+// The decision layer only describes *what* to call; the runner in
+// shell/effects/ resolves the registry and dispatches.
+package decision
+
+// DeviceMeshAction describes an invocation of a registered devicemesh tool.
+// It is a pure value — no client, no registry, just the name + input.
+//
+// Fields:
+//
+//   - Tool: the registered tool name in the agent's devicemesh.ToolRegistry
+//     (ex "exec", "fs.read", "fs.write").
+//   - Input: LLM-supplied arguments. Will be validated by the registry
+//     before reaching the network.
+//   - ResultKey: optional. The runtime stores the tool result under this key
+//     in the conversation state so the LLM can refer to it later. Empty
+//     string means "do not store, just send back as a tool message".
+type DeviceMeshAction struct {
+	Tool      string
+	Input     map[string]any
+	ResultKey string
+}
@@ -31,6 +31,7 @@ const (
 	ActionKindMCP     ActionKind = "mcp"
 	ActionKindLLM     ActionKind = "llm"
 	ActionKindDelegate ActionKind = "delegate"
+	ActionKindDeviceMesh ActionKind = "device_mesh"
 )

 // Action is a pure description of what the shell should do.
@@ -45,6 +46,7 @@ type Action struct {
 	MCP      *tools.MCPCallSpec
 	LLM      *LLMAction
 	Delegate *DelegateAction
+	DeviceMesh *DeviceMeshAction
 }

 type ReplyAction struct {
@@ -0,0 +1,199 @@
+# pkg/tools/devicemesh
+
+Tool registry framework that lets an LLM agent in `agents_and_robots` (VPS) call capabilities exposed by a remote `device_agent` over the WireGuard mesh.
+
+Issue: [0144a](../../../dev/issues/0144-agent-per-machine-llm.md) (POC for the broader 0144 spec).
+
+## What it does
+
+```
+LLM (Claude)
+  │  tool_call exec {argv:["ls","/tmp"]}
+  ▼
+ToolRegistry.Call("exec", input)
+  │  1. ValidateInput against tool's InputSchema
+  │  2. ArgMapping(input) → device-facing args
+  │  3. Client.Call(CapabilityRequest{capability: "shell.exec", args})
+  │  4. ResultMapping(resp.Result) → LLM-facing output
+  ▼
+HTTP POST http://10.42.0.10:7474/capability   (over mesh WG)
+  ▼
+device_agent on home-wsl runs the binary, returns audit_hash + result
+```
+
+The LLM never sees the HTTP layer; it sees a flat list of named tools with JSON-Schema inputs.
+
+## Pieces
+
+| File | Purpose |
+|---|---|
+| `client.go` | HTTP client to `POST /capability` and `GET /health` of the remote `device_agent`. Generates `request_id` (req_<12bytehex>) and `nonce` (16 random bytes base64) when missing. |
+| `types.go` | `ToolSpec` + `ToolRegistry`. Thread-safe registry, `Call` is the single dispatch entry point. |
+| `schema.go` | Mini JSON-Schema validator (object/array/string/integer/number/boolean + required + additionalProperties + enum). Enough to reject LLM mistakes without pulling a heavy dep. |
+| `tools_builtin.go` | The standard catalog: exec, shell.eval, fs.read, fs.write, fs.list, fs.stat, git.clone, git.commit, git.push, pkg.install, pkg.search, proc.list, proc.kill, docker.list, docker.exec, docker.logs. `RegisterBuiltins(reg, ModeUser|ModeSudo|ModeAll)` filters by `RequiresApproval`. `shell.eval` is special-cased to be registered in BOTH modes, with `RequiresApproval=true` forced in `ModeSudo` via `withApprovalRequired`. |
+
+## How to register a new tool
+
+```go
+import "github.com/enmanuel/agents/pkg/tools/devicemesh"
+
+reg.Register(devicemesh.ToolSpec{
+    Name:        "screenshot",
+    Description: "Capture the display on the remote device. Returns PNG base64.",
+    Capability:  "display.capture",
+    InputSchema: map[string]any{
+        "type":                 "object",
+        "additionalProperties": false,
+        "properties": map[string]any{
+            "format": map[string]any{"type": "string", "enum": []any{"png", "jpeg"}},
+        },
+    },
+    ArgMapping: func(in map[string]any) (map[string]any, error) {
+        // pure transform LLM → device
+        return in, nil
+    },
+    ResultMapping: func(r map[string]any) (any, error) {
+        // pure transform device → LLM
+        return r, nil
+    },
+    RequiresApproval: false, // user-scope
+})
+```
+
+Then add the tool name to `cfg.DeviceMesh.ToolsAllowed` in the agent's `config.yaml`.
+
+## Wiring (issue 0144c — done)
+
+The launcher now constructs the device mesh registry from `cfg.DeviceMesh` and surfaces every spec as a regular `tools.Tool` consumed by the existing LLM tool-use loop. No special LLM path; the LLM does not know (or care) that the tool's `Exec` ends up making an HTTP call over WireGuard.
+
+```
+config.AgentConfig.DeviceMesh (yaml block)
+    │
+    ▼  buildDeviceMeshRegistry(cfg, logger)   ← devagents/registry_build.go
+    │   1. resolve URL (env var override wins when present + non-empty)
+    │   2. NewClient(url) + apply Timeout
+    │   3. RegisterBuiltins(reg, mode)        ← user | sudo | all
+    │   4. FilterByAllowed(reg, tools_allowed)
+    │
+    ▼  devicemesh.ToolsForLLM(reg)            ← pkg/tools/devicemesh/adapter.go
+    │   1 tools.Tool per spec; Def.Parameters
+    │   compressed from JSON-Schema; Exec
+    │   closure routes through reg.Call
+    │
+    ▼  tools.Registry.Register(...)           ← devagents/registry_build.go
+    │
+    ▼  devagents/llm.go runLLM tool-use loop  ← unchanged
+```
+
+The same `*ToolRegistry` is also passed to `effects.NewRunnerWithDeviceMesh` so any rule that emits `decision.ActionKindDeviceMesh` (orchestrator pipelines, `!exec` builtin command, etc.) hits the same dispatcher. Both paths produce the same JSON envelope, so audit chains line up regardless of where the call originated.
+
+### Config block
+
+The agent's `config.yaml` opts in via:
+
+```yaml
+device_mesh:
+  enabled: true
+  device_id: home-wsl                # logged as audit context; aliased as "host"
+  mode: user                         # user | sudo | all
+  device_agent_url: "http://10.42.0.10:7474"
+  device_agent_url_env: AGENT_HOME_WSL_DEVICE_MESH_URL  # optional; wins when set + non-empty
+  manifest_id: manifest_home-wsl_v1  # metadata only; the device enforces
+  client_timeout_s: 60               # aliased as "timeout_seconds"
+  tools_allowed:                     # whitelist; empty = keep everything mode allowed
+    - exec
+    - fs.read
+    - fs.list
+```
+
+Names in `tools_allowed` that the catalog does not provide are logged with a `WARN device_mesh tools_allowed lists unknown tool` and dropped. The template ships extras like `project.create`, `memory.recall`, etc. that arrive in 0144d/e — they degrade gracefully today.
+
+### LLM-side view of a device tool
+
+The adapter compresses the device-mesh `InputSchema` into the flatter `tools.Def.Parameters` shape (each top-level property becomes one `tools.Param`). The description is enriched with a stable marker so the model can spot remote tools at a glance:
+
+```
+exec  →  "Execute a command on the remote device. argv is parsed as exec.Command (NO shell). ... [device_mesh: shell.exec]"
+pkg.install  →  "Install an OS package ... [device_mesh: pkg.install] (approval required)"
+```
+
+When `RequiresApproval=true`, the marker also reminds the model the call may be queued, which feeds back into the system prompt rules of `agent-<host>-sudo`.
+
+### Approval flow + LLM tool-result mapping
+
+When the device_agent returns `approval_status="queued"` and the operator does not click 👍 within the timeout (0134 §6.5), the device returns `approval_status="timeout"` or `ok=false, error="approval_required"`. The adapter does NOT silence this — it surfaces the error verbatim:
+
+```
+ToolRegistry.Call(...) → returns err = "devicemesh: shell.exec: approval_required"
+tools.Result{Err: err}
+runLLM → appends `role='tool'` message with `error: devicemesh: shell.exec: approval_required`
+LLM next iteration → can apologize to operator and ask for retry.
+```
+
+The actual approval UX (operator clicks 👍 in `#operator-approvals`) is the device_agent's responsibility (issue 0134 §6, validated end-to-end in flow 0009). Nothing new on the agents_and_robots side.
+
+### What this issue does NOT do
+
+- **Matrix-side approval rendering** is 0144f — `!preapprove`, `!approve req_id`, pre-approval cache.
+- **ed25519 manifest signing** is 0144h — today the wire format is correct but unsigned.
+- **`call_monitor` telemetry hook** that emits `function_id = capability_<name>_<lang>_<domain>` per call is 0144 §13 (separate plumbing in the audit writer).
+- **Cross-room correlation** (`delegate_sudo` posting to `#<host>-sudo` and the bot copying the reply back) is its own issue (0144 main spec §3.3 + 0144c original plan — left intentionally for the room/bus layer once approval is wired).
+
+## shell.eval — the powerful tool
+
+`shell.eval` is the **only** built-in tool that lets the LLM execute arbitrary free-form shell text on the device. Every other tool has a tightly-scoped JSON schema (paths, argv lists, container ids); `shell.eval` accepts a single string that the device hands to bash (Linux/WSL) or PowerShell (Windows) unmodified.
+
+It exists because no structured tool can cover every legal shell idiom: pipes, redirects, here-docs, `$()` expansions, complex globs, environment-aware composition. Without `shell.eval`, the LLM resorts to multi-step `exec` chains that lose fidelity (no shell metacharacters allowed in `exec`'s `argv`). With it, the LLM can ask for "give me the size of every `.log` in `/var/log` sorted desc" in one round-trip.
+
+### Guardrails (all device-side)
+
+The flag on `ToolSpec.RequiresApproval` is metadata only. The real protections live in the `device_agent`:
+
+1. **Hardcoded blocklist** — destructive patterns (`rm -rf /`, `dd if=/dev/...`, `mkfs`, fork-bombs `:(){:|:&};:`, `shutdown`, `reboot`, `:>/dev/sda`, ...) always reject regardless of agent role or operator. There is no override.
+2. **Auto-approve whitelist** — read-only / inspection patterns (`^git `, `^ls `, `^cat `, `^grep `, `^ps `, `^uptime`, `^df `, ...) execute directly without operator prompt. The whitelist lives in the device manifest, not here.
+3. **Operator approval** — anything that is neither blocked nor auto-approved returns `approval_status="queued"` in the result. The device sends an approval request to `#operator-approvals` in Element and waits up to 60s for the operator to confirm; on timeout the call returns `approval_status="timeout"` and the LLM must reword or `!retry`.
+
+The fields the LLM gets back from `shell.eval`: `stdout`, `stderr`, `exit_code`, `approval_status`, `cmd_executed` (post-normalization), `truncated` (true if output was capped), `duration_ms`.
+
+### When the LLM should call shell.eval
+
+Use it as the **fallback** for cases none of the structured tools cover:
+
+- Pipes, redirects, sub-shells, here-docs.
+- One-liners that combine `find` + `xargs` + `awk`.
+- Quick sanity checks (`uptime && df -h`).
+- Composing CLI tools the agent isn't going to call enough to warrant a dedicated tool spec.
+
+Avoid it for things that *do* have a structured tool: `fs.read`, `fs.list`, `git.commit`, `docker.exec`, etc. Those have predictable JSON shapes, narrower attack surface, and richer result mapping.
+
+### Designing manifests for user vs sudo agents
+
+`RegisterBuiltins` registers `shell.eval` in **both** `ModeUser` and `ModeSudo` because the device_agent — not the registry — decides what is safe. Recommended manifest defaults:
+
+| Agent role | `RequiresApproval` (LLM-facing metadata) | Device manifest |
+|---|---|---|
+| `agent-<host>` (user) | `false` | Auto-approve whitelist + operator approval for anything else. Hardcoded blocklist active. |
+| `agent-<host>-sudo` (sudo) | `true` (forced via `withApprovalRequired`) | **Every** invocation requires explicit operator approval. No auto-approve whitelist. Hardcoded blocklist active. |
+
+The `withApprovalRequired` helper clones the spec returned by `shellEvalSpec()` and flips `RequiresApproval=true` without mutating the source, so `ModeUser` registries that re-register after a `ModeSudo` run still get the unmodified spec. See `tools_builtin.go::RegisterBuiltins` for the special-case wiring.
+
+See also: `apps/device_agent/` (where the blocklist + auto-approve whitelist + approval flow live) and issue 0144 §6.4 for the RBAC design.
+
+## POC limitations (intentional)
+
+These are out of scope for 0144a and tracked in sibling issues:
+
+- **No retry**. A single `Call` failure surfaces immediately. The spec accepts this: tool failures go back to the LLM as a `role='tool'` error message and the LLM decides what to do (issue 0144 §7.1 reglas operativas 2).
+- **No pre-approval cache**. `RequiresApproval` is metadata only; the actual gate lives on the device_agent (0144 §3) and the pre-approvals table (0144f).
+- **No streaming**. Tools are request/response. Long-running commands (`apt-get install` of a 200MB package) block until done or timeout. Streaming for logs is its own future issue.
+- **No exponential backoff**. The Go HTTP client's transport defaults apply (TCP retries on connect, no per-request retry).
+- **No output sanitization**. The Runner formats the result as JSON; sanitization against prompt-injection payloads is 0144g.
+- **No telemetry to `call_monitor`**. The hook for `function_id = capability_<name>_<lang>_<domain>` is part of the agent runtime wiring (0144c) — this package emits no metrics on its own.
+- **No manifest signing on the request side**. The Client envelope matches the 0134 §2.1 wire format but does NOT sign; manifest signing arrives in 0144h.
+
+## Why these specific design choices
+
+- `Args map[string]any` (object) NOT `[]string` (positional). The current `device_agent` POC uses `[]string` for `shell.exec` (see `apps/device_agent/capability.go`). The 0134 protocol and 0144 spec call for object-shaped args because most capabilities (`fs.read`, `git.clone`, `docker.exec`) are not naturally positional. 0144h migrates the device_agent.
+- `ResultMapping` returns `any` instead of `map[string]any`. Some tools (eg the test's `echo` example) collapse their output to a string. The Runner JSON-encodes whatever comes back so the LLM always sees a stable representation.
+- `Capability` is a field on `ToolSpec`, not derived from `Name`. The 1:1 mapping is the common case (`fs.read` → `fs.read`), but `docker.list` → `docker.container.list` and `project.create` (future) compose multiple capabilities, so the indirection pays for itself.
+- Pure/impure split inside one package. `ToolSpec`, schema, mappings, registry are pure data and pure functions. Only `Client.Call` and `Client.Health` do I/O. The runtime composes them; tests substitute the Client.
@@ -0,0 +1,212 @@
+// adapter.go: bridges devicemesh.ToolSpec → tools.Tool so device-mesh tools
+// can ride the same registry + LLM tool-use loop that already handles
+// http/ssh/file/memory tools.
+//
+// The agents_and_robots tool stack is:
+//
+//	tools.Tool { Def: tools.Def{Name, Description, Parameters}, Exec: ToolFunc }
+//	  → tools.Registry.Register / ToLLMSpecs / ExecuteForRoom
+//	    → devagents/llm.go runLLM tool-use loop
+//
+// Device-mesh tools speak a richer language (full JSON-Schema in
+// InputSchema, capability indirection). The adapter compresses this into the
+// flatter tools.Param shape that the LLM-side codec already understands,
+// then routes Exec through ToolRegistry.Call so the schema validator,
+// ArgMapping, capability dispatch and ResultMapping all still run.
+//
+// Pure data + one impure closure: the returned tools.Tool's Exec hits the
+// network via the embedded Client, but everything outside Exec (Def, Param
+// extraction) is a pure transform.
+package devicemesh
+
+import (
+	"context"
+	"encoding/json"
+	"fmt"
+	"sort"
+
+	"github.com/enmanuel/agents/tools"
+)
+
+// ToolsForLLM walks the registry and returns one tools.Tool per registered
+// ToolSpec. Names are alpha-sorted for stable prompt-caching on the LLM side.
+//
+// Order matters: the returned slice is what the launcher feeds to
+// tools.Registry.Register, and the LLM sees the tools in registration order
+// when ToLLMSpecs() preserves it (it does — registry.Names is sorted).
+//
+// Returns an empty slice (never nil) when reg has no tools or is nil.
+func ToolsForLLM(reg *ToolRegistry) []tools.Tool {
+	if reg == nil {
+		return []tools.Tool{}
+	}
+	specs := reg.List()
+	out := make([]tools.Tool, 0, len(specs))
+	for _, spec := range specs {
+		out = append(out, AdaptTool(reg, spec))
+	}
+	return out
+}
+
+// AdaptTool wraps a single ToolSpec as a tools.Tool. Useful when callers
+// build a custom subset (ex tests that register one tool and exercise it
+// through the LLM loop). For the common "register all" case use ToolsForLLM.
+func AdaptTool(reg *ToolRegistry, spec ToolSpec) tools.Tool {
+	return tools.Tool{
+		Def: tools.Def{
+			Name:        spec.Name,
+			Description: enrichDescription(spec),
+			Parameters:  paramsFromSchema(spec.InputSchema),
+		},
+		Exec: func(ctx context.Context, args map[string]any) tools.Result {
+			if args == nil {
+				args = map[string]any{}
+			}
+			result, err := reg.Call(ctx, spec.Name, args)
+			if err != nil {
+				// Surface approval / validation / dispatch errors verbatim so
+				// the LLM tool-use loop can render them as tool messages and
+				// give the model a chance to self-correct on the next turn.
+				return tools.Result{Err: err}
+			}
+			return tools.Result{Output: formatToolResult(result)}
+		},
+	}
+}
+
+// enrichDescription appends a one-line marker to the spec description so the
+// LLM (and any human reading logs) can see at a glance that this tool is
+// remote and which capability it maps to. The format is stable and short to
+// avoid bloating the system prompt token budget.
+//
+// Example:
+//
+//	"Execute a command on the remote device. argv ... [device_mesh: shell.exec]"
+//
+// When RequiresApproval is true we also append " (approval required)" so the
+// model knows the call may be queued / rejected.
+func enrichDescription(spec ToolSpec) string {
+	desc := spec.Description
+	suffix := fmt.Sprintf(" [device_mesh: %s]", spec.Capability)
+	if spec.RequiresApproval {
+		suffix += " (approval required)"
+	}
+	return desc + suffix
+}
+
+// paramsFromSchema flattens a top-level JSON-Schema-lite (the shape device
+// mesh ToolSpec.InputSchema uses) into the slice of tools.Param the LLM
+// codec expects. Only the top-level properties are emitted; nested objects
+// get type "object" and the LLM is told to pass them through verbatim.
+//
+// Required fields from the schema's "required" array are reflected onto each
+// Param. Unknown shapes degrade gracefully — we never panic, we just emit
+// what we can. Pure function.
+func paramsFromSchema(schema map[string]any) []tools.Param {
+	if schema == nil {
+		return nil
+	}
+	props, _ := schema["properties"].(map[string]any)
+	if len(props) == 0 {
+		return nil
+	}
+
+	requiredSet := make(map[string]bool)
+	if reqRaw, ok := schema["required"]; ok {
+		switch req := reqRaw.(type) {
+		case []string:
+			for _, n := range req {
+				requiredSet[n] = true
+			}
+		case []any:
+			for _, n := range req {
+				if s, ok := n.(string); ok {
+					requiredSet[s] = true
+				}
+			}
+		}
+	}
+
+	// Sort property names to make the output deterministic — ToLLMSpecs sorts
+	// by tool name but does not sort param order; LLMs are sensitive to
+	// reordering when prompt-caching kicks in.
+	names := make([]string, 0, len(props))
+	for n := range props {
+		names = append(names, n)
+	}
+	sort.Strings(names)
+
+	params := make([]tools.Param, 0, len(names))
+	for _, name := range names {
+		propVal, _ := props[name].(map[string]any)
+		p := tools.Param{
+			Name:     name,
+			Required: requiredSet[name],
+		}
+		if propVal != nil {
+			if t, ok := propVal["type"].(string); ok {
+				p.Type = t
+			}
+			if d, ok := propVal["description"].(string); ok {
+				p.Description = d
+			}
+		}
+		if p.Type == "" {
+			p.Type = "string"
+		}
+		params = append(params, p)
+	}
+	return params
+}
+
+// formatToolResult renders the device_agent's reply as the JSON string that
+// gets shoved into the role='tool' message of the LLM transcript.
+//
+// - nil → ""
+// - string → returned as-is (avoids double-encoding)
+// - everything else → json.Marshal; on marshal failure fall back to a Go
+//   printf so we never drop data on the floor.
+//
+// Note: this mirrors shell/effects/runner.go::formatDeviceMeshResult so
+// ActionKindDeviceMesh and the adapter path produce consistent transcripts.
+func formatToolResult(v any) string {
+	if v == nil {
+		return ""
+	}
+	if s, ok := v.(string); ok {
+		return s
+	}
+	b, err := json.Marshal(v)
+	if err != nil {
+		return fmt.Sprintf("%v", v)
+	}
+	return string(b)
+}
+
+// FilterByAllowed returns a copy of reg containing only tools whose names
+// appear in the allowed set. Empty allowed → reg returned unchanged. Names
+// in `allowed` that do not match any tool are silently skipped (the
+// launcher logs them; this function is pure).
+//
+// The returned registry shares the same Client as the source, so dispatches
+// reach the same device_agent. Re-registering means we keep ArgMapping /
+// ResultMapping intact — no schema or spec recompute on the hot path.
+func FilterByAllowed(reg *ToolRegistry, allowed []string) *ToolRegistry {
+	if reg == nil {
+		return nil
+	}
+	if len(allowed) == 0 {
+		return reg
+	}
+	allowSet := make(map[string]bool, len(allowed))
+	for _, n := range allowed {
+		allowSet[n] = true
+	}
+	out := NewToolRegistry(reg.Client())
+	for _, spec := range reg.List() {
+		if allowSet[spec.Name] {
+			out.Register(spec)
+		}
+	}
+	return out
+}
@@ -0,0 +1,219 @@
+package devicemesh
+
+import (
+	"context"
+	"encoding/json"
+	"io"
+	"net/http"
+	"net/http/httptest"
+	"strings"
+	"testing"
+)
+
+func TestToolsForLLM_EmptyRegistry(t *testing.T) {
+	if got := ToolsForLLM(nil); len(got) != 0 {
+		t.Errorf("nil reg → expected 0 tools, got %d", len(got))
+	}
+	reg := NewToolRegistry(nil)
+	if got := ToolsForLLM(reg); len(got) != 0 {
+		t.Errorf("empty reg → expected 0 tools, got %d", len(got))
+	}
+}
+
+func TestToolsForLLM_PreservesNamesAndDescription(t *testing.T) {
+	reg := NewToolRegistry(NewClient("http://nowhere.invalid"))
+	reg.Register(ToolSpec{
+		Name:        "exec",
+		Capability:  "shell.exec",
+		Description: "Run a command",
+		InputSchema: map[string]any{
+			"type":     "object",
+			"required": []string{"argv"},
+			"properties": map[string]any{
+				"argv": map[string]any{"type": "array", "description": "argument vector"},
+			},
+		},
+	})
+	reg.Register(ToolSpec{
+		Name:             "pkg.install",
+		Capability:       "pkg.install",
+		Description:      "Install a package",
+		RequiresApproval: true,
+	})
+
+	got := ToolsForLLM(reg)
+	if len(got) != 2 {
+		t.Fatalf("expected 2 tools, got %d", len(got))
+	}
+
+	// Alpha-sorted by name
+	if got[0].Def.Name != "exec" || got[1].Def.Name != "pkg.install" {
+		t.Errorf("name order: %v", []string{got[0].Def.Name, got[1].Def.Name})
+	}
+
+	if !strings.Contains(got[0].Def.Description, "device_mesh: shell.exec") {
+		t.Errorf("description missing device_mesh marker: %q", got[0].Def.Description)
+	}
+	if !strings.Contains(got[1].Def.Description, "(approval required)") {
+		t.Errorf("approval-required marker missing: %q", got[1].Def.Description)
+	}
+
+	// Param extraction
+	if len(got[0].Def.Parameters) != 1 || got[0].Def.Parameters[0].Name != "argv" {
+		t.Errorf("expected one param 'argv', got %+v", got[0].Def.Parameters)
+	}
+	if !got[0].Def.Parameters[0].Required {
+		t.Errorf("expected argv to be required")
+	}
+}
+
+func TestAdaptTool_ExecRoutesThroughRegistry(t *testing.T) {
+	srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
+		var req CapabilityRequest
+		body, _ := io.ReadAll(r.Body)
+		_ = json.Unmarshal(body, &req)
+		// Echo the args back so we can assert ArgMapping ran.
+		_ = json.NewEncoder(w).Encode(CapabilityResponse{
+			RequestID: req.RequestID,
+			OK:        true,
+			Result:    map[string]any{"got": req.Args},
+		})
+	}))
+	defer srv.Close()
+
+	reg := NewToolRegistry(NewClient(srv.URL))
+	spec := ToolSpec{
+		Name:       "echo",
+		Capability: "x.echo",
+		InputSchema: map[string]any{
+			"type":     "object",
+			"required": []string{"msg"},
+			"properties": map[string]any{
+				"msg": map[string]any{"type": "string"},
+			},
+		},
+		ArgMapping: func(in map[string]any) (map[string]any, error) {
+			return map[string]any{"msg_upper": strings.ToUpper(in["msg"].(string))}, nil
+		},
+	}
+	reg.Register(spec)
+	tool := AdaptTool(reg, spec)
+
+	res := tool.Exec(context.Background(), map[string]any{"msg": "hi"})
+	if res.Err != nil {
+		t.Fatalf("exec err: %v", res.Err)
+	}
+	if !strings.Contains(res.Output, "HI") {
+		t.Errorf("expected HI in output, got %q", res.Output)
+	}
+}
+
+func TestAdaptTool_PropagatesValidationError(t *testing.T) {
+	reg := NewToolRegistry(NewClient("http://nowhere.invalid"))
+	spec := ToolSpec{
+		Name:       "needs_int",
+		Capability: "x.y",
+		InputSchema: map[string]any{
+			"type":     "object",
+			"required": []string{"n"},
+			"properties": map[string]any{
+				"n": map[string]any{"type": "integer"},
+			},
+			"additionalProperties": false,
+		},
+	}
+	reg.Register(spec)
+	tool := AdaptTool(reg, spec)
+
+	res := tool.Exec(context.Background(), map[string]any{"n": "not-an-int"})
+	if res.Err == nil {
+		t.Fatalf("expected validation error")
+	}
+	if !strings.Contains(res.Err.Error(), "needs_int") {
+		t.Errorf("error should mention tool name: %v", res.Err)
+	}
+}
+
+func TestFormatToolResult(t *testing.T) {
+	if got := formatToolResult(nil); got != "" {
+		t.Errorf("nil → expected empty, got %q", got)
+	}
+	if got := formatToolResult("plain"); got != "plain" {
+		t.Errorf("string passthrough: %q", got)
+	}
+	if got := formatToolResult(map[string]any{"a": 1}); got != `{"a":1}` {
+		t.Errorf("map encode: %q", got)
+	}
+}
+
+func TestFilterByAllowed(t *testing.T) {
+	reg := NewToolRegistry(NewClient("http://x"))
+	reg.Register(ToolSpec{Name: "a", Capability: "x.a"})
+	reg.Register(ToolSpec{Name: "b", Capability: "x.b"})
+	reg.Register(ToolSpec{Name: "c", Capability: "x.c"})
+
+	// Empty allow-list = passthrough
+	if got := FilterByAllowed(reg, nil); got.Len() != 3 {
+		t.Errorf("nil allowed → expected 3, got %d", got.Len())
+	}
+
+	// Subset
+	filtered := FilterByAllowed(reg, []string{"a", "c", "zzz"}) // zzz is silently dropped
+	if filtered.Len() != 2 {
+		t.Fatalf("expected 2 filtered, got %d", filtered.Len())
+	}
+	names := filtered.Names()
+	if names[0] != "a" || names[1] != "c" {
+		t.Errorf("unexpected names after filter: %v", names)
+	}
+
+	// Same Client shared
+	if filtered.Client() != reg.Client() {
+		t.Errorf("filtered should share Client with source")
+	}
+
+	// Nil source
+	if FilterByAllowed(nil, []string{"a"}) != nil {
+		t.Errorf("nil source → expected nil")
+	}
+}
+
+func TestParamsFromSchema_EdgeCases(t *testing.T) {
+	if got := paramsFromSchema(nil); got != nil {
+		t.Errorf("nil schema → expected nil, got %v", got)
+	}
+	// Missing properties
+	if got := paramsFromSchema(map[string]any{"type": "object"}); got != nil {
+		t.Errorf("no properties → expected nil, got %v", got)
+	}
+	// "required" as []any (json.Unmarshal default)
+	got := paramsFromSchema(map[string]any{
+		"required": []any{"foo"},
+		"properties": map[string]any{
+			"foo": map[string]any{"type": "string"},
+			"bar": map[string]any{"type": "integer"},
+		},
+	})
+	if len(got) != 2 {
+		t.Fatalf("expected 2 params, got %d", len(got))
+	}
+	// Sorted alpha: bar, foo
+	if got[0].Name != "bar" || got[1].Name != "foo" {
+		t.Errorf("expected sorted [bar, foo], got %+v", got)
+	}
+	if got[0].Required {
+		t.Errorf("bar should not be required")
+	}
+	if !got[1].Required {
+		t.Errorf("foo should be required")
+	}
+	// Type defaulting
+	got2 := paramsFromSchema(map[string]any{
+		"properties": map[string]any{
+			"x": map[string]any{},
+		},
+	})
+	if len(got2) != 1 || got2[0].Type != "string" {
+		t.Errorf("expected type default 'string', got %+v", got2)
+	}
+}
@@ -0,0 +1,259 @@
+// Package devicemesh provides a Go HTTP client and tool registry for invoking
+// capabilities exposed by a remote device_agent over the WireGuard mesh.
+//
+// Architecture: the LLM agent runs in the VPS (agents_and_robots). It needs to
+// execute capabilities on a remote PC (home-wsl, aurgi-pc, ...) reached via
+// mesh WG. The remote PC runs device_agent which exposes POST /capability.
+// This package is the "right arm" between the LLM (which only sees a tool
+// registry) and the device (which only sees capability envelopes).
+//
+// Pure/impure split: the registry, tool specs, schema validation, and arg
+// mappings are pure (no I/O). Client.Call is impure (HTTP). Both live in this
+// package to keep the surface area small, but Call is the only function that
+// touches the network.
+package devicemesh
+
+import (
+	"bytes"
+	"context"
+	"crypto/rand"
+	"encoding/base64"
+	"encoding/binary"
+	"encoding/hex"
+	"encoding/json"
+	"fmt"
+	"io"
+	"net/http"
+	"time"
+)
+
+// DefaultTimeout is applied when Client.Timeout is zero.
+const DefaultTimeout = 30 * time.Second
+
+// CapabilityRequest is the JSON envelope sent to POST /capability of the
+// remote device_agent. Matches the protocol defined in issue 0134 §2.1.
+//
+// `Args` is map[string]any (NOT []string like the current POC device_agent).
+// This matches the spec 0134 which uses object-shaped args. The device_agent
+// will migrate to this shape in issue 0144h alongside manifest signing.
+type CapabilityRequest struct {
+	RequestID  string         `json:"request_id"`
+	Capability string         `json:"capability"`
+	Args       map[string]any `json:"args"`
+	Nonce      string         `json:"nonce"`
+	Timestamp  int64          `json:"ts"`
+}
+
+// CapabilityResponse is the JSON envelope returned by the device_agent.
+// Result is decoded as `map[string]any` so tool mappings can normalize it.
+type CapabilityResponse struct {
+	RequestID  string         `json:"request_id"`
+	OK         bool           `json:"ok"`
+	Result     map[string]any `json:"result,omitempty"`
+	Error      string         `json:"error,omitempty"`
+	DurationMs int64          `json:"duration_ms"`
+	AuditHash  string         `json:"audit_hash,omitempty"`
+}
+
+// Client is an HTTP client to a single device_agent endpoint.
+//
+// One Client per remote device. The agent runtime constructs it from
+// cfg.DeviceMesh.DeviceAgentURL at startup and injects it into the tool
+// registry.
+type Client struct {
+	BaseURL    string
+	Timeout    time.Duration
+	HTTPClient *http.Client // optional override, useful for tests
+}
+
+// NewClient builds a Client with sensible defaults. BaseURL is used as-is;
+// callers are responsible for including scheme and port (ex
+// "http://10.42.0.10:7474").
+func NewClient(baseURL string) *Client {
+	return &Client{
+		BaseURL: baseURL,
+		Timeout: DefaultTimeout,
+	}
+}
+
+// httpClient returns the effective *http.Client. If the caller injected one
+// (HTTPClient != nil), use it as-is (tests rely on this). Otherwise build a
+// fresh one with Timeout. Defaults to DefaultTimeout when Timeout is zero.
+func (c *Client) httpClient() *http.Client {
+	if c.HTTPClient != nil {
+		return c.HTTPClient
+	}
+	t := c.Timeout
+	if t == 0 {
+		t = DefaultTimeout
+	}
+	return &http.Client{Timeout: t}
+}
+
+// Call sends a CapabilityRequest envelope to POST {BaseURL}/capability and
+// decodes the response.
+//
+// Side-effects:
+//   - Generates request_id (if empty) as a 12-byte random hex (24 chars).
+//   - Generates nonce (if empty) as 16 random bytes base64.
+//   - Sets ts to time.Now().Unix() if zero.
+//   - Network call.
+//
+// Errors:
+//   - Returns a non-nil error for transport failures, non-2xx HTTP statuses,
+//     or unparseable JSON.
+//   - A successful HTTP call with `ok=false` is NOT an error from Call's
+//     perspective — it returns the response with Error populated and lets the
+//     caller decide. This mirrors the spec: a failed capability is still a
+//     valid envelope.
+func (c *Client) Call(ctx context.Context, req CapabilityRequest) (*CapabilityResponse, error) {
+	if c == nil {
+		return nil, fmt.Errorf("devicemesh.Client: nil receiver")
+	}
+	if c.BaseURL == "" {
+		return nil, fmt.Errorf("devicemesh.Client: BaseURL is empty")
+	}
+	if req.Capability == "" {
+		return nil, fmt.Errorf("devicemesh.Call: capability is required")
+	}
+
+	if req.RequestID == "" {
+		id, err := randomRequestID()
+		if err != nil {
+			return nil, fmt.Errorf("generate request_id: %w", err)
+		}
+		req.RequestID = id
+	}
+	if req.Nonce == "" {
+		nonce, err := randomNonce()
+		if err != nil {
+			return nil, fmt.Errorf("generate nonce: %w", err)
+		}
+		req.Nonce = nonce
+	}
+	if req.Timestamp == 0 {
+		req.Timestamp = time.Now().Unix()
+	}
+	if req.Args == nil {
+		req.Args = map[string]any{}
+	}
+
+	body, err := json.Marshal(req)
+	if err != nil {
+		return nil, fmt.Errorf("marshal request: %w", err)
+	}
+
+	url := c.BaseURL + "/capability"
+	httpReq, err := http.NewRequestWithContext(ctx, http.MethodPost, url, bytes.NewReader(body))
+	if err != nil {
+		return nil, fmt.Errorf("build http request: %w", err)
+	}
+	httpReq.Header.Set("Content-Type", "application/json")
+	httpReq.Header.Set("Accept", "application/json")
+
+	resp, err := c.httpClient().Do(httpReq)
+	if err != nil {
+		return nil, fmt.Errorf("http call: %w", err)
+	}
+	defer resp.Body.Close()
+
+	respBody, err := io.ReadAll(resp.Body)
+	if err != nil {
+		return nil, fmt.Errorf("read response body: %w", err)
+	}
+
+	// The device_agent returns 500 with a CapabilityResponse body when the
+	// capability itself failed (see capability.go::capabilityHandler). We try
+	// to decode the body regardless of status — if it parses as a
+	// CapabilityResponse, return it (OK=false). Only when decoding fails do
+	// we surface an HTTP-level error.
+	var out CapabilityResponse
+	if err := json.Unmarshal(respBody, &out); err != nil {
+		return nil, fmt.Errorf("decode response (status=%d, body=%q): %w",
+			resp.StatusCode, truncate(string(respBody), 200), err)
+	}
+
+	// If the body didn't include any recognizable field and status is non-2xx,
+	// surface the HTTP error.
+	if resp.StatusCode >= 400 && out.RequestID == "" && out.Error == "" {
+		return nil, fmt.Errorf("http %d: %s", resp.StatusCode,
+			truncate(string(respBody), 200))
+	}
+
+	return &out, nil
+}
+
+// Health pings the device_agent's /health endpoint and returns the device
+// identity. Returns empty strings if the endpoint does not provide them.
+//
+// Expected response shape (loose):
+//
+//	{"device_id":"home-wsl","version":"0.1.0","ok":true}
+func (c *Client) Health(ctx context.Context) (deviceID, version string, err error) {
+	if c == nil {
+		return "", "", fmt.Errorf("devicemesh.Client: nil receiver")
+	}
+	if c.BaseURL == "" {
+		return "", "", fmt.Errorf("devicemesh.Client: BaseURL is empty")
+	}
+
+	url := c.BaseURL + "/health"
+	httpReq, err := http.NewRequestWithContext(ctx, http.MethodGet, url, nil)
+	if err != nil {
+		return "", "", fmt.Errorf("build http request: %w", err)
+	}
+	resp, err := c.httpClient().Do(httpReq)
+	if err != nil {
+		return "", "", fmt.Errorf("http call: %w", err)
+	}
+	defer resp.Body.Close()
+
+	respBody, err := io.ReadAll(resp.Body)
+	if err != nil {
+		return "", "", fmt.Errorf("read response body: %w", err)
+	}
+	if resp.StatusCode >= 400 {
+		return "", "", fmt.Errorf("health http %d: %s", resp.StatusCode,
+			truncate(string(respBody), 200))
+	}
+	var out struct {
+		DeviceID string `json:"device_id"`
+		Version  string `json:"version"`
+	}
+	if err := json.Unmarshal(respBody, &out); err != nil {
+		return "", "", fmt.Errorf("decode health body: %w", err)
+	}
+	return out.DeviceID, out.Version, nil
+}
+
+// randomRequestID returns a 24-char hex string seeded from crypto/rand.
+// Format is deliberately compact and URL-safe so it can appear in logs and
+// audit chains without escaping.
+func randomRequestID() (string, error) {
+	var buf [12]byte
+	// Stamp the high 4 bytes with seconds-since-epoch for rough sortability;
+	// the lower 8 bytes are random. This is not a ULID but plays the same role.
+	binary.BigEndian.PutUint32(buf[:4], uint32(time.Now().Unix()))
+	if _, err := rand.Read(buf[4:]); err != nil {
+		return "", err
+	}
+	return "req_" + hex.EncodeToString(buf[:]), nil
+}
+
+// randomNonce returns 16 random bytes base64-encoded (no padding) suitable
+// for the device_agent's nonce dedupe table.
+func randomNonce() (string, error) {
+	var buf [16]byte
+	if _, err := rand.Read(buf[:]); err != nil {
+		return "", err
+	}
+	return base64.RawStdEncoding.EncodeToString(buf[:]), nil
+}
+
+// truncate clips a string for error messages so giant payloads don't pollute logs.
+func truncate(s string, n int) string {
+	if len(s) <= n {
+		return s
+	}
+	return s[:n] + "..."
+}
@@ -0,0 +1,235 @@
+package devicemesh
+
+import (
+	"context"
+	"encoding/json"
+	"errors"
+	"io"
+	"net/http"
+	"net/http/httptest"
+	"strings"
+	"testing"
+	"time"
+)
+
+func TestClient_Call_RoundTrip(t *testing.T) {
+	var received CapabilityRequest
+	srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
+		if r.Method != http.MethodPost {
+			t.Errorf("expected POST, got %s", r.Method)
+		}
+		if r.URL.Path != "/capability" {
+			t.Errorf("expected /capability path, got %s", r.URL.Path)
+		}
+		body, _ := io.ReadAll(r.Body)
+		if err := json.Unmarshal(body, &received); err != nil {
+			t.Fatalf("decode body: %v", err)
+		}
+		w.Header().Set("Content-Type", "application/json")
+		_ = json.NewEncoder(w).Encode(CapabilityResponse{
+			RequestID:  received.RequestID,
+			OK:         true,
+			Result:     map[string]any{"echo": "ok"},
+			DurationMs: 5,
+			AuditHash:  "abc123",
+		})
+	}))
+	defer srv.Close()
+
+	c := NewClient(srv.URL)
+	resp, err := c.Call(context.Background(), CapabilityRequest{
+		Capability: "shell.exec",
+		Args:       map[string]any{"argv": []string{"ls"}},
+	})
+	if err != nil {
+		t.Fatalf("call: %v", err)
+	}
+	if !resp.OK {
+		t.Fatalf("expected ok=true, got %+v", resp)
+	}
+	if resp.AuditHash != "abc123" {
+		t.Errorf("audit hash mismatch: %q", resp.AuditHash)
+	}
+	if received.RequestID == "" {
+		t.Errorf("expected client to populate request_id")
+	}
+	if !strings.HasPrefix(received.RequestID, "req_") {
+		t.Errorf("request_id should have req_ prefix, got %q", received.RequestID)
+	}
+	if received.Nonce == "" {
+		t.Errorf("expected client to populate nonce")
+	}
+	if received.Timestamp == 0 {
+		t.Errorf("expected client to populate ts")
+	}
+	if received.Capability != "shell.exec" {
+		t.Errorf("capability mismatch: %q", received.Capability)
+	}
+}
+
+func TestClient_Call_PreservesProvidedIDs(t *testing.T) {
+	var received CapabilityRequest
+	srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
+		body, _ := io.ReadAll(r.Body)
+		_ = json.Unmarshal(body, &received)
+		_ = json.NewEncoder(w).Encode(CapabilityResponse{RequestID: received.RequestID, OK: true})
+	}))
+	defer srv.Close()
+
+	c := NewClient(srv.URL)
+	_, err := c.Call(context.Background(), CapabilityRequest{
+		RequestID:  "req_custom_123",
+		Capability: "fs.read",
+		Args:       map[string]any{"path": "/tmp/x"},
+		Nonce:      "fixed_nonce",
+		Timestamp:  1234567890,
+	})
+	if err != nil {
+		t.Fatalf("call: %v", err)
+	}
+	if received.RequestID != "req_custom_123" {
+		t.Errorf("request_id overwritten: %q", received.RequestID)
+	}
+	if received.Nonce != "fixed_nonce" {
+		t.Errorf("nonce overwritten: %q", received.Nonce)
+	}
+	if received.Timestamp != 1234567890 {
+		t.Errorf("ts overwritten: %d", received.Timestamp)
+	}
+}
+
+func TestClient_Call_OKFalseSurfacedNotError(t *testing.T) {
+	srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
+		// Device returns 500 with body; mimics device_agent capability handler.
+		w.WriteHeader(http.StatusInternalServerError)
+		_ = json.NewEncoder(w).Encode(CapabilityResponse{
+			RequestID: "req_x",
+			OK:        false,
+			Error:     "binary not whitelisted",
+		})
+	}))
+	defer srv.Close()
+
+	c := NewClient(srv.URL)
+	resp, err := c.Call(context.Background(), CapabilityRequest{Capability: "shell.exec"})
+	if err != nil {
+		t.Fatalf("expected nil error (body parseable), got: %v", err)
+	}
+	if resp.OK {
+		t.Errorf("expected ok=false")
+	}
+	if resp.Error == "" {
+		t.Errorf("expected error message populated")
+	}
+}
+
+func TestClient_Call_HTTPErrorWithUnparseableBody(t *testing.T) {
+	srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
+		w.WriteHeader(http.StatusBadGateway)
+		_, _ = w.Write([]byte("nginx html garbage"))
+	}))
+	defer srv.Close()
+
+	c := NewClient(srv.URL)
+	_, err := c.Call(context.Background(), CapabilityRequest{Capability: "shell.exec"})
+	if err == nil {
+		t.Fatalf("expected error for unparseable 502 body")
+	}
+}
+
+func TestClient_Call_ContextCancel(t *testing.T) {
+	srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
+		time.Sleep(500 * time.Millisecond)
+	}))
+	defer srv.Close()
+
+	c := NewClient(srv.URL)
+	ctx, cancel := context.WithTimeout(context.Background(), 50*time.Millisecond)
+	defer cancel()
+	_, err := c.Call(ctx, CapabilityRequest{Capability: "shell.exec"})
+	if err == nil {
+		t.Fatalf("expected timeout error, got nil")
+	}
+	if !errors.Is(err, context.DeadlineExceeded) && !strings.Contains(err.Error(), "deadline") && !strings.Contains(err.Error(), "context") {
+		t.Errorf("expected context-related error, got: %v", err)
+	}
+}
+
+func TestClient_Call_RejectsEmptyCapability(t *testing.T) {
+	c := NewClient("http://nowhere.invalid")
+	_, err := c.Call(context.Background(), CapabilityRequest{})
+	if err == nil {
+		t.Fatalf("expected error for empty capability")
+	}
+	if !strings.Contains(err.Error(), "capability") {
+		t.Errorf("expected capability-related error, got: %v", err)
+	}
+}
+
+func TestClient_Call_RejectsEmptyBaseURL(t *testing.T) {
+	c := &Client{}
+	_, err := c.Call(context.Background(), CapabilityRequest{Capability: "shell.exec"})
+	if err == nil {
+		t.Fatalf("expected error for empty BaseURL")
+	}
+}
+
+func TestClient_Health(t *testing.T) {
+	srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
+		if r.URL.Path != "/health" {
+			t.Errorf("expected /health, got %s", r.URL.Path)
+		}
+		_ = json.NewEncoder(w).Encode(map[string]string{
+			"device_id": "home-wsl",
+			"version":   "0.2.0",
+		})
+	}))
+	defer srv.Close()
+
+	c := NewClient(srv.URL)
+	id, v, err := c.Health(context.Background())
+	if err != nil {
+		t.Fatalf("health: %v", err)
+	}
+	if id != "home-wsl" {
+		t.Errorf("device_id mismatch: %q", id)
+	}
+	if v != "0.2.0" {
+		t.Errorf("version mismatch: %q", v)
+	}
+}
+
+func TestClient_Call_NoRetry(t *testing.T) {
+	// Confirm that a single failure does NOT trigger a retry — POC behavior
+	// per the README. The handler counts hits.
+	hits := 0
+	srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
+		hits++
+		w.WriteHeader(http.StatusBadGateway)
+		_, _ = w.Write([]byte("oops"))
+	}))
+	defer srv.Close()
+
+	c := NewClient(srv.URL)
+	_, _ = c.Call(context.Background(), CapabilityRequest{Capability: "shell.exec"})
+	if hits != 1 {
+		t.Errorf("expected exactly 1 hit (no retry), got %d", hits)
+	}
+}
+
+func TestRandomRequestID_UniqueAndPrefixed(t *testing.T) {
+	a, err := randomRequestID()
+	if err != nil {
+		t.Fatalf("randomRequestID: %v", err)
+	}
+	b, err := randomRequestID()
+	if err != nil {
+		t.Fatalf("randomRequestID: %v", err)
+	}
+	if a == b {
+		t.Errorf("collision: %q == %q", a, b)
+	}
+	if !strings.HasPrefix(a, "req_") {
+		t.Errorf("missing req_ prefix: %q", a)
+	}
+}
@@ -0,0 +1,147 @@
+package devicemesh
+
+import (
+	"context"
+	"encoding/json"
+	"io"
+	"net/http"
+	"net/http/httptest"
+	"strings"
+	"testing"
+)
+
+func TestToolRegistry_RegisterListGet(t *testing.T) {
+	reg := NewToolRegistry(nil)
+	reg.Register(ToolSpec{Name: "a", Capability: "x.a"})
+	reg.Register(ToolSpec{Name: "b", Capability: "x.b"})
+
+	got, ok := reg.Get("a")
+	if !ok {
+		t.Fatalf("Get(a) not found")
+	}
+	if got.Capability != "x.a" {
+		t.Errorf("capability: %q", got.Capability)
+	}
+
+	names := reg.Names()
+	if len(names) != 2 || names[0] != "a" || names[1] != "b" {
+		t.Errorf("Names sort: %v", names)
+	}
+}
+
+func TestToolRegistry_Call_UnknownTool(t *testing.T) {
+	reg := NewToolRegistry(NewClient("http://nowhere.invalid"))
+	_, err := reg.Call(context.Background(), "no.such.tool", nil)
+	if err == nil {
+		t.Fatalf("expected error for unknown tool")
+	}
+	if !strings.Contains(err.Error(), "unknown tool") {
+		t.Errorf("error message: %v", err)
+	}
+}
+
+func TestToolRegistry_Call_NilClient(t *testing.T) {
+	reg := NewToolRegistry(nil)
+	reg.Register(ToolSpec{Name: "x", Capability: "x.y"})
+	_, err := reg.Call(context.Background(), "x", nil)
+	if err == nil {
+		t.Fatalf("expected error when client is nil")
+	}
+}
+
+func TestToolRegistry_Call_InvalidInput(t *testing.T) {
+	reg := NewToolRegistry(NewClient("http://nowhere.invalid"))
+	reg.Register(ToolSpec{
+		Name:       "needs_string",
+		Capability: "x.y",
+		InputSchema: map[string]any{
+			"type":     "object",
+			"required": []string{"foo"},
+			"properties": map[string]any{
+				"foo": map[string]any{"type": "string"},
+			},
+			"additionalProperties": false,
+		},
+	})
+
+	// Missing required
+	_, err := reg.Call(context.Background(), "needs_string", map[string]any{})
+	if err == nil {
+		t.Errorf("expected error for missing required field")
+	}
+
+	// Wrong type
+	_, err = reg.Call(context.Background(), "needs_string", map[string]any{"foo": 42})
+	if err == nil {
+		t.Errorf("expected error for wrong type")
+	}
+
+	// Extra field
+	_, err = reg.Call(context.Background(), "needs_string", map[string]any{"foo": "bar", "extra": 1})
+	if err == nil {
+		t.Errorf("expected error for additional property")
+	}
+}
+
+func TestToolRegistry_Call_HappyPath(t *testing.T) {
+	srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
+		var req CapabilityRequest
+		body, _ := io.ReadAll(r.Body)
+		_ = json.Unmarshal(body, &req)
+		// Echo back the args under "received".
+		_ = json.NewEncoder(w).Encode(CapabilityResponse{
+			RequestID: req.RequestID,
+			OK:        true,
+			Result:    map[string]any{"received": req.Args},
+		})
+	}))
+	defer srv.Close()
+
+	reg := NewToolRegistry(NewClient(srv.URL))
+	reg.Register(ToolSpec{
+		Name:       "echo",
+		Capability: "x.echo",
+		InputSchema: map[string]any{
+			"type":     "object",
+			"required": []string{"msg"},
+			"properties": map[string]any{
+				"msg": map[string]any{"type": "string"},
+			},
+		},
+		ArgMapping: func(in map[string]any) (map[string]any, error) {
+			return map[string]any{"upper_msg": strings.ToUpper(in["msg"].(string))}, nil
+		},
+		ResultMapping: func(r map[string]any) (any, error) {
+			received := r["received"].(map[string]any)
+			return received["upper_msg"], nil
+		},
+	})
+
+	out, err := reg.Call(context.Background(), "echo", map[string]any{"msg": "hola"})
+	if err != nil {
+		t.Fatalf("call: %v", err)
+	}
+	if out != "HOLA" {
+		t.Errorf("expected HOLA, got %v", out)
+	}
+}
+
+func TestToolRegistry_Call_DeviceErrorPropagates(t *testing.T) {
+	srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
+		_ = json.NewEncoder(w).Encode(CapabilityResponse{
+			OK:    false,
+			Error: "binary not whitelisted",
+		})
+	}))
+	defer srv.Close()
+
+	reg := NewToolRegistry(NewClient(srv.URL))
+	reg.Register(ToolSpec{Name: "exec", Capability: "shell.exec"})
+	_, err := reg.Call(context.Background(), "exec", nil)
+	if err == nil {
+		t.Fatalf("expected device-side error to propagate")
+	}
+	if !strings.Contains(err.Error(), "binary not whitelisted") {
+		t.Errorf("error message lost: %v", err)
+	}
+}
@@ -0,0 +1,244 @@
+package devicemesh
+
+import (
+	"fmt"
+	"sort"
+)
+
+// schema.go: minimal JSON-Schema-like validator. We do NOT depend on a full
+// JSON Schema implementation — the surface we use is small and stable:
+//
+//   - type: "object" | "string" | "number" | "integer" | "boolean" | "array"
+//   - required: []string (names of fields that must be present and non-nil)
+//   - properties: map[string]<sub-schema>
+//   - items: <sub-schema> for arrays
+//   - enum: []any — allowed scalar values
+//   - additionalProperties: false (strict; default true)
+//
+// This is enough to catch LLM-induced typos (extra fields, wrong types) and
+// gives the runtime a place to grow if we need oneOf/pattern later.
+
+// ValidateInput checks the spec.InputSchema against the provided input map.
+// Returns nil on success, a descriptive error otherwise. The error path is
+// surfaced back to the LLM so it can self-correct.
+func ValidateInput(spec ToolSpec, input map[string]any) error {
+	if spec.InputSchema == nil {
+		// No schema means "anything goes". Tools without a schema are rare
+		// (mostly internal ones like memory.recall in 0144d).
+		return nil
+	}
+	return validateValue("input", input, spec.InputSchema)
+}
+
+func validateValue(path string, value any, schema map[string]any) error {
+	typ, _ := schema["type"].(string)
+	if typ == "" {
+		// No type declared: accept as-is.
+		return nil
+	}
+
+	// nil handling: only allowed if the field is not required (handled by parent).
+	if value == nil {
+		return fmt.Errorf("%s: expected %s, got null", path, typ)
+	}
+
+	switch typ {
+	case "object":
+		obj, ok := value.(map[string]any)
+		if !ok {
+			return fmt.Errorf("%s: expected object, got %T", path, value)
+		}
+		return validateObject(path, obj, schema)
+	case "array":
+		arr, ok := coerceToAnySlice(value)
+		if !ok {
+			return fmt.Errorf("%s: expected array, got %T", path, value)
+		}
+		return validateArray(path, arr, schema)
+	case "string":
+		if _, ok := value.(string); !ok {
+			return fmt.Errorf("%s: expected string, got %T", path, value)
+		}
+		return validateEnum(path, value, schema)
+	case "integer":
+		if !isInteger(value) {
+			return fmt.Errorf("%s: expected integer, got %T (%v)", path, value, value)
+		}
+		return validateEnum(path, value, schema)
+	case "number":
+		if !isNumber(value) {
+			return fmt.Errorf("%s: expected number, got %T", path, value)
+		}
+		return validateEnum(path, value, schema)
+	case "boolean":
+		if _, ok := value.(bool); !ok {
+			return fmt.Errorf("%s: expected boolean, got %T", path, value)
+		}
+	default:
+		return fmt.Errorf("%s: unknown schema type %q", path, typ)
+	}
+	return nil
+}
+
+func validateObject(path string, obj map[string]any, schema map[string]any) error {
+	// Required fields must be present and non-nil.
+	if reqRaw, ok := schema["required"]; ok {
+		req, _ := asStringSlice(reqRaw)
+		// Deterministic ordering of errors helps tests and LLM correction.
+		sort.Strings(req)
+		for _, name := range req {
+			v, present := obj[name]
+			if !present || v == nil {
+				return fmt.Errorf("%s.%s: required field missing", path, name)
+			}
+		}
+	}
+	props, _ := schema["properties"].(map[string]any)
+
+	// Strict additionalProperties: reject unknown keys when explicitly false.
+	additional := true
+	if ap, ok := schema["additionalProperties"]; ok {
+		if b, isBool := ap.(bool); isBool {
+			additional = b
+		}
+	}
+	if !additional && props != nil {
+		keys := make([]string, 0, len(obj))
+		for k := range obj {
+			keys = append(keys, k)
+		}
+		sort.Strings(keys)
+		for _, k := range keys {
+			if _, known := props[k]; !known {
+				return fmt.Errorf("%s.%s: unknown field (additionalProperties=false)", path, k)
+			}
+		}
+	}
+
+	if props == nil {
+		return nil
+	}
+	// Walk known properties.
+	names := make([]string, 0, len(props))
+	for k := range props {
+		names = append(names, k)
+	}
+	sort.Strings(names)
+	for _, name := range names {
+		sub, _ := props[name].(map[string]any)
+		if sub == nil {
+			continue
+		}
+		v, present := obj[name]
+		if !present {
+			continue // absent + not required ⇒ ok
+		}
+		if v == nil {
+			continue // nil + not required ⇒ ok
+		}
+		if err := validateValue(path+"."+name, v, sub); err != nil {
+			return err
+		}
+	}
+	return nil
+}
+
+func validateArray(path string, arr []any, schema map[string]any) error {
+	itemSchema, _ := schema["items"].(map[string]any)
+	if itemSchema == nil {
+		return nil
+	}
+	for i, v := range arr {
+		if err := validateValue(fmt.Sprintf("%s[%d]", path, i), v, itemSchema); err != nil {
+			return err
+		}
+	}
+	return nil
+}
+
+func validateEnum(path string, value any, schema map[string]any) error {
+	enumRaw, ok := schema["enum"]
+	if !ok {
+		return nil
+	}
+	enum, _ := enumRaw.([]any)
+	if len(enum) == 0 {
+		return nil
+	}
+	for _, allowed := range enum {
+		if fmt.Sprint(allowed) == fmt.Sprint(value) {
+			return nil
+		}
+	}
+	return fmt.Errorf("%s: value %v not in enum %v", path, value, enum)
+}
+
+func isInteger(v any) bool {
+	switch n := v.(type) {
+	case int, int8, int16, int32, int64, uint, uint8, uint16, uint32, uint64:
+		return true
+	case float32:
+		return float64(n) == float64(int64(n))
+	case float64:
+		return n == float64(int64(n))
+	}
+	return false
+}
+
+func isNumber(v any) bool {
+	switch v.(type) {
+	case int, int8, int16, int32, int64, uint, uint8, uint16, uint32, uint64, float32, float64:
+		return true
+	}
+	return false
+}
+
+// coerceToAnySlice accepts []any or any typed slice ([]string, []int, ...)
+// and returns it as []any. This keeps the schema validator forgiving when
+// callers pass native Go slices directly (common in tests and ArgMapping
+// outputs) instead of JSON-decoded []any.
+func coerceToAnySlice(v any) ([]any, bool) {
+	switch s := v.(type) {
+	case []any:
+		return s, true
+	case []string:
+		out := make([]any, len(s))
+		for i, e := range s {
+			out[i] = e
+		}
+		return out, true
+	case []int:
+		out := make([]any, len(s))
+		for i, e := range s {
+			out[i] = e
+		}
+		return out, true
+	case []float64:
+		out := make([]any, len(s))
+		for i, e := range s {
+			out[i] = e
+		}
+		return out, true
+	}
+	return nil, false
+}
+
+func asStringSlice(v any) ([]string, bool) {
+	switch s := v.(type) {
+	case []string:
+		out := make([]string, len(s))
+		copy(out, s)
+		return out, true
+	case []any:
+		out := make([]string, 0, len(s))
+		for _, e := range s {
+			str, ok := e.(string)
+			if !ok {
+				return nil, false
+			}
+			out = append(out, str)
+		}
+		return out, true
+	}
+	return nil, false
+}
@@ -0,0 +1,775 @@
+package devicemesh
+
+import (
+	"fmt"
+	"strings"
+)
+
+// tools_builtin.go: declarative catalog of the standard tools an LLM agent
+// gets when its config enables device_mesh. The list mirrors issue 0144 §2.1.
+//
+// Each ToolSpec is pure data: descriptions for the LLM, JSON-Schema-lite for
+// validation, and pure ArgMapping / ResultMapping functions. No I/O.
+//
+// Mode "user" registers the tools allowed for the unprivileged agent (uid
+// lucas in home-wsl). Mode "sudo" registers tools whose underlying
+// capability requires_approval: true on the device_agent side. The
+// separation is physical, not just RBAC — the user-agent process literally
+// never sees pkg.install in its registry, so prompt injection cannot
+// surface it (issue 0144 §1.2).
+
+// RegistrationMode controls which subset of the built-in catalog is
+// registered. "user" gets non-approval tools. "sudo" gets only the approval
+// gated tools. "all" gets everything (mainly for tests and tooling).
+type RegistrationMode string
+
+const (
+	ModeUser RegistrationMode = "user"
+	ModeSudo RegistrationMode = "sudo"
+	ModeAll  RegistrationMode = "all"
+)
+
+// RegisterBuiltins registers the standard catalog of devicemesh tools into
+// the given registry, filtered by the requested mode.
+//
+// Returns the list of registered tool names so callers can log it.
+//
+// shell.eval is a special case: it is always registered in BOTH ModeUser and
+// ModeSudo, but the sudo variant is rewritten via withApprovalRequired so the
+// LLM sees RequiresApproval=true. The real guardrail (blocklist +
+// auto-approve patterns + operator approval) lives in the device_agent — the
+// flag here is metadata that drives RBAC at the device_mesh edge.
+func RegisterBuiltins(reg *ToolRegistry, mode RegistrationMode) []string {
+	if reg == nil {
+		return nil
+	}
+	all := builtinSpecs()
+	registered := make([]string, 0, len(all))
+	for _, spec := range all {
+		switch mode {
+		case ModeUser:
+			if spec.RequiresApproval {
+				continue
+			}
+		case ModeSudo:
+			// In sudo mode, force RequiresApproval=true on shell.eval so the
+			// metadata exposed to the LLM matches the device manifest. Other
+			// non-approval tools are skipped (sudo agents only see approval
+			// gated tools).
+			if spec.Name == "shell.eval" {
+				spec = withApprovalRequired(spec)
+			} else if !spec.RequiresApproval {
+				continue
+			}
+		case ModeAll:
+			// fallthrough — accept everything
+		default:
+			// Unknown mode: behave like "user" (safer default).
+			if spec.RequiresApproval {
+				continue
+			}
+		}
+		reg.Register(spec)
+		registered = append(registered, spec.Name)
+	}
+	return registered
+}
+
+// withApprovalRequired returns a clone of spec with RequiresApproval set to
+// true. Used to upgrade a tool that defaults to "no approval" (user scope)
+// into its sudo equivalent without mutating the original spec returned by
+// builtinSpecs(). Pure function — no side effects.
+func withApprovalRequired(spec ToolSpec) ToolSpec {
+	spec.RequiresApproval = true
+	return spec
+}
+
+// builtinSpecs returns the full catalog (both user and sudo). The split into
+// scopes happens in RegisterBuiltins. Defined as a function so future
+// builders can compose this with host-specific overrides.
+func builtinSpecs() []ToolSpec {
+	return []ToolSpec{
+		execSpec(),
+		shellEvalSpec(),
+		fsReadSpec(),
+		fsWriteSpec(),
+		fsListSpec(),
+		fsStatSpec(),
+		gitCloneSpec(),
+		gitCommitSpec(),
+		gitPushSpec(),
+		pkgInstallSpec(),
+		pkgSearchSpec(),
+		procListSpec(),
+		procKillSpec(),
+		dockerListSpec(),
+		dockerExecSpec(),
+		dockerLogsSpec(),
+	}
+}
+
+// ----- exec -----
+
+func execSpec() ToolSpec {
+	return ToolSpec{
+		Name: "exec",
+		Description: "Execute a command on the remote device. argv is parsed as exec.Command (NO shell). " +
+			"Returns stdout, stderr, exit_code, duration_ms. Use this for: listing files, running scripts, " +
+			"invoking CLIs already installed. Do NOT use this for shell redirection, pipes, or globs.",
+		Capability: "shell.exec",
+		InputSchema: map[string]any{
+			"type":                 "object",
+			"required":             []string{"argv"},
+			"additionalProperties": false,
+			"properties": map[string]any{
+				"argv": map[string]any{
+					"type":  "array",
+					"items": map[string]any{"type": "string"},
+				},
+				"cwd":       map[string]any{"type": "string"},
+				"timeout_s": map[string]any{"type": "integer"},
+			},
+		},
+		ArgMapping: func(input map[string]any) (map[string]any, error) {
+			argv, err := requireStringSlice(input, "argv")
+			if err != nil {
+				return nil, err
+			}
+			if len(argv) == 0 {
+				return nil, fmt.Errorf("argv must not be empty")
+			}
+			out := map[string]any{"argv": argv}
+			if cwd, ok := input["cwd"].(string); ok && cwd != "" {
+				out["cwd"] = cwd
+			}
+			if timeout, ok := input["timeout_s"]; ok {
+				out["timeout_s"] = toInt(timeout, 30)
+			}
+			return out, nil
+		},
+		ResultMapping: func(result map[string]any) (any, error) {
+			// Pass through but normalize: ensure exit_code is int.
+			if result == nil {
+				return map[string]any{
+					"stdout":    "",
+					"stderr":    "",
+					"exit_code": 0,
+				}, nil
+			}
+			out := map[string]any{
+				"stdout":    getString(result, "stdout"),
+				"stderr":    getString(result, "stderr"),
+				"exit_code": toInt(result["exit_code"], 0),
+			}
+			if dur, ok := result["duration_ms"]; ok {
+				out["duration_ms"] = toInt(dur, 0)
+			}
+			return out, nil
+		},
+	}
+}
+
+// ----- shell.eval -----
+
+// shellEvalSpec is the "powerful tool": a free-form shell command evaluator.
+// Unlike exec (positional argv, no shell), shell.eval accepts a single string
+// passed verbatim to bash or powershell on the device.
+//
+// Its existence is justified because no structured tool can cover every legal
+// shell idiom (pipes, redirects, here-docs, $() expansions, complex globs).
+// Without it the LLM resorts to multi-step exec chains and loses fidelity.
+//
+// Safety: this tool's RequiresApproval default is false in ModeUser. The real
+// guardrails live device-side:
+//
+//   - Hardcoded blocklist (rm -rf /, dd, mkfs, fork-bombs, shutdown, ...)
+//     always rejects regardless of agent or operator.
+//   - Auto-approve whitelist ('^git ', '^ls ', '^cat ', ...) bypasses the
+//     operator and executes directly.
+//   - Anything else returns approval_status='queued' and waits for the
+//     operator to confirm in #operator-approvals.
+//
+// For sudo agents, RegisterBuiltins promotes RequiresApproval=true via
+// withApprovalRequired so the LLM-facing metadata matches the device manifest.
+func shellEvalSpec() ToolSpec {
+	return ToolSpec{
+		Name: "shell.eval",
+		Description: "Evaluate a free-form shell command on the device. Auto-detects bash (Linux/WSL) or powershell (Windows). " +
+			"Hardcoded safety blocklist applies (rm -rf /, dd, mkfs, fork-bombs, shutdown, etc.) — these always reject. " +
+			"Auto-approve patterns ('^git ', '^ls ', '^cat ', etc.) execute directly. Other commands may require operator " +
+			"approval (returns approval_status='queued' and the operator must confirm in Element).",
+		Capability: "shell.eval",
+		// RequiresApproval is false here so user mode picks it up. Sudo mode
+		// rewrites this via withApprovalRequired in RegisterBuiltins.
+		RequiresApproval: false,
+		InputSchema: map[string]any{
+			"type":                 "object",
+			"required":             []string{"cmd"},
+			"additionalProperties": false,
+			"properties": map[string]any{
+				"cmd": map[string]any{
+					"type":        "string",
+					"description": "Shell command string. Bash or PowerShell syntax depending on device OS.",
+					"minLength":   1,
+				},
+				"shell": map[string]any{
+					"type":        "string",
+					"enum":        []any{"auto", "bash", "powershell"},
+					"description": "Force shell. 'auto' (default) picks by device OS.",
+				},
+				"cwd": map[string]any{
+					"type":        "string",
+					"description": "Optional absolute path to run from.",
+				},
+			},
+		},
+		ArgMapping: func(input map[string]any) (map[string]any, error) {
+			cmd, err := requireString(input, "cmd")
+			if err != nil {
+				return nil, err
+			}
+			if cmd == "" {
+				return nil, fmt.Errorf("cmd must not be empty")
+			}
+			out := map[string]any{"cmd": cmd}
+			if s, ok := input["shell"].(string); ok && s != "" {
+				out["shell"] = s
+			}
+			if c, ok := input["cwd"].(string); ok && c != "" {
+				out["cwd"] = c
+			}
+			return out, nil
+		},
+		ResultMapping: func(result map[string]any) (any, error) {
+			// Pass result through — the LLM sees fields like stdout, stderr,
+			// exit_code, approval_status, cmd_executed, truncated, duration_ms
+			// as the device_agent returns them. No normalization here because
+			// the device contract is richer than exec (approval_status etc.)
+			// and we do not want to drop fields the device may add later.
+			if result == nil {
+				return map[string]any{}, nil
+			}
+			return result, nil
+		},
+	}
+}
+
+// ----- fs.read -----
+
+func fsReadSpec() ToolSpec {
+	return ToolSpec{
+		Name: "fs.read",
+		Description: "Read a file on the remote device. Returns content_b64 (base64) or content (utf8), " +
+			"size, mtime. Use max_bytes to cap large files.",
+		Capability: "fs.read",
+		InputSchema: map[string]any{
+			"type":                 "object",
+			"required":             []string{"path"},
+			"additionalProperties": false,
+			"properties": map[string]any{
+				"path":      map[string]any{"type": "string"},
+				"max_bytes": map[string]any{"type": "integer"},
+			},
+		},
+		ArgMapping: func(input map[string]any) (map[string]any, error) {
+			path, err := requireString(input, "path")
+			if err != nil {
+				return nil, err
+			}
+			out := map[string]any{"path": path}
+			if mb, ok := input["max_bytes"]; ok {
+				out["max_bytes"] = toInt(mb, 0)
+			}
+			return out, nil
+		},
+		ResultMapping: passthrough,
+	}
+}
+
+// ----- fs.write -----
+
+func fsWriteSpec() ToolSpec {
+	return ToolSpec{
+		Name: "fs.write",
+		Description: "Write a file on the remote device. Creates parent dirs if missing. Overwrites if " +
+			"the file exists. Use content_b64 for binary; use content for utf8. Optional mode (octal int).",
+		Capability: "fs.write",
+		// fs.write to system paths requires_approval is enforced device-side by
+		// the manifest. The tool itself is registered for both modes.
+		InputSchema: map[string]any{
+			"type":                 "object",
+			"required":             []string{"path"},
+			"additionalProperties": false,
+			"properties": map[string]any{
+				"path":        map[string]any{"type": "string"},
+				"content":     map[string]any{"type": "string"},
+				"content_b64": map[string]any{"type": "string"},
+				"mode":        map[string]any{"type": "integer"},
+			},
+		},
+		ArgMapping: func(input map[string]any) (map[string]any, error) {
+			path, err := requireString(input, "path")
+			if err != nil {
+				return nil, err
+			}
+			content, hasContent := input["content"].(string)
+			contentB64, hasB64 := input["content_b64"].(string)
+			if !hasContent && !hasB64 {
+				return nil, fmt.Errorf("fs.write requires content or content_b64")
+			}
+			out := map[string]any{"path": path}
+			if hasContent {
+				out["content"] = content
+			}
+			if hasB64 {
+				out["content_b64"] = contentB64
+			}
+			if mode, ok := input["mode"]; ok {
+				out["mode"] = toInt(mode, 0)
+			}
+			return out, nil
+		},
+		ResultMapping: passthrough,
+	}
+}
+
+// ----- fs.list -----
+
+func fsListSpec() ToolSpec {
+	return ToolSpec{
+		Name:        "fs.list",
+		Description: "List a directory on the remote device. Returns entries: [{name, kind, size, mtime}]. Optional glob filter.",
+		Capability:  "fs.list",
+		InputSchema: map[string]any{
+			"type":                 "object",
+			"required":             []string{"dir"},
+			"additionalProperties": false,
+			"properties": map[string]any{
+				"dir":  map[string]any{"type": "string"},
+				"glob": map[string]any{"type": "string"},
+			},
+		},
+		ArgMapping: func(input map[string]any) (map[string]any, error) {
+			dir, err := requireString(input, "dir")
+			if err != nil {
+				return nil, err
+			}
+			out := map[string]any{"dir": dir}
+			if glob, ok := input["glob"].(string); ok && glob != "" {
+				out["glob"] = glob
+			}
+			return out, nil
+		},
+		ResultMapping: passthrough,
+	}
+}
+
+// ----- fs.stat -----
+
+func fsStatSpec() ToolSpec {
+	return ToolSpec{
+		Name:        "fs.stat",
+		Description: "Stat a file or dir on the remote device. Returns kind, size, mtime, mode.",
+		Capability:  "fs.stat",
+		InputSchema: map[string]any{
+			"type":                 "object",
+			"required":             []string{"path"},
+			"additionalProperties": false,
+			"properties": map[string]any{
+				"path": map[string]any{"type": "string"},
+			},
+		},
+		ArgMapping: func(input map[string]any) (map[string]any, error) {
+			path, err := requireString(input, "path")
+			if err != nil {
+				return nil, err
+			}
+			return map[string]any{"path": path}, nil
+		},
+		ResultMapping: passthrough,
+	}
+}
+
+// ----- git.clone -----
+
+func gitCloneSpec() ToolSpec {
+	return ToolSpec{
+		Name:        "git.clone",
+		Description: "Clone a git repository on the remote device. Returns commit_sha and branch.",
+		Capability:  "git.clone",
+		InputSchema: map[string]any{
+			"type":                 "object",
+			"required":             []string{"url", "dest"},
+			"additionalProperties": false,
+			"properties": map[string]any{
+				"url":    map[string]any{"type": "string"},
+				"dest":   map[string]any{"type": "string"},
+				"branch": map[string]any{"type": "string"},
+			},
+		},
+		ArgMapping: func(input map[string]any) (map[string]any, error) {
+			url, err := requireString(input, "url")
+			if err != nil {
+				return nil, err
+			}
+			dest, err := requireString(input, "dest")
+			if err != nil {
+				return nil, err
+			}
+			out := map[string]any{"url": url, "dest": dest}
+			if branch, ok := input["branch"].(string); ok && branch != "" {
+				out["branch"] = branch
+			}
+			return out, nil
+		},
+		ResultMapping: passthrough,
+	}
+}
+
+// ----- git.commit -----
+
+func gitCommitSpec() ToolSpec {
+	return ToolSpec{
+		Name: "git.commit",
+		Description: "Stage and commit changes in a repo on the remote device. Stages all changes by " +
+			"default; pass files: [\"a\",\"b\"] to stage a subset. Returns commit_sha.",
+		Capability: "git.commit",
+		InputSchema: map[string]any{
+			"type":                 "object",
+			"required":             []string{"repo", "message"},
+			"additionalProperties": false,
+			"properties": map[string]any{
+				"repo":    map[string]any{"type": "string"},
+				"message": map[string]any{"type": "string"},
+				"files":   map[string]any{"type": "array", "items": map[string]any{"type": "string"}},
+			},
+		},
+		ArgMapping: func(input map[string]any) (map[string]any, error) {
+			repo, err := requireString(input, "repo")
+			if err != nil {
+				return nil, err
+			}
+			msg, err := requireString(input, "message")
+			if err != nil {
+				return nil, err
+			}
+			out := map[string]any{"repo": repo, "message": msg}
+			if files, ok := input["files"]; ok {
+				if slice, e := asStringSliceLoose(files); e == nil && len(slice) > 0 {
+					out["files"] = slice
+				}
+			}
+			return out, nil
+		},
+		ResultMapping: passthrough,
+	}
+}
+
+// ----- git.push -----
+
+func gitPushSpec() ToolSpec {
+	return ToolSpec{
+		Name:        "git.push",
+		Description: "Push the current branch of a repo. Optional remote (default origin) and branch (default current).",
+		Capability:  "git.push",
+		InputSchema: map[string]any{
+			"type":                 "object",
+			"required":             []string{"repo"},
+			"additionalProperties": false,
+			"properties": map[string]any{
+				"repo":   map[string]any{"type": "string"},
+				"remote": map[string]any{"type": "string"},
+				"branch": map[string]any{"type": "string"},
+			},
+		},
+		ArgMapping: func(input map[string]any) (map[string]any, error) {
+			repo, err := requireString(input, "repo")
+			if err != nil {
+				return nil, err
+			}
+			out := map[string]any{"repo": repo}
+			if r, ok := input["remote"].(string); ok && r != "" {
+				out["remote"] = r
+			}
+			if b, ok := input["branch"].(string); ok && b != "" {
+				out["branch"] = b
+			}
+			return out, nil
+		},
+		ResultMapping: passthrough,
+	}
+}
+
+// ----- pkg.install -----
+
+func pkgInstallSpec() ToolSpec {
+	return ToolSpec{
+		Name: "pkg.install",
+		Description: "Install an OS package (apt/dnf/pacman depending on host). Requires approval — the " +
+			"operator must accept the action in #operator-approvals before it executes.",
+		Capability:       "pkg.install",
+		RequiresApproval: true,
+		InputSchema: map[string]any{
+			"type":                 "object",
+			"required":             []string{"name"},
+			"additionalProperties": false,
+			"properties": map[string]any{
+				"name": map[string]any{"type": "string"},
+			},
+		},
+		ArgMapping: func(input map[string]any) (map[string]any, error) {
+			name, err := requireString(input, "name")
+			if err != nil {
+				return nil, err
+			}
+			return map[string]any{"name": name}, nil
+		},
+		ResultMapping: passthrough,
+	}
+}
+
+// ----- pkg.search -----
+
+func pkgSearchSpec() ToolSpec {
+	return ToolSpec{
+		Name:        "pkg.search",
+		Description: "Search the OS package cache. No install. Returns matching packages.",
+		Capability:  "pkg.search",
+		InputSchema: map[string]any{
+			"type":                 "object",
+			"required":             []string{"query"},
+			"additionalProperties": false,
+			"properties": map[string]any{
+				"query": map[string]any{"type": "string"},
+			},
+		},
+		ArgMapping: func(input map[string]any) (map[string]any, error) {
+			q, err := requireString(input, "query")
+			if err != nil {
+				return nil, err
+			}
+			return map[string]any{"query": q}, nil
+		},
+		ResultMapping: passthrough,
+	}
+}
+
+// ----- proc.list -----
+
+func procListSpec() ToolSpec {
+	return ToolSpec{
+		Name:        "proc.list",
+		Description: "List processes on the remote device. Optional filters: user, name_like.",
+		Capability:  "proc.list",
+		InputSchema: map[string]any{
+			"type":                 "object",
+			"additionalProperties": false,
+			"properties": map[string]any{
+				"user":      map[string]any{"type": "string"},
+				"name_like": map[string]any{"type": "string"},
+			},
+		},
+		ArgMapping: func(input map[string]any) (map[string]any, error) {
+			out := map[string]any{}
+			if u, ok := input["user"].(string); ok && u != "" {
+				out["user"] = u
+			}
+			if n, ok := input["name_like"].(string); ok && n != "" {
+				out["name_like"] = n
+			}
+			return out, nil
+		},
+		ResultMapping: passthrough,
+	}
+}
+
+// ----- proc.kill -----
+
+func procKillSpec() ToolSpec {
+	return ToolSpec{
+		Name: "proc.kill",
+		Description: "Send a signal to a process. Signal default TERM. Killing destructive signals on " +
+			"processes owned by another uid requires approval.",
+		Capability:       "proc.kill",
+		RequiresApproval: true,
+		InputSchema: map[string]any{
+			"type":                 "object",
+			"required":             []string{"pid"},
+			"additionalProperties": false,
+			"properties": map[string]any{
+				"pid":    map[string]any{"type": "integer"},
+				"signal": map[string]any{"type": "string"},
+			},
+		},
+		ArgMapping: func(input map[string]any) (map[string]any, error) {
+			pidRaw, ok := input["pid"]
+			if !ok {
+				return nil, fmt.Errorf("proc.kill: pid is required")
+			}
+			out := map[string]any{"pid": toInt(pidRaw, 0)}
+			if sig, ok := input["signal"].(string); ok && sig != "" {
+				out["signal"] = strings.ToUpper(sig)
+			}
+			return out, nil
+		},
+		ResultMapping: passthrough,
+	}
+}
+
+// ----- docker.list -----
+
+func dockerListSpec() ToolSpec {
+	return ToolSpec{
+		Name:        "docker.list",
+		Description: "List Docker containers on the remote device. Pass all=true to include stopped.",
+		Capability:  "docker.container.list",
+		InputSchema: map[string]any{
+			"type":                 "object",
+			"additionalProperties": false,
+			"properties": map[string]any{
+				"all": map[string]any{"type": "boolean"},
+			},
+		},
+		ArgMapping: func(input map[string]any) (map[string]any, error) {
+			out := map[string]any{}
+			if all, ok := input["all"].(bool); ok {
+				out["all"] = all
+			}
+			return out, nil
+		},
+		ResultMapping: passthrough,
+	}
+}
+
+// ----- docker.exec -----
+
+func dockerExecSpec() ToolSpec {
+	return ToolSpec{
+		Name:        "docker.exec",
+		Description: "Exec a command in a Docker container. argv is a string list (no shell).",
+		Capability:  "docker.container.exec",
+		InputSchema: map[string]any{
+			"type":                 "object",
+			"required":             []string{"container", "argv"},
+			"additionalProperties": false,
+			"properties": map[string]any{
+				"container": map[string]any{"type": "string"},
+				"argv":      map[string]any{"type": "array", "items": map[string]any{"type": "string"}},
+			},
+		},
+		ArgMapping: func(input map[string]any) (map[string]any, error) {
+			container, err := requireString(input, "container")
+			if err != nil {
+				return nil, err
+			}
+			argv, err := requireStringSlice(input, "argv")
+			if err != nil {
+				return nil, err
+			}
+			if len(argv) == 0 {
+				return nil, fmt.Errorf("argv must not be empty")
+			}
+			return map[string]any{"container": container, "argv": argv}, nil
+		},
+		ResultMapping: passthrough,
+	}
+}
+
+// ----- docker.logs -----
+
+func dockerLogsSpec() ToolSpec {
+	return ToolSpec{
+		Name:        "docker.logs",
+		Description: "Read the last N lines of a Docker container's logs.",
+		Capability:  "docker.container.logs",
+		InputSchema: map[string]any{
+			"type":                 "object",
+			"required":             []string{"container"},
+			"additionalProperties": false,
+			"properties": map[string]any{
+				"container": map[string]any{"type": "string"},
+				"tail":      map[string]any{"type": "integer"},
+			},
+		},
+		ArgMapping: func(input map[string]any) (map[string]any, error) {
+			container, err := requireString(input, "container")
+			if err != nil {
+				return nil, err
+			}
+			out := map[string]any{"container": container}
+			if t, ok := input["tail"]; ok {
+				out["tail"] = toInt(t, 100)
+			}
+			return out, nil
+		},
+		ResultMapping: passthrough,
+	}
+}
+
+// ----- helpers -----
+
+func passthrough(result map[string]any) (any, error) { return result, nil }
+
+func requireString(input map[string]any, key string) (string, error) {
+	v, ok := input[key]
+	if !ok || v == nil {
+		return "", fmt.Errorf("%s is required", key)
+	}
+	s, ok := v.(string)
+	if !ok {
+		return "", fmt.Errorf("%s must be a string, got %T", key, v)
+	}
+	return s, nil
+}
+
+func requireStringSlice(input map[string]any, key string) ([]string, error) {
+	v, ok := input[key]
+	if !ok || v == nil {
+		return nil, fmt.Errorf("%s is required", key)
+	}
+	return asStringSliceLoose(v)
+}
+
+func asStringSliceLoose(v any) ([]string, error) {
+	switch s := v.(type) {
+	case []string:
+		out := make([]string, len(s))
+		copy(out, s)
+		return out, nil
+	case []any:
+		out := make([]string, 0, len(s))
+		for i, e := range s {
+			str, ok := e.(string)
+			if !ok {
+				return nil, fmt.Errorf("index %d: expected string, got %T", i, e)
+			}
+			out = append(out, str)
+		}
+		return out, nil
+	}
+	return nil, fmt.Errorf("expected array of strings, got %T", v)
+}
+
+func getString(m map[string]any, key string) string {
+	if m == nil {
+		return ""
+	}
+	s, _ := m[key].(string)
+	return s
+}
+
+func toInt(v any, def int) int {
+	switch n := v.(type) {
+	case int:
+		return n
+	case int32:
+		return int(n)
+	case int64:
+		return int(n)
+	case float32:
+		return int(n)
+	case float64:
+		return int(n)
+	}
+	return def
+}
@@ -0,0 +1,430 @@
+package devicemesh
+
+import (
+	"context"
+	"encoding/json"
+	"io"
+	"net/http"
+	"net/http/httptest"
+	"testing"
+)
+
+func TestRegisterBuiltins_UserExcludesApprovalTools(t *testing.T) {
+	reg := NewToolRegistry(nil)
+	names := RegisterBuiltins(reg, ModeUser)
+	want := map[string]bool{
+		"exec":        true,
+		"shell.eval":  true,
+		"fs.read":     true,
+		"fs.write":    true,
+		"fs.list":     true,
+		"fs.stat":     true,
+		"git.clone":   true,
+		"git.commit":  true,
+		"git.push":    true,
+		"pkg.search":  true,
+		"proc.list":   true,
+		"docker.list": true,
+		"docker.exec": true,
+		"docker.logs": true,
+	}
+	got := map[string]bool{}
+	for _, n := range names {
+		got[n] = true
+	}
+	for w := range want {
+		if !got[w] {
+			t.Errorf("user mode missing tool %q", w)
+		}
+	}
+	if got["pkg.install"] {
+		t.Errorf("user mode should NOT include pkg.install")
+	}
+	if got["proc.kill"] {
+		t.Errorf("user mode should NOT include proc.kill (RequiresApproval)")
+	}
+}
+
+func TestRegisterBuiltins_SudoIncludesOnlyApprovalTools(t *testing.T) {
+	reg := NewToolRegistry(nil)
+	names := RegisterBuiltins(reg, ModeSudo)
+	got := map[string]bool{}
+	for _, n := range names {
+		got[n] = true
+	}
+	if !got["pkg.install"] {
+		t.Errorf("sudo mode should include pkg.install")
+	}
+	if !got["proc.kill"] {
+		t.Errorf("sudo mode should include proc.kill")
+	}
+	if !got["shell.eval"] {
+		t.Errorf("sudo mode should include shell.eval (special-cased with RequiresApproval=true)")
+	}
+	if got["exec"] {
+		t.Errorf("sudo mode should NOT include exec (no RequiresApproval)")
+	}
+	if got["fs.read"] {
+		t.Errorf("sudo mode should NOT include fs.read")
+	}
+}
+
+func TestRegisterBuiltins_ModeAll(t *testing.T) {
+	reg := NewToolRegistry(nil)
+	names := RegisterBuiltins(reg, ModeAll)
+	if len(names) < 16 {
+		t.Errorf("expected all 16 builtins, got %d: %v", len(names), names)
+	}
+	got := map[string]bool{}
+	for _, n := range names {
+		got[n] = true
+	}
+	if !got["exec"] || !got["pkg.install"] {
+		t.Errorf("ModeAll should include both exec and pkg.install")
+	}
+}
+
+func TestBuiltins_Exec_HappyPath(t *testing.T) {
+	var received CapabilityRequest
+	srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
+		body, _ := io.ReadAll(r.Body)
+		_ = json.Unmarshal(body, &received)
+		_ = json.NewEncoder(w).Encode(CapabilityResponse{
+			RequestID: received.RequestID,
+			OK:        true,
+			Result: map[string]any{
+				"stdout":      "hello\n",
+				"stderr":      "",
+				"exit_code":   float64(0), // JSON numbers decode as float64
+				"duration_ms": float64(12),
+			},
+		})
+	}))
+	defer srv.Close()
+
+	reg := NewToolRegistry(NewClient(srv.URL))
+	RegisterBuiltins(reg, ModeUser)
+
+	out, err := reg.Call(context.Background(), "exec", map[string]any{
+		"argv":      []string{"echo", "hello"},
+		"cwd":       "/tmp",
+		"timeout_s": 5,
+	})
+	if err != nil {
+		t.Fatalf("exec call: %v", err)
+	}
+
+	// Result should be a normalized map.
+	m, ok := out.(map[string]any)
+	if !ok {
+		t.Fatalf("expected map result, got %T", out)
+	}
+	if m["stdout"].(string) != "hello\n" {
+		t.Errorf("stdout: %v", m["stdout"])
+	}
+	if m["exit_code"].(int) != 0 {
+		t.Errorf("exit_code: %v (%T)", m["exit_code"], m["exit_code"])
+	}
+
+	// Verify the request that was sent.
+	if received.Capability != "shell.exec" {
+		t.Errorf("capability: %q", received.Capability)
+	}
+	argv, ok := received.Args["argv"].([]any)
+	if !ok {
+		t.Fatalf("argv not []any: %T", received.Args["argv"])
+	}
+	if len(argv) != 2 || argv[0].(string) != "echo" {
+		t.Errorf("argv content: %v", argv)
+	}
+	if received.Args["cwd"].(string) != "/tmp" {
+		t.Errorf("cwd: %v", received.Args["cwd"])
+	}
+	if int(received.Args["timeout_s"].(float64)) != 5 {
+		t.Errorf("timeout_s: %v", received.Args["timeout_s"])
+	}
+}
+
+func TestBuiltins_Exec_RejectsEmptyArgv(t *testing.T) {
+	reg := NewToolRegistry(NewClient("http://nowhere.invalid"))
+	RegisterBuiltins(reg, ModeUser)
+
+	_, err := reg.Call(context.Background(), "exec", map[string]any{
+		"argv": []string{},
+	})
+	if err == nil {
+		t.Fatalf("expected error for empty argv")
+	}
+}
+
+func TestBuiltins_FSRead_HappyPath(t *testing.T) {
+	var received CapabilityRequest
+	srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
+		body, _ := io.ReadAll(r.Body)
+		_ = json.Unmarshal(body, &received)
+		_ = json.NewEncoder(w).Encode(CapabilityResponse{
+			RequestID: received.RequestID,
+			OK:        true,
+			Result: map[string]any{
+				"content": "file contents here",
+				"size":    float64(18),
+			},
+		})
+	}))
+	defer srv.Close()
+
+	reg := NewToolRegistry(NewClient(srv.URL))
+	RegisterBuiltins(reg, ModeUser)
+
+	out, err := reg.Call(context.Background(), "fs.read", map[string]any{
+		"path":      "/etc/os-release",
+		"max_bytes": 1024,
+	})
+	if err != nil {
+		t.Fatalf("fs.read: %v", err)
+	}
+	m := out.(map[string]any)
+	if m["content"].(string) != "file contents here" {
+		t.Errorf("content: %v", m["content"])
+	}
+
+	if received.Capability != "fs.read" {
+		t.Errorf("capability: %q", received.Capability)
+	}
+	if received.Args["path"].(string) != "/etc/os-release" {
+		t.Errorf("path: %v", received.Args["path"])
+	}
+	if int(received.Args["max_bytes"].(float64)) != 1024 {
+		t.Errorf("max_bytes: %v", received.Args["max_bytes"])
+	}
+}
+
+func TestBuiltins_FSWrite_RequiresContentOrB64(t *testing.T) {
+	reg := NewToolRegistry(NewClient("http://nowhere.invalid"))
+	RegisterBuiltins(reg, ModeUser)
+
+	_, err := reg.Call(context.Background(), "fs.write", map[string]any{
+		"path": "/tmp/x",
+	})
+	if err == nil {
+		t.Fatalf("expected error when neither content nor content_b64 provided")
+	}
+}
+
+func TestBuiltins_FSWrite_AcceptsContent(t *testing.T) {
+	srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
+		_ = json.NewEncoder(w).Encode(CapabilityResponse{OK: true, Result: map[string]any{"bytes_written": float64(11)}})
+	}))
+	defer srv.Close()
+
+	reg := NewToolRegistry(NewClient(srv.URL))
+	RegisterBuiltins(reg, ModeUser)
+	_, err := reg.Call(context.Background(), "fs.write", map[string]any{
+		"path":    "/tmp/x",
+		"content": "hello world",
+	})
+	if err != nil {
+		t.Fatalf("fs.write: %v", err)
+	}
+}
+
+func TestBuiltins_PkgInstall_RegisteredOnlyInSudo(t *testing.T) {
+	// Build user reg
+	user := NewToolRegistry(nil)
+	RegisterBuiltins(user, ModeUser)
+	if _, ok := user.Get("pkg.install"); ok {
+		t.Errorf("pkg.install should NOT be in user registry")
+	}
+	// Build sudo reg
+	sudo := NewToolRegistry(nil)
+	RegisterBuiltins(sudo, ModeSudo)
+	if _, ok := sudo.Get("pkg.install"); !ok {
+		t.Errorf("pkg.install should be in sudo registry")
+	}
+}
+
+// ----- shell.eval -----
+
+func TestBuiltins_ShellEval_PresentInUserModeWithoutApproval(t *testing.T) {
+	reg := NewToolRegistry(nil)
+	RegisterBuiltins(reg, ModeUser)
+	spec, ok := reg.Get("shell.eval")
+	if !ok {
+		t.Fatalf("shell.eval should be registered in ModeUser")
+	}
+	if spec.RequiresApproval {
+		t.Errorf("shell.eval in ModeUser should have RequiresApproval=false, got true")
+	}
+	if spec.Capability != "shell.eval" {
+		t.Errorf("capability mismatch: %q", spec.Capability)
+	}
+}
+
+func TestBuiltins_ShellEval_PresentInSudoModeWithApproval(t *testing.T) {
+	reg := NewToolRegistry(nil)
+	RegisterBuiltins(reg, ModeSudo)
+	spec, ok := reg.Get("shell.eval")
+	if !ok {
+		t.Fatalf("shell.eval should be registered in ModeSudo")
+	}
+	if !spec.RequiresApproval {
+		t.Errorf("shell.eval in ModeSudo should have RequiresApproval=true, got false")
+	}
+	// Ensure withApprovalRequired did not mutate the original spec returned
+	// from builtinSpecs (other registries should still see false).
+	userReg := NewToolRegistry(nil)
+	RegisterBuiltins(userReg, ModeUser)
+	userSpec, _ := userReg.Get("shell.eval")
+	if userSpec.RequiresApproval {
+		t.Errorf("ModeUser shell.eval should remain RequiresApproval=false; sudo registration leaked")
+	}
+}
+
+func TestBuiltins_ShellEval_InputSchemaValidation(t *testing.T) {
+	reg := NewToolRegistry(nil)
+	RegisterBuiltins(reg, ModeUser)
+	spec, ok := reg.Get("shell.eval")
+	if !ok {
+		t.Fatalf("shell.eval not registered")
+	}
+
+	// Happy: minimal valid input.
+	if err := ValidateInput(spec, map[string]any{"cmd": "git status"}); err != nil {
+		t.Errorf("expected valid input to pass, got %v", err)
+	}
+	// Happy: with shell enum.
+	if err := ValidateInput(spec, map[string]any{"cmd": "ls -la", "shell": "bash"}); err != nil {
+		t.Errorf("shell=bash should be valid, got %v", err)
+	}
+	if err := ValidateInput(spec, map[string]any{"cmd": "Get-Process", "shell": "powershell"}); err != nil {
+		t.Errorf("shell=powershell should be valid, got %v", err)
+	}
+	if err := ValidateInput(spec, map[string]any{"cmd": "ls", "shell": "auto"}); err != nil {
+		t.Errorf("shell=auto should be valid, got %v", err)
+	}
+
+	// Reject: shell not in enum.
+	if err := ValidateInput(spec, map[string]any{"cmd": "ls", "shell": "zsh"}); err == nil {
+		t.Errorf("shell=zsh should be rejected by enum")
+	}
+	// Reject: missing required cmd.
+	if err := ValidateInput(spec, map[string]any{}); err == nil {
+		t.Errorf("empty input should fail (cmd required)")
+	}
+	// Reject: unknown property (additionalProperties=false).
+	if err := ValidateInput(spec, map[string]any{"cmd": "ls", "extra": "x"}); err == nil {
+		t.Errorf("unknown property should be rejected by additionalProperties=false")
+	}
+	// Reject: cmd not a string.
+	if err := ValidateInput(spec, map[string]any{"cmd": 42}); err == nil {
+		t.Errorf("cmd as integer should be rejected")
+	}
+}
+
+func TestBuiltins_ShellEval_ArgMapping(t *testing.T) {
+	spec := shellEvalSpec()
+
+	// Pass cmd alone.
+	out, err := spec.ArgMapping(map[string]any{"cmd": "git status"})
+	if err != nil {
+		t.Fatalf("argmap cmd-only: %v", err)
+	}
+	if out["cmd"].(string) != "git status" {
+		t.Errorf("cmd not passed through: %v", out["cmd"])
+	}
+	if _, ok := out["shell"]; ok {
+		t.Errorf("shell should be absent when not provided")
+	}
+	if _, ok := out["cwd"]; ok {
+		t.Errorf("cwd should be absent when not provided")
+	}
+
+	// Pass all fields.
+	out, err = spec.ArgMapping(map[string]any{
+		"cmd":   "ls -la",
+		"shell": "bash",
+		"cwd":   "/home/lucas",
+	})
+	if err != nil {
+		t.Fatalf("argmap full: %v", err)
+	}
+	if out["shell"].(string) != "bash" {
+		t.Errorf("shell not propagated: %v", out["shell"])
+	}
+	if out["cwd"].(string) != "/home/lucas" {
+		t.Errorf("cwd not propagated: %v", out["cwd"])
+	}
+
+	// Empty strings for optional fields are filtered out.
+	out, err = spec.ArgMapping(map[string]any{"cmd": "ls", "shell": "", "cwd": ""})
+	if err != nil {
+		t.Fatalf("argmap empty optionals: %v", err)
+	}
+	if _, ok := out["shell"]; ok {
+		t.Errorf("empty shell should be filtered, got %v", out["shell"])
+	}
+	if _, ok := out["cwd"]; ok {
+		t.Errorf("empty cwd should be filtered, got %v", out["cwd"])
+	}
+
+	// Missing cmd is an error.
+	if _, err := spec.ArgMapping(map[string]any{}); err == nil {
+		t.Errorf("ArgMapping should error on missing cmd")
+	}
+}
+
+func TestBuiltins_ShellEval_SmokeCall(t *testing.T) {
+	var received CapabilityRequest
+	srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
+		body, _ := io.ReadAll(r.Body)
+		_ = json.Unmarshal(body, &received)
+		_ = json.NewEncoder(w).Encode(CapabilityResponse{
+			RequestID: received.RequestID,
+			OK:        true,
+			Result: map[string]any{
+				"stdout":          "hola\n",
+				"stderr":          "",
+				"exit_code":       float64(0),
+				"approval_status": "auto_approved",
+				"cmd_executed":    "echo hola",
+				"truncated":       false,
+				"duration_ms":     float64(7),
+			},
+		})
+	}))
+	defer srv.Close()
+
+	reg := NewToolRegistry(NewClient(srv.URL))
+	RegisterBuiltins(reg, ModeUser)
+
+	out, err := reg.Call(context.Background(), "shell.eval", map[string]any{
+		"cmd": "echo hola",
+	})
+	if err != nil {
+		t.Fatalf("shell.eval call: %v", err)
+	}
+	m, ok := out.(map[string]any)
+	if !ok {
+		t.Fatalf("expected map result, got %T", out)
+	}
+	if m["stdout"].(string) != "hola\n" {
+		t.Errorf("stdout: %v", m["stdout"])
+	}
+	if m["approval_status"].(string) != "auto_approved" {
+		t.Errorf("approval_status: %v", m["approval_status"])
+	}
+	if m["cmd_executed"].(string) != "echo hola" {
+		t.Errorf("cmd_executed: %v", m["cmd_executed"])
+	}
+
+	// Verify the device-facing request envelope.
+	if received.Capability != "shell.eval" {
+		t.Errorf("capability: %q", received.Capability)
+	}
+	if received.Args["cmd"].(string) != "echo hola" {
+		t.Errorf("cmd: %v", received.Args["cmd"])
+	}
+	if _, ok := received.Args["shell"]; ok {
+		t.Errorf("shell should be absent when omitted by caller")
+	}
+}
@@ -0,0 +1,178 @@
+package devicemesh
+
+import (
+	"context"
+	"fmt"
+	"sort"
+	"sync"
+)
+
+// ToolSpec describes a single tool exposed to the LLM. It mirrors the
+// agents_and_robots tool pattern (`tools.Def` + `tools.Tool`) but pinned to
+// the device mesh transport: every tool maps to exactly one capability of a
+// remote device_agent, with a deterministic input/output mapping.
+//
+// Fields:
+//
+//   - Name: the dotted name exposed to the LLM ("exec", "fs.read", ...).
+//   - Description: shown to the LLM. Tells it WHEN to use the tool, NOT how.
+//   - InputSchema: a minimal JSON-Schema-like map. Used by ValidateInput to
+//     reject malformed args before they hit the network. See schema.go.
+//   - Capability: the device_agent capability id ("shell.exec", "fs.read").
+//   - ArgMapping: pure transform from tool input (LLM-facing) to capability
+//     args (device-facing). Defaults to identity if nil.
+//   - ResultMapping: pure transform from capability result (raw map) to the
+//     tool output the LLM sees. Defaults to passthrough if nil.
+//   - RequiresApproval: whether the underlying capability requires the
+//     human-in-the-loop approval flow on the device_agent side. Used by
+//     RegisterBuiltins to decide which tools belong to the user vs sudo
+//     agent registry. This field is metadata; the actual approval gate
+//     lives in the device_agent manifest (see issue 0144 §3).
+type ToolSpec struct {
+	Name             string
+	Description      string
+	InputSchema      map[string]any
+	Capability       string
+	ArgMapping       func(input map[string]any) (map[string]any, error)
+	ResultMapping    func(result map[string]any) (any, error)
+	RequiresApproval bool
+}
+
+// ToolRegistry holds the set of tools the LLM can invoke via the device mesh.
+// One registry per agent process. Lookups are by tool name.
+//
+// Thread-safe for read while Register may run concurrently — the agent
+// runtime registers all tools at startup, but tests do it incrementally.
+type ToolRegistry struct {
+	mu     sync.RWMutex
+	client *Client
+	tools  map[string]ToolSpec
+}
+
+// NewToolRegistry builds an empty registry bound to a Client. The client is
+// what tools use to dispatch; it's stored once so tools don't have to know
+// about the transport.
+func NewToolRegistry(client *Client) *ToolRegistry {
+	return &ToolRegistry{
+		client: client,
+		tools:  make(map[string]ToolSpec),
+	}
+}
+
+// Register adds or replaces a tool spec. Replacing is allowed by design so
+// the agent runtime can override built-ins from config (ex add a custom
+// ResultMapping for a host-specific tool).
+func (r *ToolRegistry) Register(spec ToolSpec) {
+	r.mu.Lock()
+	defer r.mu.Unlock()
+	r.tools[spec.Name] = spec
+}
+
+// Get returns the ToolSpec for a name. Second return is false when unknown.
+func (r *ToolRegistry) Get(name string) (ToolSpec, bool) {
+	r.mu.RLock()
+	defer r.mu.RUnlock()
+	spec, ok := r.tools[name]
+	return spec, ok
+}
+
+// List returns all registered tool specs sorted by Name. Sort is alpha to
+// give the LLM a stable order across turns (useful for prompt caching).
+func (r *ToolRegistry) List() []ToolSpec {
+	r.mu.RLock()
+	defer r.mu.RUnlock()
+	out := make([]ToolSpec, 0, len(r.tools))
+	for _, t := range r.tools {
+		out = append(out, t)
+	}
+	sort.Slice(out, func(i, j int) bool { return out[i].Name < out[j].Name })
+	return out
+}
+
+// Len returns the number of registered tools. Useful for logging and
+// for callers that want to short-circuit when the registry is empty.
+func (r *ToolRegistry) Len() int {
+	r.mu.RLock()
+	defer r.mu.RUnlock()
+	return len(r.tools)
+}
+
+// Names returns the sorted list of registered tool names.
+func (r *ToolRegistry) Names() []string {
+	specs := r.List()
+	out := make([]string, len(specs))
+	for i, s := range specs {
+		out[i] = s.Name
+	}
+	return out
+}
+
+// Client returns the bound Client. Useful for tools that compose multiple
+// capability calls (project.create, future work in 0144e).
+func (r *ToolRegistry) Client() *Client { return r.client }
+
+// Call resolves a tool by name, validates its input, maps it to a capability
+// envelope, dispatches via the bound Client, and returns the mapped result.
+//
+// The caller is the LLM tool-use loop in the agent runtime. The registry is
+// the single entry point for tool invocations so we have one place to plug
+// in audit, metrics, retries, etc.
+func (r *ToolRegistry) Call(ctx context.Context, toolName string, input map[string]any) (any, error) {
+	if r == nil {
+		return nil, fmt.Errorf("devicemesh.ToolRegistry: nil receiver")
+	}
+	spec, ok := r.Get(toolName)
+	if !ok {
+		return nil, fmt.Errorf("devicemesh: unknown tool %q", toolName)
+	}
+	if input == nil {
+		input = map[string]any{}
+	}
+	if err := ValidateInput(spec, input); err != nil {
+		return nil, fmt.Errorf("devicemesh: invalid input for %q: %w", toolName, err)
+	}
+
+	// Map LLM-facing input → device-facing args.
+	var args map[string]any
+	if spec.ArgMapping != nil {
+		mapped, err := spec.ArgMapping(input)
+		if err != nil {
+			return nil, fmt.Errorf("devicemesh: arg mapping for %q: %w", toolName, err)
+		}
+		args = mapped
+	} else {
+		args = input
+	}
+
+	if r.client == nil {
+		return nil, fmt.Errorf("devicemesh: registry has no Client (cannot dispatch %q)", toolName)
+	}
+
+	resp, err := r.client.Call(ctx, CapabilityRequest{
+		Capability: spec.Capability,
+		Args:       args,
+	})
+	if err != nil {
+		return nil, fmt.Errorf("devicemesh: dispatch %q: %w", toolName, err)
+	}
+	if !resp.OK {
+		// Surface the device-side error as a plain Go error. The runner is
+		// in charge of formatting this back to the LLM as a tool result with
+		// non-zero status; we don't fabricate fake output here.
+		errMsg := resp.Error
+		if errMsg == "" {
+			errMsg = "capability returned ok=false with no error message"
+		}
+		return nil, fmt.Errorf("devicemesh: %s: %s", spec.Capability, errMsg)
+	}
+
+	// Map device result → LLM-facing output.
+	if spec.ResultMapping != nil {
+		mapped, err := spec.ResultMapping(resp.Result)
+		if err != nil {
+			return nil, fmt.Errorf("devicemesh: result mapping for %q: %w", toolName, err)
+		}
+		return mapped, nil
+	}
+	return resp.Result, nil
+}
@@ -3,15 +3,27 @@ package effects

 import (
 	"context"
+	"encoding/json"
 	"fmt"
 	"log/slog"
 	"time"

 	"github.com/enmanuel/agents/pkg/decision"
+	"github.com/enmanuel/agents/pkg/tools/devicemesh"
 	"github.com/enmanuel/agents/shell/logger"
 	"github.com/enmanuel/agents/shell/ssh"
 )

+// DeviceMeshCaller is the minimal interface that the Runner needs from a
+// devicemesh.ToolRegistry. It is an interface (rather than a concrete type)
+// so tests can mock without spinning up an HTTP server.
+type DeviceMeshCaller interface {
+	Call(ctx context.Context, toolName string, input map[string]any) (any, error)
+}
+
+// Compile-time check: the real registry satisfies the interface.
+var _ DeviceMeshCaller = (*devicemesh.ToolRegistry)(nil)
+
 // Result holds the outcome of executing a single action.
 type Result struct {
 	Action decision.Action
@@ -32,16 +44,27 @@ type MatrixSender interface {

 // Runner interprets actions and executes them.
 type Runner struct {
-	matrix MatrixSender
-	ssh    *ssh.Executor
-	logger *slog.Logger
+	matrix     MatrixSender
+	ssh        *ssh.Executor
+	deviceMesh DeviceMeshCaller
+	logger     *slog.Logger
 }

 // NewRunner creates a Runner with the provided dependencies.
+// The device mesh tool registry is left nil; ActionKindDeviceMesh actions
+// will be rejected with a clear error. Use NewRunnerWithDeviceMesh to wire
+// the mesh caller.
 func NewRunner(matrix MatrixSender, ssh *ssh.Executor, logger *slog.Logger) *Runner {
 	return &Runner{matrix: matrix, ssh: ssh, logger: logger}
 }

+// NewRunnerWithDeviceMesh wires a Runner with a DeviceMeshCaller, enabling
+// ActionKindDeviceMesh dispatch. Used by the launcher when an agent has
+// cfg.DeviceMesh.Enabled = true (wiring lives in 0144c).
+func NewRunnerWithDeviceMesh(matrix MatrixSender, ssh *ssh.Executor, dm DeviceMeshCaller, logger *slog.Logger) *Runner {
+	return &Runner{matrix: matrix, ssh: ssh, deviceMesh: dm, logger: logger}
+}
+
 // Execute runs each action sequentially and returns results.
 func (r *Runner) Execute(ctx context.Context, roomID string, actions []decision.Action) []Result {
 	r.logger.Debug("effects_batch", "room", roomID, "count", len(actions))
@@ -89,7 +112,36 @@ func (r *Runner) executeOne(ctx context.Context, roomID string, a decision.Actio
 		}
 		return Result{Action: a, Output: output, Err: res.Err}

+	case decision.ActionKindDeviceMesh:
+		if a.DeviceMesh == nil {
+			return Result{Action: a, Err: fmt.Errorf("nil device_mesh action")}
+		}
+		if r.deviceMesh == nil {
+			return Result{Action: a, Err: fmt.Errorf("device_mesh action received but Runner has no DeviceMeshCaller (build with NewRunnerWithDeviceMesh)")}
+		}
+		result, err := r.deviceMesh.Call(ctx, a.DeviceMesh.Tool, a.DeviceMesh.Input)
+		output := formatDeviceMeshResult(result)
+		return Result{Action: a, Output: output, Err: err}
+
 	default:
 		return Result{Action: a, Err: fmt.Errorf("unhandled action kind: %s", a.Kind)}
 	}
 }
+
+// formatDeviceMeshResult renders the tool result as a stable JSON string
+// suitable for embedding in a tool_result message to the LLM. Errors during
+// marshaling collapse to a printable Go representation — never panic, never
+// drop data on the floor.
+func formatDeviceMeshResult(v any) string {
+	if v == nil {
+		return ""
+	}
+	if s, ok := v.(string); ok {
+		return s
+	}
+	b, err := json.Marshal(v)
+	if err != nil {
+		return fmt.Sprintf("%v", v)
+	}
+	return string(b)
+}
@@ -0,0 +1,101 @@
+package effects
+
+import (
+	"context"
+	"errors"
+	"io"
+	"log/slog"
+	"strings"
+	"testing"
+
+	"github.com/enmanuel/agents/pkg/decision"
+)
+
+// stubMeshCaller is a minimal DeviceMeshCaller for runner tests.
+type stubMeshCaller struct {
+	tool   string
+	input  map[string]any
+	result any
+	err    error
+}
+
+func (s *stubMeshCaller) Call(_ context.Context, toolName string, input map[string]any) (any, error) {
+	s.tool = toolName
+	s.input = input
+	return s.result, s.err
+}
+
+func newSilentLogger() *slog.Logger {
+	return slog.New(slog.NewTextHandler(io.Discard, nil))
+}
+
+func TestRunner_DeviceMesh_Success(t *testing.T) {
+	stub := &stubMeshCaller{result: map[string]any{"stdout": "hello", "exit_code": 0}}
+	r := NewRunnerWithDeviceMesh(nil, nil, stub, newSilentLogger())
+
+	results := r.Execute(context.Background(), "!room", []decision.Action{{
+		Kind: decision.ActionKindDeviceMesh,
+		DeviceMesh: &decision.DeviceMeshAction{
+			Tool:  "exec",
+			Input: map[string]any{"argv": []string{"echo", "hello"}},
+		},
+	}})
+
+	if len(results) != 1 {
+		t.Fatalf("expected 1 result, got %d", len(results))
+	}
+	res := results[0]
+	if res.Err != nil {
+		t.Fatalf("expected no error, got %v", res.Err)
+	}
+	if stub.tool != "exec" {
+		t.Errorf("stub.tool=%q", stub.tool)
+	}
+	if !strings.Contains(res.Output, "hello") {
+		t.Errorf("output missing 'hello': %q", res.Output)
+	}
+	if !strings.Contains(res.Output, "exit_code") {
+		t.Errorf("output should be JSON containing exit_code: %q", res.Output)
+	}
+}
+
+func TestRunner_DeviceMesh_PropagatesError(t *testing.T) {
+	stub := &stubMeshCaller{err: errors.New("approval timeout")}
+	r := NewRunnerWithDeviceMesh(nil, nil, stub, newSilentLogger())
+	results := r.Execute(context.Background(), "!room", []decision.Action{{
+		Kind:       decision.ActionKindDeviceMesh,
+		DeviceMesh: &decision.DeviceMeshAction{Tool: "pkg.install", Input: map[string]any{"name": "jq"}},
+	}})
+	if results[0].Err == nil {
+		t.Fatalf("expected error to propagate")
+	}
+	if !strings.Contains(results[0].Err.Error(), "approval") {
+		t.Errorf("error mismatch: %v", results[0].Err)
+	}
+}
+
+func TestRunner_DeviceMesh_NilAction(t *testing.T) {
+	r := NewRunnerWithDeviceMesh(nil, nil, &stubMeshCaller{}, newSilentLogger())
+	results := r.Execute(context.Background(), "!room", []decision.Action{{
+		Kind: decision.ActionKindDeviceMesh,
+		// DeviceMesh field is nil
+	}})
+	if results[0].Err == nil {
+		t.Fatalf("expected error for nil DeviceMesh field")
+	}
+}
+
+func TestRunner_DeviceMesh_NoCaller(t *testing.T) {
+	// Using NewRunner (legacy) — should fail gracefully on DeviceMesh action.
+	r := NewRunner(nil, nil, newSilentLogger())
+	results := r.Execute(context.Background(), "!room", []decision.Action{{
+		Kind:       decision.ActionKindDeviceMesh,
+		DeviceMesh: &decision.DeviceMeshAction{Tool: "exec", Input: map[string]any{"argv": []string{"x"}}},
+	}})
+	if results[0].Err == nil {
+		t.Fatalf("expected error when Runner has no DeviceMeshCaller")
+	}
+	if !strings.Contains(results[0].Err.Error(), "DeviceMeshCaller") {
+		t.Errorf("error should mention DeviceMeshCaller: %v", results[0].Err)
+	}
+}