diff --git a/cmd/membershipd/main.go b/cmd/membershipd/main.go index 5bbf76d1..a7ce3383 100644 --- a/cmd/membershipd/main.go +++ b/cmd/membershipd/main.go @@ -118,6 +118,14 @@ func main() { } srv := membership.NewServer(store, blobs, authMode) + // On a public (non-loopback) bind, disable cleartext rooms: the embedded NATS + // has no per-subject ACL, so cleartext content would be readable by any + // registered peer. Forcing E2E keeps message content confidential regardless + // (audit H4 minimum defense; see dev/0004d-dataplane-acl.md). + if !isLoopbackBind(*bind) { + srv.RequireEncryptedRooms = true + log.Printf("cleartext rooms: DISABLED (public bind requires end-to-end encryption)") + } log.Printf("control-plane auth: %s", authMode) addr := *bind + ":" + *httpPort httpSrv := &http.Server{ diff --git a/dev/0004d-dataplane-acl.md b/dev/0004d-dataplane-acl.md new file mode 100644 index 00000000..e21a05aa --- /dev/null +++ b/dev/0004d-dataplane-acl.md @@ -0,0 +1,80 @@ +# 0004d — Data-plane access control on NATS (audit H4) + +## The finding + +The NATS authenticator (`pkg/busauth`) decides one thing per connection: +*is this identity registered on the bus?* It does **not** scope what a connected +client may subscribe to or publish. There is a single NATS account with no +`Permissions`, so any registered peer can subscribe to, or publish on, **any** +subject. Concretely: + +- A cleartext room (`ModeNATS`) carries its payload in the clear on its subject. + A registered peer that knows or guesses the subject subscribes and reads the + content directly (the auditor's `TestAudit_NoSubjectACL`: eve, never invited, + receives `"internal: salary numbers"`). +- An encrypted room (`ModeMatrix`) keeps its **content** confidential (the + payload is AEAD ciphertext), but the **metadata of traffic** — that a subject + is active, message sizes and timing, who is publishing — is still observable by + any registered peer that subscribes to the subject. 
+ +## Why the "complete" fix does not fit here + +The preferred fix is per-subject permissions derived from room membership: when a +client connects, the authenticator looks up the rooms it belongs to and grants +`Sub`/`Pub` only on those subjects. NATS supports this — `CustomClientAuthentication` +can register a `*server.User` carrying `Permissions`. + +The blocker is that **NATS evaluates permissions once, at connect time, and never +re-evaluates them on a live connection.** unibus clients routinely *connect → create +or get invited to a room → publish/subscribe* within the **same** connection +(`TestSecureBusEndToEnd` does exactly this: A connects, then creates `room.secure`, +then publishes to it). Permissions frozen at connect time would not include a room +created or joined afterwards, so the legitimate owner could not publish to the room +it just made. Making per-subject ACLs work would therefore require the client to +**reconnect on every membership change**, an invasive change to the client library +and to every peer (worker, chat, mobile) — and the brief for this issue keeps +client changes to a minimum. + +That dynamic-membership reconnection model is precisely the redesign that issue +**0003** (decentralization) already has to do: it moves the control-plane state to a +replicated JetStream KV and reworks how nodes and clients (re)establish sessions. Per +this issue's own guidance ("if a complete strategy does not fit, implement the minimum +defense and document the rest"), the full subject ACL is deferred to 0003, where the +session/permission model is being rebuilt anyway. + +## The strategy implemented here: forbid cleartext rooms on public binds + +`Server.RequireEncryptedRooms` (set by `membershipd` on any non-loopback bind) +refuses to create a cleartext (`ModeNATS`) room. 
Every room on a public deployment +is therefore end-to-end encrypted, so **message content stays confidential even +though the transport offers no subject isolation**: a peer that sniffs another +room's subject receives only AEAD ciphertext it has no key for. + +This composes with the 0004c control-plane authorization: a non-member cannot even +learn a room's subject through the control plane (`GET /rooms/{id}` → 403), so to +sniff it an attacker must already know or guess the subject out of band. + +## What this does NOT close (residual exposure, by design) + +- **Traffic metadata.** A registered peer that already knows a subject can still + subscribe and observe that the subject is active, the ciphertext sizes, and the + timing/cadence of messages. It cannot read content. +- **Cross-room publish.** A registered peer can still *publish* arbitrary bytes on + any subject. In an encrypted room those bytes fail AEAD open and the signature + check (`SignMsgs`), so receivers drop them — it is a nuisance/spam vector, not a + confidentiality or integrity break. +- **WireGuard-only deployments** may still use cleartext rooms, but only when `membershipd` binds to a loopback address (the guard trips + on any non-loopback bind); there the network already restricts who can reach the bus. + +Closing the residual metadata exposure requires the per-subject ACL described above, +tracked for issue 0003. + +## Regression evidence + +- `pkg/membership` — `TestRequireEncryptedRoomsRejectsCleartext`: with + `RequireEncryptedRooms` on, `POST /rooms` for a cleartext policy returns 403 while + an encrypted-room create returns 201. +- `pkg/client` — `TestAudit_NoSubjectACL`: under the public posture, creating a + `ModeNATS` room fails; alice creates an encrypted room and publishes; eve (a + registered non-member) raw-subscribes to the subject and receives only ciphertext — + she never recovers the plaintext. 
diff --git a/pkg/membership/server.go b/pkg/membership/server.go index 99d0b4f5..3062e8f2 100644 --- a/pkg/membership/server.go +++ b/pkg/membership/server.go @@ -61,6 +61,16 @@ type Server struct { authMode AuthMode nonces *nonceCache limiter *ipRateLimiter + + // RequireEncryptedRooms, when true, refuses to create cleartext (ModeNATS) + // rooms. It is the minimum-defensive control for the data plane (audit H4): + // the embedded NATS has no per-subject ACL, so a cleartext room is readable by + // any registered peer that knows (or guesses) its subject. Forcing every room + // to be end-to-end encrypted keeps message CONTENT confidential even when the + // transport offers no subject isolation. The command sets this on a public + // (non-loopback) bind. See dev/0004d-dataplane-acl.md for the full rationale + // and the residual metadata exposure this does NOT close. + RequireEncryptedRooms bool } // NewServer wires the membership store and blob store into an http.Handler. The @@ -341,6 +351,14 @@ func (s *Server) handleCreateRoom(w http.ResponseWriter, r *http.Request) { writeErr(w, http.StatusBadRequest, "subject and owner.endpoint required") return } + // Data-plane minimum defense (audit H4): on a public deployment cleartext + // rooms are disabled, so no message ever rides the un-ACL'd NATS subject in + // the clear for another registered peer to sniff. + if s.RequireEncryptedRooms && !req.Policy.Encrypt { + writeErr(w, http.StatusForbidden, + "cleartext rooms are disabled on this deployment; create an encrypted (Matrix-policy) room") + return + } roomID := newULID() info := RoomInfo{ RoomID: roomID,