diff --git a/pkg/membership/auth.go b/pkg/membership/auth.go index 719b8c2b..c4cf3541 100644 --- a/pkg/membership/auth.go +++ b/pkg/membership/auth.go @@ -75,6 +75,11 @@ const ( const ( clockSkew = 30 * time.Second nonceTTL = 60 * time.Second + // maxNonceCacheEntries bounds the replay cache so it cannot grow without limit + // (audit H7). With IsAuthorized now gating insertion, only authorized traffic + // is cached, so this ceiling is only approached under a legitimate burst; at + // the cap the oldest nonce is evicted (its TTL is nearly up anyway). + maxNonceCacheEntries = 100_000 ) // CanonicalRequest returns the exact bytes that are signed and verified for a @@ -91,34 +96,64 @@ func CanonicalRequest(method, path, ts, nonce string, body []byte) []byte { } // nonceCache remembers recently-seen nonces to reject replays. It is an -// in-memory map guarded by a mutex with lazy expiry — sufficient for a single -// membershipd process (the spec's chosen tradeoff over a server-issued nonce -// round-trip). A distributed deployment would need a shared store. +// in-memory store guarded by a mutex — sufficient for a single membershipd +// process (the spec's chosen tradeoff over a server-issued nonce round-trip). A +// distributed deployment would need a shared store (tracked for issue 0003). +// +// Pruning is O(expired), not O(n): because the TTL is constant, insertion order +// equals expiry order, so the oldest entries (front of `order`) are exactly the +// ones that expire first (audit H7 — the previous full-map scan under the mutex +// was a CPU-amplification vector). A size cap bounds memory. type nonceCache struct { - mu sync.Mutex - seen map[string]time.Time - ttl time.Duration + mu sync.Mutex + seen map[string]time.Time // nonce -> expiry + order []string // nonces in insertion order == expiry order + ttl time.Duration + cap int } -func newNonceCache(ttl time.Duration) *nonceCache { - return &nonceCache{seen: make(map[string]time.Time), ttl: ttl} +func newNonceCache(ttl time.Duration, capacity int) *nonceCache { + return &nonceCache{seen: make(map[string]time.Time), ttl: ttl, cap: capacity} } // rememberOrReject records nonce and returns true if it was unseen, or false if -// it is a replay (still live in the cache). Expired entries are pruned lazily on -// each call so the map cannot grow without bound under steady traffic. +// it is a replay (still live in the cache). func (n *nonceCache) rememberOrReject(nonce string, now time.Time) bool { n.mu.Lock() defer n.mu.Unlock() - for k, exp := range n.seen { - if exp.Before(now) { - delete(n.seen, k) + + // Prune expired entries from the front (oldest first). The first live entry + // ends the scan — everything behind it was inserted later and is newer. + cut := 0 + for cut < len(n.order) { + exp, ok := n.seen[n.order[cut]] + if !ok { + cut++ // already evicted by the cap path below + continue } + if !exp.Before(now) { + break + } + delete(n.seen, n.order[cut]) + cut++ } + if cut > 0 { + n.order = append(n.order[:0], n.order[cut:]...) + } + if exp, ok := n.seen[nonce]; ok && !exp.Before(now) { - return false + return false // a live replay } + + // Bound memory: at capacity, evict the oldest entry (its TTL is nearly up). + for len(n.seen) >= n.cap && len(n.order) > 0 { + oldest := n.order[0] + n.order = n.order[1:] + delete(n.seen, oldest) + } + n.seen[nonce] = now.Add(n.ttl) + n.order = append(n.order, nonce) return true } @@ -172,10 +207,9 @@ func (s *Server) authenticate(r *http.Request, body []byte, now time.Time) (auth return authResult{}, fmt.Errorf("invalid signature") } - if !s.nonces.rememberOrReject(nonce, now) { - return authResult{}, fmt.Errorf("replayed nonce") - } - + // Authorize BEFORE touching the replay cache (audit H7): an unregistered + // identity can mint valid signatures for free, so caching its nonces would let + // it poison/grow the cache pre-auth. Only authorized identities are remembered. if !s.store.IsAuthorized(pubHex) { return authResult{}, fmt.Errorf("identity not authorized") } @@ -185,5 +219,12 @@ func (s *Server) authenticate(r *http.Request, body []byte, now time.Time) (auth // IsAuthorized passed but the row vanished (race with revoke): fail closed. return authResult{}, fmt.Errorf("identity not authorized") } + + // Anti-replay last: a replayed request from an authorized identity is still + // rejected here (the nonce is already live in the cache from its first use). + if !s.nonces.rememberOrReject(nonce, now) { + return authResult{}, fmt.Errorf("replayed nonce") + } + return authResult{pubHex: pubHex, endpoint: frame.EndpointID(pub), user: user}, nil } diff --git a/pkg/membership/nonce_cache_test.go b/pkg/membership/nonce_cache_test.go new file mode 100644 index 00000000..0ff102ad --- /dev/null +++ b/pkg/membership/nonce_cache_test.go @@ -0,0 +1,51 @@ +package membership + +import ( + "strconv" + "testing" + "time" +) + +// TestNonceCacheRememberPrune covers the replay/expiry behavior directly on the +// cache: a fresh nonce is accepted (golden), an immediate repeat is rejected +// (error), and after the TTL the same nonce is accepted again because its entry +// was pruned (edge). +func TestNonceCacheRememberPrune(t *testing.T) { + nc := newNonceCache(50*time.Millisecond, 1000) + base := time.Now() + + if !nc.rememberOrReject("a", base) { + t.Fatalf("first sighting should be accepted") + } + if nc.rememberOrReject("a", base) { + t.Fatalf("an immediate replay should be rejected") + } + if !nc.rememberOrReject("a", base.Add(60*time.Millisecond)) { + t.Fatalf("after the TTL the nonce should be accepted again (pruned)") + } +} + +// TestNonceCacheCapBounded covers the memory bound (audit H7): with a long TTL so +// nothing expires, inserting far more nonces than the cap must still keep the +// cache at or under the cap (oldest evicted), and the order queue must not drift +// from the map. +func TestNonceCacheCapBounded(t *testing.T) { + const capacity = 100 + nc := newNonceCache(time.Hour, capacity) + base := time.Now() + for i := 0; i < 500; i++ { + nc.rememberOrReject("n"+strconv.Itoa(i), base) + } + + nc.mu.Lock() + size := len(nc.seen) + orderLen := len(nc.order) + nc.mu.Unlock() + + if size > capacity { + t.Fatalf("cache exceeded its cap: %d > %d", size, capacity) + } + if orderLen != size { + t.Fatalf("order queue drifted from the map: order=%d seen=%d", orderLen, size) + } +} diff --git a/pkg/membership/owner_nonce_test.go b/pkg/membership/owner_nonce_test.go new file mode 100644 index 00000000..7e2e77f3 --- /dev/null +++ b/pkg/membership/owner_nonce_test.go @@ -0,0 +1,88 @@ +package membership + +import ( + "encoding/json" + "net/http" + "strings" + "testing" + "time" + + cs "fn-registry/functions/cybersecurity" + + "github.com/enmanuel/unibus/pkg/frame" +) + +// TestAudit_OwnerSpoof ports the auditor's H6 finding: handleCreateRoom did not +// bind the body's declared owner to the request signer, so a registered peer +// could create rooms in another identity's name. Now the owner endpoint AND the +// owner signing key must both be the authenticated signer's. +func TestAudit_OwnerSpoof(t *testing.T) { + h := newAuthHarness(t, AuthEnforce) + + bob, _ := cs.GenerateIdentity() + register(t, h, bob, "bob") + bobEp := frame.EndpointID(bob.SignPub) + victim, _ := cs.GenerateIdentity() + + post := func(id cs.Identity, owner endpointJSON, nonce string) int { + body, _ := json.Marshal(createRoomReq{Subject: "some.room", Owner: owner}) + code, _ := do(t, signedReq(t, h.ts.URL, "POST", "/rooms", body, id, time.Now().Unix(), nonce)) + return code + } + + // Error path: bob signs, body claims victim as owner -> 403. + if code := post(bob, endpointJSON{Endpoint: frame.EndpointID(victim.SignPub), SignPub: victim.SignPub, KexPub: victim.KexPub}, "spoof-1"); code != http.StatusForbidden { + t.Fatalf("owner-spoofed create should be 403, got %d", code) + } + + // Edge: bob declares his own endpoint but a foreign signing key -> 403 (the + // key, not just the endpoint string, is bound to the signer). + if code := post(bob, endpointJSON{Endpoint: bobEp, SignPub: victim.SignPub, KexPub: victim.KexPub}, "spoof-2"); code != http.StatusForbidden { + t.Fatalf("create with a foreign owner key should be 403, got %d", code) + } + + // Golden: alice creates a room owned by herself -> 201. + aliceEp := frame.EndpointID(h.alice.SignPub) + if code := post(h.alice, endpointJSON{Endpoint: aliceEp, SignPub: h.alice.SignPub, KexPub: h.alice.KexPub}, "owner-ok"); code != http.StatusCreated { + t.Fatalf("self-owned create should be 201, got %d", code) + } +} + +// TestAudit_NonceCachePoisonPreAuth ports the auditor's H7 finding: the replay +// cache was populated BEFORE the allowlist check, so any unregistered identity +// (Ed25519 keys are free) could seed nonces into it. Now IsAuthorized runs first, +// so an unauthorized identity's nonce is never cached: a repeat of the same nonce +// still fails as "not authorized", not "replayed nonce". +func TestAudit_NonceCachePoisonPreAuth(t *testing.T) { + h := newAuthHarness(t, AuthEnforce) + + eve, _ := cs.GenerateIdentity() // valid signatures, NOT on the allowlist + now := time.Now().Unix() + + code1, body1 := do(t, signedReq(t, h.ts.URL, "GET", "/rooms/x", nil, eve, now, "poison-nonce")) + if code1 != http.StatusUnauthorized || !strings.Contains(body1, "not authorized") { + t.Fatalf("unregistered first request should be 401 not-authorized, got %d (%s)", code1, body1) + } + + // Same nonce again: if the nonce had been cached, this would report "replayed + // nonce". It must still be "not authorized" — proving the nonce was NOT cached. + code2, body2 := do(t, signedReq(t, h.ts.URL, "GET", "/rooms/x", nil, eve, now, "poison-nonce")) + if code2 != http.StatusUnauthorized { + t.Fatalf("unregistered replay should still be 401, got %d", code2) + } + if strings.Contains(body2, "replayed") { + t.Fatalf("an unauthorized identity's nonce was cached pre-auth: %s", body2) + } + if !strings.Contains(body2, "not authorized") { + t.Fatalf("second unregistered request should still be not-authorized, got: %s", body2) + } + + // Positive control: an AUTHORIZED identity's replay IS still rejected, so the + // reorder did not weaken anti-replay for legitimate traffic. + if code, _ := do(t, signedReq(t, h.ts.URL, "GET", aliceRoomsPath(h), nil, h.alice, now, "alice-live")); code != http.StatusOK { + t.Fatalf("alice's first request should be 200, got %d", code) + } + if code, body := do(t, signedReq(t, h.ts.URL, "GET", aliceRoomsPath(h), nil, h.alice, now, "alice-live")); code != http.StatusUnauthorized || !strings.Contains(body, "replayed") { + t.Fatalf("alice's replay should be 401 replayed nonce, got %d (%s)", code, body) + } +} diff --git a/pkg/membership/server.go b/pkg/membership/server.go index 3062e8f2..c5a2724d 100644 --- a/pkg/membership/server.go +++ b/pkg/membership/server.go @@ -19,6 +19,7 @@ import ( "golang.org/x/time/rate" "github.com/enmanuel/unibus/pkg/blobstore" + "github.com/enmanuel/unibus/pkg/frame" ) // Body-size ceilings for the control plane. They bound how much an unauthenticated @@ -84,7 +85,7 @@ func NewServer(store *Store, blobs *blobstore.Store, authMode AuthMode) *Server blobs: blobs, mux: http.NewServeMux(), authMode: authMode, - nonces: newNonceCache(nonceTTL), + nonces: newNonceCache(nonceTTL, maxNonceCacheEntries), limiter: newIPRateLimiter(defaultRatePerSec, defaultRateBurst, rateBucketTTL), } s.routes() @@ -307,6 +308,15 @@ func writeErr(w http.ResponseWriter, code int, msg string) { writeJSON(w, code, map[string]string{"error": msg}) } +// writeServerErr logs the internal error detail and returns ONLY a generic +// message to the client (audit H12): raw store/blob errors embed SQL fragments +// and filesystem paths, which must not leak to a caller. Use it for any error +// that originates inside the server (5xx, or a not-found wrapping a store error). +func writeServerErr(w http.ResponseWriter, r *http.Request, code int, publicMsg string, err error) { + log.Printf("[handler] %s %s -> %d: %v", r.Method, r.URL.Path, code, err) + writeErr(w, code, publicMsg) +} + // canonicalSig returns the bytes to verify for a request: the request struct // re-marshaled with its Sig field cleared. The caller passes a copy with Sig // already zeroed. This is symmetric with how the client signs. @@ -359,6 +369,16 @@ func (s *Server) handleCreateRoom(w http.ResponseWriter, r *http.Request) { "cleartext rooms are disabled on this deployment; create an encrypted (Matrix-policy) room") return } + // Owner binding (audit H6): the declared owner must BE the authenticated + // signer — both the endpoint id and the signing key. Otherwise a registered + // peer could create rooms in another identity's name. Enforced only when an + // authenticated signer is present (AuthOff/dev trusts the caller). + if signer, ok := signerEndpoint(r); ok { + if req.Owner.Endpoint != signer || frame.EndpointID(req.Owner.SignPub) != signer { + writeErr(w, http.StatusForbidden, "forbidden: room owner must be the authenticated signer") + return + } + } roomID := newULID() info := RoomInfo{ RoomID: roomID, @@ -369,7 +389,7 @@ func (s *Server) handleCreateRoom(w http.ResponseWriter, r *http.Request) { OwnerEndpoint: req.Owner.Endpoint, } if err := s.store.CreateRoom(info, req.Owner.SignPub, req.Owner.KexPub, req.SealedKeySelf); err != nil { - writeErr(w, http.StatusInternalServerError, err.Error()) + writeServerErr(w, r, http.StatusInternalServerError, "internal error", err) return } writeJSON(w, http.StatusCreated, createRoomResp{RoomID: roomID}) @@ -391,7 +411,7 @@ func (s *Server) handleInvite(w http.ResponseWriter, r *http.Request) { } info, err := s.store.GetRoom(roomID) if err != nil { - writeErr(w, http.StatusNotFound, err.Error()) + writeServerErr(w, r, http.StatusNotFound, "room not found", err) return } m := Member{ @@ -401,7 +421,7 @@ func (s *Server) handleInvite(w http.ResponseWriter, r *http.Request) { KexPub: req.Member.KexPub, } if err := s.store.AddMember(roomID, m, info.Epoch, req.SealedKey); err != nil { - writeErr(w, http.StatusInternalServerError, err.Error()) + writeServerErr(w, r, http.StatusInternalServerError, "internal error", err) return } writeJSON(w, http.StatusOK, map[string]string{"status": "invited"}) @@ -441,7 +461,7 @@ func (s *Server) handleGetKey(w http.ResponseWriter, r *http.Request) { "not invited to this encrypted room: no key has been sealed for your identity. Ask the room owner to invite you before joining.") return } - writeErr(w, http.StatusInternalServerError, err.Error()) + writeServerErr(w, r, http.StatusInternalServerError, "internal error", err) return } writeJSON(w, http.StatusOK, keyResp{Epoch: ep, SealedKey: sealed}) @@ -533,7 +553,7 @@ func (s *Server) handleRekey(w http.ResponseWriter, r *http.Request) { // Bump epoch, then store the fresh sealed keys for the remaining members, // then remove the kicked/left members. if err := s.store.BumpEpoch(roomID, req.NewEpoch); err != nil { - writeErr(w, http.StatusInternalServerError, err.Error()) + writeServerErr(w, r, http.StatusInternalServerError, "internal error", err) return } keys := make(map[string][]byte, len(req.Keys)) @@ -542,13 +562,13 @@ func (s *Server) handleRekey(w http.ResponseWriter, r *http.Request) { } if len(keys) > 0 { if err := s.store.PutSealedKeys(roomID, req.NewEpoch, keys); err != nil { - writeErr(w, http.StatusInternalServerError, err.Error()) + writeServerErr(w, r, http.StatusInternalServerError, "internal error", err) return } } for _, ep := range req.Remove { if err := s.store.RemoveMember(roomID, ep); err != nil { - writeErr(w, http.StatusInternalServerError, err.Error()) + writeServerErr(w, r, http.StatusInternalServerError, "internal error", err) return } } @@ -572,7 +592,7 @@ func (s *Server) handlePutBlob(w http.ResponseWriter, r *http.Request) { } hash, err := s.blobs.Put(data) if err != nil { - writeErr(w, http.StatusInternalServerError, err.Error()) + writeServerErr(w, r, http.StatusInternalServerError, "internal error", err) return } writeJSON(w, http.StatusOK, blobResp{Hash: hash}) @@ -586,7 +606,7 @@ func (s *Server) handleGetBlob(w http.ResponseWriter, r *http.Request) { } data, err := s.blobs.Get(hash) if err != nil { - writeErr(w, http.StatusNotFound, err.Error()) + writeServerErr(w, r, http.StatusNotFound, "not found", err) return } w.Header().Set("Content-Type", "application/octet-stream")