From 0aa2caae43aca3efb3e0ad6e2fd79003b2542525 Mon Sep 17 00:00:00 2001 From: Egutierrez Date: Sun, 7 Jun 2026 14:36:22 +0200 Subject: [PATCH 1/2] feat(membership): owner binding, pre-auth nonce-cache fix, generic errors MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Three medium audit findings. H6 (owner spoof): handleCreateRoom now binds the body's declared owner to the authenticated signer — both the endpoint id and the signing key must be the signer's — so a registered peer cannot create rooms in another identity's name. Enforced only when an authenticated signer is present. H7 (nonce-cache poison pre-auth): IsAuthorized now runs BEFORE the replay cache is touched, so an unregistered identity (Ed25519 keys are free) can no longer seed nonces into it. The cache is rewritten with O(expired) pruning (insertion order equals expiry order under a constant TTL) instead of the previous O(n) full-map scan under the mutex, plus a size cap with oldest-eviction. This is the prerequisite the 0003 replicated nonce store builds on. H12 (error leak): internal store/blob errors are logged and replaced with a generic client message via writeServerErr, so SQL fragments and filesystem paths no longer reach the caller. Crafted 4xx messages (owner-sig, validation) are kept. Co-Authored-By: Claude Opus 4.8 (1M context) --- pkg/membership/auth.go | 77 ++++++++++++++++++++++++++++++---------- pkg/membership/server.go | 40 +++++++++++++++------ 2 files changed, 89 insertions(+), 28 deletions(-) diff --git a/pkg/membership/auth.go b/pkg/membership/auth.go index 719b8c2b..c4cf3541 100644 --- a/pkg/membership/auth.go +++ b/pkg/membership/auth.go @@ -75,6 +75,11 @@ const ( const ( clockSkew = 30 * time.Second nonceTTL = 60 * time.Second + // maxNonceCacheEntries bounds the replay cache so it cannot grow without limit + // (audit H7). With IsAuthorized now gating insertion, only authorized traffic + // is cached, so this ceiling is only approached under a legitimate burst; at + // the cap the oldest nonce is evicted (its TTL is nearly up anyway). + maxNonceCacheEntries = 100_000 ) // CanonicalRequest returns the exact bytes that are signed and verified for a @@ -91,34 +96,64 @@ func CanonicalRequest(method, path, ts, nonce string, body []byte) []byte { } // nonceCache remembers recently-seen nonces to reject replays. It is an -// in-memory map guarded by a mutex with lazy expiry — sufficient for a single -// membershipd process (the spec's chosen tradeoff over a server-issued nonce -// round-trip). A distributed deployment would need a shared store. +// in-memory store guarded by a mutex — sufficient for a single membershipd +// process (the spec's chosen tradeoff over a server-issued nonce round-trip). A +// distributed deployment would need a shared store (tracked for issue 0003). +// +// Pruning is O(expired), not O(n): because the TTL is constant, insertion order +// equals expiry order, so the oldest entries (front of `order`) are exactly the +// ones that expire first (audit H7 — the previous full-map scan under the mutex +// was a CPU-amplification vector). A size cap bounds memory. type nonceCache struct { - mu sync.Mutex - seen map[string]time.Time - ttl time.Duration + mu sync.Mutex + seen map[string]time.Time // nonce -> expiry + order []string // nonces in insertion order == expiry order + ttl time.Duration + cap int } -func newNonceCache(ttl time.Duration) *nonceCache { - return &nonceCache{seen: make(map[string]time.Time), ttl: ttl} +func newNonceCache(ttl time.Duration, capacity int) *nonceCache { + return &nonceCache{seen: make(map[string]time.Time), ttl: ttl, cap: capacity} } // rememberOrReject records nonce and returns true if it was unseen, or false if -// it is a replay (still live in the cache). Expired entries are pruned lazily on -// each call so the map cannot grow without bound under steady traffic. +// it is a replay (still live in the cache). func (n *nonceCache) rememberOrReject(nonce string, now time.Time) bool { n.mu.Lock() defer n.mu.Unlock() - for k, exp := range n.seen { - if exp.Before(now) { - delete(n.seen, k) + + // Prune expired entries from the front (oldest first). The first live entry + // ends the scan — everything behind it was inserted later and is newer. + cut := 0 + for cut < len(n.order) { + exp, ok := n.seen[n.order[cut]] + if !ok { + cut++ // already evicted by the cap path below + continue } + if !exp.Before(now) { + break + } + delete(n.seen, n.order[cut]) + cut++ } + if cut > 0 { + n.order = append(n.order[:0], n.order[cut:]...) + } + if exp, ok := n.seen[nonce]; ok && !exp.Before(now) { - return false + return false // a live replay } + + // Bound memory: at capacity, evict the oldest entry (its TTL is nearly up). + for len(n.seen) >= n.cap && len(n.order) > 0 { + oldest := n.order[0] + n.order = n.order[1:] + delete(n.seen, oldest) + } + n.seen[nonce] = now.Add(n.ttl) + n.order = append(n.order, nonce) return true } @@ -172,10 +207,9 @@ func (s *Server) authenticate(r *http.Request, body []byte, now time.Time) (auth return authResult{}, fmt.Errorf("invalid signature") } - if !s.nonces.rememberOrReject(nonce, now) { - return authResult{}, fmt.Errorf("replayed nonce") - } - + // Authorize BEFORE touching the replay cache (audit H7): an unregistered + // identity can mint valid signatures for free, so caching its nonces would let + // it poison/grow the cache pre-auth. Only authorized identities are remembered. if !s.store.IsAuthorized(pubHex) { return authResult{}, fmt.Errorf("identity not authorized") } @@ -185,5 +219,12 @@ func (s *Server) authenticate(r *http.Request, body []byte, now time.Time) (auth // IsAuthorized passed but the row vanished (race with revoke): fail closed. return authResult{}, fmt.Errorf("identity not authorized") } + + // Anti-replay last: a replayed request from an authorized identity is still + // rejected here (the nonce is already live in the cache from its first use). + if !s.nonces.rememberOrReject(nonce, now) { + return authResult{}, fmt.Errorf("replayed nonce") + } + return authResult{pubHex: pubHex, endpoint: frame.EndpointID(pub), user: user}, nil } diff --git a/pkg/membership/server.go b/pkg/membership/server.go index 3062e8f2..c5a2724d 100644 --- a/pkg/membership/server.go +++ b/pkg/membership/server.go @@ -19,6 +19,7 @@ import ( "golang.org/x/time/rate" "github.com/enmanuel/unibus/pkg/blobstore" + "github.com/enmanuel/unibus/pkg/frame" ) // Body-size ceilings for the control plane. They bound how much an unauthenticated @@ -84,7 +85,7 @@ func NewServer(store *Store, blobs *blobstore.Store, authMode AuthMode) *Server blobs: blobs, mux: http.NewServeMux(), authMode: authMode, - nonces: newNonceCache(nonceTTL), + nonces: newNonceCache(nonceTTL, maxNonceCacheEntries), limiter: newIPRateLimiter(defaultRatePerSec, defaultRateBurst, rateBucketTTL), } s.routes() @@ -307,6 +308,15 @@ func writeErr(w http.ResponseWriter, code int, msg string) { writeJSON(w, code, map[string]string{"error": msg}) } +// writeServerErr logs the internal error detail and returns ONLY a generic +// message to the client (audit H12): raw store/blob errors embed SQL fragments +// and filesystem paths, which must not leak to a caller. Use it for any error +// that originates inside the server (5xx, or a not-found wrapping a store error). +func writeServerErr(w http.ResponseWriter, r *http.Request, code int, publicMsg string, err error) { + log.Printf("[handler] %s %s -> %d: %v", r.Method, r.URL.Path, code, err) + writeErr(w, code, publicMsg) +} + // canonicalSig returns the bytes to verify for a request: the request struct // re-marshaled with its Sig field cleared. The caller passes a copy with Sig // already zeroed. This is symmetric with how the client signs. @@ -359,6 +369,16 @@ func (s *Server) handleCreateRoom(w http.ResponseWriter, r *http.Request) { "cleartext rooms are disabled on this deployment; create an encrypted (Matrix-policy) room") return } + // Owner binding (audit H6): the declared owner must BE the authenticated + // signer — both the endpoint id and the signing key. Otherwise a registered + // peer could create rooms in another identity's name. Enforced only when an + // authenticated signer is present (AuthOff/dev trusts the caller). + if signer, ok := signerEndpoint(r); ok { + if req.Owner.Endpoint != signer || frame.EndpointID(req.Owner.SignPub) != signer { + writeErr(w, http.StatusForbidden, "forbidden: room owner must be the authenticated signer") + return + } + } roomID := newULID() info := RoomInfo{ RoomID: roomID, @@ -369,7 +389,7 @@ func (s *Server) handleCreateRoom(w http.ResponseWriter, r *http.Request) { OwnerEndpoint: req.Owner.Endpoint, } if err := s.store.CreateRoom(info, req.Owner.SignPub, req.Owner.KexPub, req.SealedKeySelf); err != nil { - writeErr(w, http.StatusInternalServerError, err.Error()) + writeServerErr(w, r, http.StatusInternalServerError, "internal error", err) return } writeJSON(w, http.StatusCreated, createRoomResp{RoomID: roomID}) @@ -391,7 +411,7 @@ func (s *Server) handleInvite(w http.ResponseWriter, r *http.Request) { } info, err := s.store.GetRoom(roomID) if err != nil { - writeErr(w, http.StatusNotFound, err.Error()) + writeServerErr(w, r, http.StatusNotFound, "room not found", err) return } m := Member{ @@ -401,7 +421,7 @@ func (s *Server) handleInvite(w http.ResponseWriter, r *http.Request) { KexPub: req.Member.KexPub, } if err := s.store.AddMember(roomID, m, info.Epoch, req.SealedKey); err != nil { - writeErr(w, http.StatusInternalServerError, err.Error()) + writeServerErr(w, r, http.StatusInternalServerError, "internal error", err) return } writeJSON(w, http.StatusOK, map[string]string{"status": "invited"}) @@ -441,7 +461,7 @@ func (s *Server) handleGetKey(w http.ResponseWriter, r *http.Request) { "not invited to this encrypted room: no key has been sealed for your identity. Ask the room owner to invite you before joining.") return } - writeErr(w, http.StatusInternalServerError, err.Error()) + writeServerErr(w, r, http.StatusInternalServerError, "internal error", err) return } writeJSON(w, http.StatusOK, keyResp{Epoch: ep, SealedKey: sealed}) @@ -533,7 +553,7 @@ func (s *Server) handleRekey(w http.ResponseWriter, r *http.Request) { // Bump epoch, then store the fresh sealed keys for the remaining members, // then remove the kicked/left members. if err := s.store.BumpEpoch(roomID, req.NewEpoch); err != nil { - writeErr(w, http.StatusInternalServerError, err.Error()) + writeServerErr(w, r, http.StatusInternalServerError, "internal error", err) return } keys := make(map[string][]byte, len(req.Keys)) @@ -542,13 +562,13 @@ func (s *Server) handleRekey(w http.ResponseWriter, r *http.Request) { } if len(keys) > 0 { if err := s.store.PutSealedKeys(roomID, req.NewEpoch, keys); err != nil { - writeErr(w, http.StatusInternalServerError, err.Error()) + writeServerErr(w, r, http.StatusInternalServerError, "internal error", err) return } } for _, ep := range req.Remove { if err := s.store.RemoveMember(roomID, ep); err != nil { - writeErr(w, http.StatusInternalServerError, err.Error()) + writeServerErr(w, r, http.StatusInternalServerError, "internal error", err) return } } @@ -572,7 +592,7 @@ func (s *Server) handlePutBlob(w http.ResponseWriter, r *http.Request) { } hash, err := s.blobs.Put(data) if err != nil { - writeErr(w, http.StatusInternalServerError, err.Error()) + writeServerErr(w, r, http.StatusInternalServerError, "internal error", err) return } writeJSON(w, http.StatusOK, blobResp{Hash: hash}) @@ -586,7 +606,7 @@ func (s *Server) handleGetBlob(w http.ResponseWriter, r *http.Request) { } data, err := s.blobs.Get(hash) if err != nil { - writeErr(w, http.StatusNotFound, err.Error()) + writeServerErr(w, r, http.StatusNotFound, "not found", err) return } w.Header().Set("Content-Type", "application/octet-stream") From 1bcca987a44d301492851222dcf7724d1ea87987 Mon Sep 17 00:00:00 2001 From: Egutierrez Date: Sun, 7 Jun 2026 14:36:22 +0200 Subject: [PATCH 2/2] test(membership): regression for H6 owner spoof and H7 nonce-cache poison TestAudit_OwnerSpoof: a body declaring a foreign owner endpoint or signing key is 403; a self-owned create is 201. TestAudit_NonceCachePoisonPreAuth: an unregistered identity's repeated nonce still fails 'not authorized' (never 'replayed'), proving it was not cached, while an authorized identity's replay is still rejected. Nonce cache unit tests: prune-after-TTL and cap-bounded memory. --- pkg/membership/nonce_cache_test.go | 51 +++++++++++++++++ pkg/membership/owner_nonce_test.go | 88 ++++++++++++++++++++++++++++++ 2 files changed, 139 insertions(+) create mode 100644 pkg/membership/nonce_cache_test.go create mode 100644 pkg/membership/owner_nonce_test.go diff --git a/pkg/membership/nonce_cache_test.go b/pkg/membership/nonce_cache_test.go new file mode 100644 index 00000000..0ff102ad --- /dev/null +++ b/pkg/membership/nonce_cache_test.go @@ -0,0 +1,51 @@ +package membership + +import ( + "strconv" + "testing" + "time" +) + +// TestNonceCacheRememberPrune covers the replay/expiry behavior directly on the +// cache: a fresh nonce is accepted (golden), an immediate repeat is rejected +// (error), and after the TTL the same nonce is accepted again because its entry +// was pruned (edge). +func TestNonceCacheRememberPrune(t *testing.T) { + nc := newNonceCache(50*time.Millisecond, 1000) + base := time.Now() + + if !nc.rememberOrReject("a", base) { + t.Fatalf("first sighting should be accepted") + } + if nc.rememberOrReject("a", base) { + t.Fatalf("an immediate replay should be rejected") + } + if !nc.rememberOrReject("a", base.Add(60*time.Millisecond)) { + t.Fatalf("after the TTL the nonce should be accepted again (pruned)") + } +} + +// TestNonceCacheCapBounded covers the memory bound (audit H7): with a long TTL so +// nothing expires, inserting far more nonces than the cap must still keep the +// cache at or under the cap (oldest evicted), and the order queue must not drift +// from the map. +func TestNonceCacheCapBounded(t *testing.T) { + const capacity = 100 + nc := newNonceCache(time.Hour, capacity) + base := time.Now() + for i := 0; i < 500; i++ { + nc.rememberOrReject("n"+strconv.Itoa(i), base) + } + + nc.mu.Lock() + size := len(nc.seen) + orderLen := len(nc.order) + nc.mu.Unlock() + + if size > capacity { + t.Fatalf("cache exceeded its cap: %d > %d", size, capacity) + } + if orderLen != size { + t.Fatalf("order queue drifted from the map: order=%d seen=%d", orderLen, size) + } +} diff --git a/pkg/membership/owner_nonce_test.go b/pkg/membership/owner_nonce_test.go new file mode 100644 index 00000000..7e2e77f3 --- /dev/null +++ b/pkg/membership/owner_nonce_test.go @@ -0,0 +1,88 @@ +package membership + +import ( + "encoding/json" + "net/http" + "strings" + "testing" + "time" + + cs "fn-registry/functions/cybersecurity" + + "github.com/enmanuel/unibus/pkg/frame" +) + +// TestAudit_OwnerSpoof ports the auditor's H6 finding: handleCreateRoom did not +// bind the body's declared owner to the request signer, so a registered peer +// could create rooms in another identity's name. Now the owner endpoint AND the +// owner signing key must both be the authenticated signer's. +func TestAudit_OwnerSpoof(t *testing.T) { + h := newAuthHarness(t, AuthEnforce) + + bob, _ := cs.GenerateIdentity() + register(t, h, bob, "bob") + bobEp := frame.EndpointID(bob.SignPub) + victim, _ := cs.GenerateIdentity() + + post := func(id cs.Identity, owner endpointJSON, nonce string) int { + body, _ := json.Marshal(createRoomReq{Subject: "some.room", Owner: owner}) + code, _ := do(t, signedReq(t, h.ts.URL, "POST", "/rooms", body, id, time.Now().Unix(), nonce)) + return code + } + + // Error path: bob signs, body claims victim as owner -> 403. + if code := post(bob, endpointJSON{Endpoint: frame.EndpointID(victim.SignPub), SignPub: victim.SignPub, KexPub: victim.KexPub}, "spoof-1"); code != http.StatusForbidden { + t.Fatalf("owner-spoofed create should be 403, got %d", code) + } + + // Edge: bob declares his own endpoint but a foreign signing key -> 403 (the + // key, not just the endpoint string, is bound to the signer). + if code := post(bob, endpointJSON{Endpoint: bobEp, SignPub: victim.SignPub, KexPub: victim.KexPub}, "spoof-2"); code != http.StatusForbidden { + t.Fatalf("create with a foreign owner key should be 403, got %d", code) + } + + // Golden: alice creates a room owned by herself -> 201. + aliceEp := frame.EndpointID(h.alice.SignPub) + if code := post(h.alice, endpointJSON{Endpoint: aliceEp, SignPub: h.alice.SignPub, KexPub: h.alice.KexPub}, "owner-ok"); code != http.StatusCreated { + t.Fatalf("self-owned create should be 201, got %d", code) + } +} + +// TestAudit_NonceCachePoisonPreAuth ports the auditor's H7 finding: the replay +// cache was populated BEFORE the allowlist check, so any unregistered identity +// (Ed25519 keys are free) could seed nonces into it. Now IsAuthorized runs first, +// so an unauthorized identity's nonce is never cached: a repeat of the same nonce +// still fails as "not authorized", not "replayed nonce". +func TestAudit_NonceCachePoisonPreAuth(t *testing.T) { + h := newAuthHarness(t, AuthEnforce) + + eve, _ := cs.GenerateIdentity() // valid signatures, NOT on the allowlist + now := time.Now().Unix() + + code1, body1 := do(t, signedReq(t, h.ts.URL, "GET", "/rooms/x", nil, eve, now, "poison-nonce")) + if code1 != http.StatusUnauthorized || !strings.Contains(body1, "not authorized") { + t.Fatalf("unregistered first request should be 401 not-authorized, got %d (%s)", code1, body1) + } + + // Same nonce again: if the nonce had been cached, this would report "replayed + // nonce". It must still be "not authorized" — proving the nonce was NOT cached. + code2, body2 := do(t, signedReq(t, h.ts.URL, "GET", "/rooms/x", nil, eve, now, "poison-nonce")) + if code2 != http.StatusUnauthorized { + t.Fatalf("unregistered replay should still be 401, got %d", code2) + } + if strings.Contains(body2, "replayed") { + t.Fatalf("an unauthorized identity's nonce was cached pre-auth: %s", body2) + } + if !strings.Contains(body2, "not authorized") { + t.Fatalf("second unregistered request should still be not-authorized, got: %s", body2) + } + + // Positive control: an AUTHORIZED identity's replay IS still rejected, so the + // reorder did not weaken anti-replay for legitimate traffic. + if code, _ := do(t, signedReq(t, h.ts.URL, "GET", aliceRoomsPath(h), nil, h.alice, now, "alice-live")); code != http.StatusOK { + t.Fatalf("alice's first request should be 200, got %d", code) + } + if code, body := do(t, signedReq(t, h.ts.URL, "GET", aliceRoomsPath(h), nil, h.alice, now, "alice-live")); code != http.StatusUnauthorized || !strings.Contains(body, "replayed") { + t.Fatalf("alice's replay should be 401 replayed nonce, got %d (%s)", code, body) + } +}