Merge issue/0004f-medium-fixes: owner binding, nonce-cache pre-auth, error leak (H6/H7/H12)

Owner of a created room must be the signer; the replay cache is populated only
after authorization (with bounded, O(expired) pruning); internal errors no
longer leak to clients.
This commit is contained in:
2026-06-07 14:36:22 +02:00
4 changed files with 228 additions and 28 deletions
+59 -18
View File
@@ -75,6 +75,11 @@ const (
const (
clockSkew = 30 * time.Second
nonceTTL = 60 * time.Second
// maxNonceCacheEntries bounds the replay cache so it cannot grow without limit
// (audit H7). With IsAuthorized now gating insertion, only authorized traffic
// is cached, so this ceiling is only approached under a legitimate burst; at
// the cap the oldest nonce is evicted (its TTL is nearly up anyway).
maxNonceCacheEntries = 100_000
)
// CanonicalRequest returns the exact bytes that are signed and verified for a
@@ -91,34 +96,64 @@ func CanonicalRequest(method, path, ts, nonce string, body []byte) []byte {
}
// nonceCache remembers recently-seen nonces to reject replays. It is an
// in-memory map guarded by a mutex with lazy expiry — sufficient for a single
// membershipd process (the spec's chosen tradeoff over a server-issued nonce
// round-trip). A distributed deployment would need a shared store.
// in-memory store guarded by a mutex — sufficient for a single membershipd
// process (the spec's chosen tradeoff over a server-issued nonce round-trip). A
// distributed deployment would need a shared store (tracked for issue 0003).
//
// Pruning is O(expired), not O(n): because the TTL is constant, insertion order
// equals expiry order, so the oldest entries (front of `order`) are exactly the
// ones that expire first (audit H7 — the previous full-map scan under the mutex
// was a CPU-amplification vector). A size cap bounds memory.
type nonceCache struct {
mu sync.Mutex
seen map[string]time.Time
ttl time.Duration
mu sync.Mutex
seen map[string]time.Time // nonce -> expiry
order []string // nonces in insertion order == expiry order
ttl time.Duration
cap int
}
func newNonceCache(ttl time.Duration) *nonceCache {
return &nonceCache{seen: make(map[string]time.Time), ttl: ttl}
func newNonceCache(ttl time.Duration, capacity int) *nonceCache {
return &nonceCache{seen: make(map[string]time.Time), ttl: ttl, cap: capacity}
}
// rememberOrReject records nonce and returns true if it was unseen, or false if
// it is a replay (still live in the cache). Expired entries are pruned lazily on
// each call so the map cannot grow without bound under steady traffic.
// it is a replay (still live in the cache).
func (n *nonceCache) rememberOrReject(nonce string, now time.Time) bool {
n.mu.Lock()
defer n.mu.Unlock()
for k, exp := range n.seen {
if exp.Before(now) {
delete(n.seen, k)
// Prune expired entries from the front (oldest first). The first live entry
// ends the scan — everything behind it was inserted later and is newer.
cut := 0
for cut < len(n.order) {
exp, ok := n.seen[n.order[cut]]
if !ok {
cut++ // already evicted by the cap path below
continue
}
if !exp.Before(now) {
break
}
delete(n.seen, n.order[cut])
cut++
}
if cut > 0 {
n.order = append(n.order[:0], n.order[cut:]...)
}
if exp, ok := n.seen[nonce]; ok && !exp.Before(now) {
return false
return false // a live replay
}
// Bound memory: at capacity, evict the oldest entry (its TTL is nearly up).
for len(n.seen) >= n.cap && len(n.order) > 0 {
oldest := n.order[0]
n.order = n.order[1:]
delete(n.seen, oldest)
}
n.seen[nonce] = now.Add(n.ttl)
n.order = append(n.order, nonce)
return true
}
@@ -172,10 +207,9 @@ func (s *Server) authenticate(r *http.Request, body []byte, now time.Time) (auth
return authResult{}, fmt.Errorf("invalid signature")
}
if !s.nonces.rememberOrReject(nonce, now) {
return authResult{}, fmt.Errorf("replayed nonce")
}
// Authorize BEFORE touching the replay cache (audit H7): an unregistered
// identity can mint valid signatures for free, so caching its nonces would let
// it poison/grow the cache pre-auth. Only authorized identities are remembered.
if !s.store.IsAuthorized(pubHex) {
return authResult{}, fmt.Errorf("identity not authorized")
}
@@ -185,5 +219,12 @@ func (s *Server) authenticate(r *http.Request, body []byte, now time.Time) (auth
// IsAuthorized passed but the row vanished (race with revoke): fail closed.
return authResult{}, fmt.Errorf("identity not authorized")
}
// Anti-replay last: a replayed request from an authorized identity is still
// rejected here (the nonce is already live in the cache from its first use).
if !s.nonces.rememberOrReject(nonce, now) {
return authResult{}, fmt.Errorf("replayed nonce")
}
return authResult{pubHex: pubHex, endpoint: frame.EndpointID(pub), user: user}, nil
}
+51
View File
@@ -0,0 +1,51 @@
package membership
import (
"strconv"
"testing"
"time"
)
// TestNonceCacheRememberPrune covers the replay/expiry behavior directly on the
// cache: a fresh nonce is accepted (golden), an immediate repeat is rejected
// (error), and after the TTL the same nonce is accepted again because its entry
// was pruned (edge).
func TestNonceCacheRememberPrune(t *testing.T) {
nc := newNonceCache(50*time.Millisecond, 1000)
base := time.Now()
if !nc.rememberOrReject("a", base) {
t.Fatalf("first sighting should be accepted")
}
if nc.rememberOrReject("a", base) {
t.Fatalf("an immediate replay should be rejected")
}
if !nc.rememberOrReject("a", base.Add(60*time.Millisecond)) {
t.Fatalf("after the TTL the nonce should be accepted again (pruned)")
}
}
// TestNonceCacheCapBounded covers the memory bound (audit H7): with a long TTL so
// nothing expires, inserting far more nonces than the cap must still keep the
// cache at or under the cap (oldest evicted), and the order queue must not drift
// from the map.
func TestNonceCacheCapBounded(t *testing.T) {
const capacity = 100
nc := newNonceCache(time.Hour, capacity)
base := time.Now()
for i := 0; i < 500; i++ {
nc.rememberOrReject("n"+strconv.Itoa(i), base)
}
nc.mu.Lock()
size := len(nc.seen)
orderLen := len(nc.order)
nc.mu.Unlock()
if size > capacity {
t.Fatalf("cache exceeded its cap: %d > %d", size, capacity)
}
if orderLen != size {
t.Fatalf("order queue drifted from the map: order=%d seen=%d", orderLen, size)
}
}
+88
View File
@@ -0,0 +1,88 @@
package membership
import (
"encoding/json"
"net/http"
"strings"
"testing"
"time"
cs "fn-registry/functions/cybersecurity"
"github.com/enmanuel/unibus/pkg/frame"
)
// TestAudit_OwnerSpoof ports the auditor's H6 finding: handleCreateRoom did not
// bind the body's declared owner to the request signer, so a registered peer
// could create rooms in another identity's name. Now the owner endpoint AND the
// owner signing key must both be the authenticated signer's.
func TestAudit_OwnerSpoof(t *testing.T) {
h := newAuthHarness(t, AuthEnforce)
bob, _ := cs.GenerateIdentity()
register(t, h, bob, "bob")
bobEp := frame.EndpointID(bob.SignPub)
victim, _ := cs.GenerateIdentity()
post := func(id cs.Identity, owner endpointJSON, nonce string) int {
body, _ := json.Marshal(createRoomReq{Subject: "some.room", Owner: owner})
code, _ := do(t, signedReq(t, h.ts.URL, "POST", "/rooms", body, id, time.Now().Unix(), nonce))
return code
}
// Error path: bob signs, body claims victim as owner -> 403.
if code := post(bob, endpointJSON{Endpoint: frame.EndpointID(victim.SignPub), SignPub: victim.SignPub, KexPub: victim.KexPub}, "spoof-1"); code != http.StatusForbidden {
t.Fatalf("owner-spoofed create should be 403, got %d", code)
}
// Edge: bob declares his own endpoint but a foreign signing key -> 403 (the
// key, not just the endpoint string, is bound to the signer).
if code := post(bob, endpointJSON{Endpoint: bobEp, SignPub: victim.SignPub, KexPub: victim.KexPub}, "spoof-2"); code != http.StatusForbidden {
t.Fatalf("create with a foreign owner key should be 403, got %d", code)
}
// Golden: alice creates a room owned by herself -> 201.
aliceEp := frame.EndpointID(h.alice.SignPub)
if code := post(h.alice, endpointJSON{Endpoint: aliceEp, SignPub: h.alice.SignPub, KexPub: h.alice.KexPub}, "owner-ok"); code != http.StatusCreated {
t.Fatalf("self-owned create should be 201, got %d", code)
}
}
// TestAudit_NonceCachePoisonPreAuth ports the auditor's H7 finding: the replay
// cache was populated BEFORE the allowlist check, so any unregistered identity
// (Ed25519 keys are free) could seed nonces into it. Now IsAuthorized runs first,
// so an unauthorized identity's nonce is never cached: a repeat of the same nonce
// still fails as "not authorized", not "replayed nonce".
func TestAudit_NonceCachePoisonPreAuth(t *testing.T) {
h := newAuthHarness(t, AuthEnforce)
eve, _ := cs.GenerateIdentity() // valid signatures, NOT on the allowlist
now := time.Now().Unix()
code1, body1 := do(t, signedReq(t, h.ts.URL, "GET", "/rooms/x", nil, eve, now, "poison-nonce"))
if code1 != http.StatusUnauthorized || !strings.Contains(body1, "not authorized") {
t.Fatalf("unregistered first request should be 401 not-authorized, got %d (%s)", code1, body1)
}
// Same nonce again: if the nonce had been cached, this would report "replayed
// nonce". It must still be "not authorized" — proving the nonce was NOT cached.
code2, body2 := do(t, signedReq(t, h.ts.URL, "GET", "/rooms/x", nil, eve, now, "poison-nonce"))
if code2 != http.StatusUnauthorized {
t.Fatalf("unregistered replay should still be 401, got %d", code2)
}
if strings.Contains(body2, "replayed") {
t.Fatalf("an unauthorized identity's nonce was cached pre-auth: %s", body2)
}
if !strings.Contains(body2, "not authorized") {
t.Fatalf("second unregistered request should still be not-authorized, got: %s", body2)
}
// Positive control: an AUTHORIZED identity's replay IS still rejected, so the
// reorder did not weaken anti-replay for legitimate traffic.
if code, _ := do(t, signedReq(t, h.ts.URL, "GET", aliceRoomsPath(h), nil, h.alice, now, "alice-live")); code != http.StatusOK {
t.Fatalf("alice's first request should be 200, got %d", code)
}
if code, body := do(t, signedReq(t, h.ts.URL, "GET", aliceRoomsPath(h), nil, h.alice, now, "alice-live")); code != http.StatusUnauthorized || !strings.Contains(body, "replayed") {
t.Fatalf("alice's replay should be 401 replayed nonce, got %d (%s)", code, body)
}
}
+30 -10
View File
@@ -19,6 +19,7 @@ import (
"golang.org/x/time/rate"
"github.com/enmanuel/unibus/pkg/blobstore"
"github.com/enmanuel/unibus/pkg/frame"
)
// Body-size ceilings for the control plane. They bound how much an unauthenticated
@@ -84,7 +85,7 @@ func NewServer(store *Store, blobs *blobstore.Store, authMode AuthMode) *Server
blobs: blobs,
mux: http.NewServeMux(),
authMode: authMode,
nonces: newNonceCache(nonceTTL),
nonces: newNonceCache(nonceTTL, maxNonceCacheEntries),
limiter: newIPRateLimiter(defaultRatePerSec, defaultRateBurst, rateBucketTTL),
}
s.routes()
@@ -307,6 +308,15 @@ func writeErr(w http.ResponseWriter, code int, msg string) {
writeJSON(w, code, map[string]string{"error": msg})
}
// writeServerErr logs the internal error detail and returns ONLY a generic
// message to the client (audit H12): raw store/blob errors embed SQL fragments
// and filesystem paths, which must not leak to a caller. Use it for any error
// that originates inside the server (5xx, or a not-found wrapping a store error).
func writeServerErr(w http.ResponseWriter, r *http.Request, code int, publicMsg string, err error) {
log.Printf("[handler] %s %s -> %d: %v", r.Method, r.URL.Path, code, err)
writeErr(w, code, publicMsg)
}
// canonicalSig returns the bytes to verify for a request: the request struct
// re-marshaled with its Sig field cleared. The caller passes a copy with Sig
// already zeroed. This is symmetric with how the client signs.
@@ -359,6 +369,16 @@ func (s *Server) handleCreateRoom(w http.ResponseWriter, r *http.Request) {
"cleartext rooms are disabled on this deployment; create an encrypted (Matrix-policy) room")
return
}
// Owner binding (audit H6): the declared owner must BE the authenticated
// signer — both the endpoint id and the signing key. Otherwise a registered
// peer could create rooms in another identity's name. Enforced only when an
// authenticated signer is present (AuthOff/dev trusts the caller).
if signer, ok := signerEndpoint(r); ok {
if req.Owner.Endpoint != signer || frame.EndpointID(req.Owner.SignPub) != signer {
writeErr(w, http.StatusForbidden, "forbidden: room owner must be the authenticated signer")
return
}
}
roomID := newULID()
info := RoomInfo{
RoomID: roomID,
@@ -369,7 +389,7 @@ func (s *Server) handleCreateRoom(w http.ResponseWriter, r *http.Request) {
OwnerEndpoint: req.Owner.Endpoint,
}
if err := s.store.CreateRoom(info, req.Owner.SignPub, req.Owner.KexPub, req.SealedKeySelf); err != nil {
writeErr(w, http.StatusInternalServerError, err.Error())
writeServerErr(w, r, http.StatusInternalServerError, "internal error", err)
return
}
writeJSON(w, http.StatusCreated, createRoomResp{RoomID: roomID})
@@ -391,7 +411,7 @@ func (s *Server) handleInvite(w http.ResponseWriter, r *http.Request) {
}
info, err := s.store.GetRoom(roomID)
if err != nil {
writeErr(w, http.StatusNotFound, err.Error())
writeServerErr(w, r, http.StatusNotFound, "room not found", err)
return
}
m := Member{
@@ -401,7 +421,7 @@ func (s *Server) handleInvite(w http.ResponseWriter, r *http.Request) {
KexPub: req.Member.KexPub,
}
if err := s.store.AddMember(roomID, m, info.Epoch, req.SealedKey); err != nil {
writeErr(w, http.StatusInternalServerError, err.Error())
writeServerErr(w, r, http.StatusInternalServerError, "internal error", err)
return
}
writeJSON(w, http.StatusOK, map[string]string{"status": "invited"})
@@ -441,7 +461,7 @@ func (s *Server) handleGetKey(w http.ResponseWriter, r *http.Request) {
"not invited to this encrypted room: no key has been sealed for your identity. Ask the room owner to invite you before joining.")
return
}
writeErr(w, http.StatusInternalServerError, err.Error())
writeServerErr(w, r, http.StatusInternalServerError, "internal error", err)
return
}
writeJSON(w, http.StatusOK, keyResp{Epoch: ep, SealedKey: sealed})
@@ -533,7 +553,7 @@ func (s *Server) handleRekey(w http.ResponseWriter, r *http.Request) {
// Bump epoch, then store the fresh sealed keys for the remaining members,
// then remove the kicked/left members.
if err := s.store.BumpEpoch(roomID, req.NewEpoch); err != nil {
writeErr(w, http.StatusInternalServerError, err.Error())
writeServerErr(w, r, http.StatusInternalServerError, "internal error", err)
return
}
keys := make(map[string][]byte, len(req.Keys))
@@ -542,13 +562,13 @@ func (s *Server) handleRekey(w http.ResponseWriter, r *http.Request) {
}
if len(keys) > 0 {
if err := s.store.PutSealedKeys(roomID, req.NewEpoch, keys); err != nil {
writeErr(w, http.StatusInternalServerError, err.Error())
writeServerErr(w, r, http.StatusInternalServerError, "internal error", err)
return
}
}
for _, ep := range req.Remove {
if err := s.store.RemoveMember(roomID, ep); err != nil {
writeErr(w, http.StatusInternalServerError, err.Error())
writeServerErr(w, r, http.StatusInternalServerError, "internal error", err)
return
}
}
@@ -572,7 +592,7 @@ func (s *Server) handlePutBlob(w http.ResponseWriter, r *http.Request) {
}
hash, err := s.blobs.Put(data)
if err != nil {
writeErr(w, http.StatusInternalServerError, err.Error())
writeServerErr(w, r, http.StatusInternalServerError, "internal error", err)
return
}
writeJSON(w, http.StatusOK, blobResp{Hash: hash})
@@ -586,7 +606,7 @@ func (s *Server) handleGetBlob(w http.ResponseWriter, r *http.Request) {
}
data, err := s.blobs.Get(hash)
if err != nil {
writeErr(w, http.StatusNotFound, err.Error())
writeServerErr(w, r, http.StatusNotFound, "not found", err)
return
}
w.Header().Set("Content-Type", "application/octet-stream")