package membership

import (
	"context"
	"crypto/sha256"
	"encoding/hex"
	"errors"
	"fmt"
	"time"

	"github.com/nats-io/nats.go/jetstream"
)

// bucketNonces is the name of the JetStream KV bucket that holds seen nonces.
const bucketNonces = "UNIBUS_nonces"

// kvNonceStore is the replicated anti-replay backend (issue 0003e). Seen
// nonces are kept in a JetStream KV bucket shared by every node, and a per-key
// TTL lets them expire without any sweeper.
//
// It exists to close the multi-node replay hole the auditor flagged: with the
// per-process memNonceCache an attacker could replay a captured request against
// a DIFFERENT node, whose local cache had never seen the nonce. With one shared
// bucket, the first node to see a nonce wins the atomic Create and every other
// node rejects the replay.
type kvNonceStore struct {
	kv        jetstream.KeyValue // handle to the shared nonce bucket
	opTimeout time.Duration      // deadline applied to each KV operation
}

// newKVNonceStore creates (or opens) the replicated nonce bucket and returns a
// store bound to it.
//
// ttl is the per-key expiry. It must be >= the request acceptance window
// (2*clockSkew) so that a replay can never outlive the memory of its nonce,
// the same rule the in-memory cache's TTL follows. A non-positive replicas is
// treated as 1, and a non-positive opTimeout falls back to defaultKVOpTime.
//
// Bucket creation is bounded by a fixed 15-second deadline. On failure the
// error is wrapped and a nil store is returned.
func newKVNonceStore(js jetstream.JetStream, ttl time.Duration, replicas int, opTimeout time.Duration) (*kvNonceStore, error) {
	// Normalise the optional knobs before they are used or reported.
	if replicas <= 0 {
		replicas = 1
	}
	if opTimeout <= 0 {
		opTimeout = defaultKVOpTime
	}

	bucketCfg := jetstream.KeyValueConfig{
		Bucket:   bucketNonces,
		TTL:      ttl,
		Replicas: replicas,
		History:  1, // only the latest revision of each key is kept
		Storage:  jetstream.FileStorage,
	}

	setupCtx, release := context.WithTimeout(context.Background(), 15*time.Second)
	defer release()

	bucket, err := js.CreateOrUpdateKeyValue(setupCtx, bucketCfg)
	if err != nil {
		return nil, fmt.Errorf("membership: open nonce KV bucket (replicas=%d): %w", replicas, err)
	}

	store := &kvNonceStore{
		kv:        bucket,
		opTimeout: opTimeout,
	}
	return store, nil
}

// nonceKVKey maps a raw nonce (std-base64, whose alphabet includes characters
// such as '+' that are not valid in KV keys) to a KV-safe token: the hex of its
// sha256. The mapping is deterministic, so the same nonce always yields the
// same key, and it is collision-free in practice.
func nonceKVKey(nonce string) string { sum := sha256.Sum256([]byte(nonce)) return hex.EncodeToString(sum[:]) } // rememberOrReject atomically claims the nonce: Create succeeds only if the key // is absent, so the first sight returns true (accept) and any later sight (a // replay, on this or any other node sharing the bucket) returns false. A backend // error fails CLOSED — reject — so a KV outage never silently disables // anti-replay. The TTL on the bucket expires the key, reopening the window. func (s *kvNonceStore) rememberOrReject(nonce string, _ time.Time) bool { ctx, cancel := context.WithTimeout(context.Background(), s.opTimeout) defer cancel() if _, err := s.kv.Create(ctx, nonceKVKey(nonce), nil); err != nil { if errors.Is(err, jetstream.ErrKeyExists) { return false // replay: already claimed } return false // backend unreachable: fail closed } return true // first sight: accept }