package membership import ( "crypto/sha256" "encoding/base64" "encoding/hex" "fmt" "net/http" "strconv" "sync" "time" cs "fn-registry/functions/cybersecurity" "github.com/enmanuel/unibus/pkg/frame" ) // AuthMode is the control-plane authentication rollout state (feature flag // bus-auth). It governs how the HTTP middleware treats a request whose signature // is missing, invalid, replayed, skewed, or from an unregistered identity. // // AuthOff — do not verify anything (legacy behavior; default). // AuthSoft — verify and LOG rejections, but let the request through. Lets // clients migrate to signing without an outage. // AuthEnforce — reject unauthenticated requests with 401. type AuthMode int const ( AuthOff AuthMode = iota AuthSoft AuthEnforce ) func (m AuthMode) String() string { switch m { case AuthOff: return "off" case AuthSoft: return "soft" case AuthEnforce: return "enforce" default: return "unknown" } } // ParseAuthMode maps the bus-auth flag string to an AuthMode. func ParseAuthMode(s string) (AuthMode, error) { switch s { case "off", "": return AuthOff, nil case "soft": return AuthSoft, nil case "enforce": return AuthEnforce, nil default: return AuthOff, fmt.Errorf("membership: invalid bus-auth mode %q (want off|soft|enforce)", s) } } // Control-plane signature headers. The client signs the canonical bytes of the // request and presents these; the server reconstructs the canonical bytes and // verifies. See canonicalRequest for the exact byte layout. const ( hdrPub = "X-Unibus-Pub" // signer Ed25519 public key, lowercase hex hdrTs = "X-Unibus-Ts" // unix seconds (string) hdrNonce = "X-Unibus-Nonce" // 16 random bytes, std base64 hdrSig = "X-Unibus-Sig" // Ed25519 signature over canonical, std base64 ) // Anti-replay parameters. A request is accepted only if its timestamp is within // clockSkew of now; nonces are remembered for nonceTTL so a captured request // cannot be replayed inside its acceptance window. 
// nonceTTL must cover the full acceptance window (2*clockSkew) so a replay can
// never outlive its memory. Timestamps are compared at whole-second granularity
// while nonce expiry is tracked at full clock resolution, so a request can still
// pass the timestamp check for up to one second beyond 2*clockSkew after it was
// first seen; the extra second in nonceTTL closes that gap.
const (
	clockSkew = 30 * time.Second
	nonceTTL  = 2*clockSkew + time.Second
	// maxNonceCacheEntries bounds the replay cache so it cannot grow without limit
	// (audit H7). With IsAuthorized now gating insertion, only authorized traffic
	// is cached, so this ceiling is only approached under a legitimate burst; at
	// the cap the oldest nonce is evicted (its TTL is nearly up anyway).
	maxNonceCacheEntries = 100_000
)

// CanonicalRequest returns the exact bytes that are signed and verified for a
// control-plane request:
//
//	method "\n" path "\n" ts "\n" nonce "\n" hex(sha256(body))
//
// path is the request URI (path plus raw query) so query parameters (endpoint,
// epoch) are covered by the signature. A nil or empty body hashes as the empty
// input. It is exported so the client library and tests sign with the identical
// construction — the one place this format lives.
func CanonicalRequest(method, path, ts, nonce string, body []byte) []byte {
	digest := sha256.Sum256(body)
	bodyHex := hex.EncodeToString(digest[:])
	return []byte(method + "\n" + path + "\n" + ts + "\n" + nonce + "\n" + bodyHex)
}

// nonceCache remembers recently-seen nonces to reject replays. It is an
// in-memory store guarded by a mutex — sufficient for a single membershipd
// process (the spec's chosen tradeoff over a server-issued nonce round-trip). A
// distributed deployment would need a shared store (tracked for issue 0003).
//
// Pruning scans only the expired prefix rather than the whole map: because the
// TTL is constant, insertion order equals expiry order, so the oldest entries
// (front of `order`) are exactly the ones that expire first (audit H7 — the
// previous full-map scan under the mutex was a CPU-amplification vector). A size
// cap bounds memory.
type nonceCache struct { mu sync.Mutex seen map[string]time.Time // nonce -> expiry order []string // nonces in insertion order == expiry order ttl time.Duration cap int } func newNonceCache(ttl time.Duration, capacity int) *nonceCache { return &nonceCache{seen: make(map[string]time.Time), ttl: ttl, cap: capacity} } // rememberOrReject records nonce and returns true if it was unseen, or false if // it is a replay (still live in the cache). func (n *nonceCache) rememberOrReject(nonce string, now time.Time) bool { n.mu.Lock() defer n.mu.Unlock() // Prune expired entries from the front (oldest first). The first live entry // ends the scan — everything behind it was inserted later and is newer. cut := 0 for cut < len(n.order) { exp, ok := n.seen[n.order[cut]] if !ok { cut++ // already evicted by the cap path below continue } if !exp.Before(now) { break } delete(n.seen, n.order[cut]) cut++ } if cut > 0 { n.order = append(n.order[:0], n.order[cut:]...) } if exp, ok := n.seen[nonce]; ok && !exp.Before(now) { return false // a live replay } // Bound memory: at capacity, evict the oldest entry (its TTL is nearly up). for len(n.seen) >= n.cap && len(n.order) > 0 { oldest := n.order[0] n.order = n.order[1:] delete(n.seen, oldest) } n.seen[nonce] = now.Add(n.ttl) n.order = append(n.order, nonce) return true } // authResult is what a successful authentication yields: the verified signing // key (hex), the endpoint id derived from it, and the authorized user record. // Handlers use endpoint for membership authorization (only a member of a room // may read its metadata/keys); user is available for role checks. type authResult struct { pubHex string endpoint string user User } // authenticate verifies the signature headers on r against body and the user // allowlist. It returns an error describing the first failing check; the // middleware decides whether that error blocks (enforce) or only logs (soft). 
// // Order matters: cheap, non-cryptographic checks (header presence, key shape, // clock skew) run first; the Ed25519 verification runs before the replay cache // is touched so an attacker cannot poison the cache with unsigned nonces; the // allowlist lookup runs last. func (s *Server) authenticate(r *http.Request, body []byte, now time.Time) (authResult, error) { pubHex := r.Header.Get(hdrPub) ts := r.Header.Get(hdrTs) nonce := r.Header.Get(hdrNonce) sigB64 := r.Header.Get(hdrSig) if pubHex == "" || ts == "" || nonce == "" || sigB64 == "" { return authResult{}, fmt.Errorf("missing auth headers") } pub, err := hex.DecodeString(pubHex) if err != nil || len(pub) != 32 { return authResult{}, fmt.Errorf("malformed %s (want 32-byte Ed25519 hex)", hdrPub) } tsInt, err := strconv.ParseInt(ts, 10, 64) if err != nil { return authResult{}, fmt.Errorf("malformed %s", hdrTs) } if d := now.Unix() - tsInt; d > int64(clockSkew/time.Second) || d < -int64(clockSkew/time.Second) { return authResult{}, fmt.Errorf("timestamp out of range (skew %ds)", d) } sig, err := base64.StdEncoding.DecodeString(sigB64) if err != nil { return authResult{}, fmt.Errorf("malformed %s", hdrSig) } canonical := CanonicalRequest(r.Method, r.URL.RequestURI(), ts, nonce, body) if !cs.VerifyEd25519(pub, canonical, sig) { return authResult{}, fmt.Errorf("invalid signature") } // Authorize BEFORE touching the replay cache (audit H7): an unregistered // identity can mint valid signatures for free, so caching its nonces would let // it poison/grow the cache pre-auth. Only authorized identities are remembered. if !s.store.IsAuthorized(pubHex) { return authResult{}, fmt.Errorf("identity not authorized") } user, err := s.store.GetUser(pubHex) if err != nil { // IsAuthorized passed but the row vanished (race with revoke): fail closed. 
return authResult{}, fmt.Errorf("identity not authorized") } // Anti-replay last: a replayed request from an authorized identity is still // rejected here (the nonce is already live in the cache from its first use). if !s.nonces.rememberOrReject(nonce, now) { return authResult{}, fmt.Errorf("replayed nonce") } return authResult{pubHex: pubHex, endpoint: frame.EndpointID(pub), user: user}, nil }