From 3e39e23fe03af7ad65e99f1ed88eb3b2bd7d34f5 Mon Sep 17 00:00:00 2001 From: Egutierrez Date: Sun, 7 Jun 2026 12:31:50 +0200 Subject: [PATCH] feat(membership): signed control-plane auth middleware + anti-replay Adds the bus-auth rollout (off|soft|enforce) to the control plane. The middleware verifies an Ed25519 request signature over CanonicalRequest (method, request-URI, ts, nonce, sha256(body)), checks the timestamp is within +/-30s, rejects replayed nonces via an in-memory TTL cache (60s), and requires the signer to be an active user in the allowlist. soft logs rejections but lets requests through so clients can migrate without an outage; off is the legacy no-op default. /healthz is exempt so health probes work before any identity exists. CanonicalRequest is exported as the single source of truth shared with the client. Co-Authored-By: Claude Opus 4.8 (1M context) --- pkg/membership/auth.go | 185 +++++++++++++++++++++++++++++++++++++++ pkg/membership/server.go | 65 ++++++++++++-- 2 files changed, 242 insertions(+), 8 deletions(-) create mode 100644 pkg/membership/auth.go diff --git a/pkg/membership/auth.go b/pkg/membership/auth.go new file mode 100644 index 0000000..4b19234 --- /dev/null +++ b/pkg/membership/auth.go @@ -0,0 +1,185 @@ +package membership + +import ( + "crypto/sha256" + "encoding/base64" + "encoding/hex" + "fmt" + "net/http" + "strconv" + "sync" + "time" + + cs "fn-registry/functions/cybersecurity" +) + +// AuthMode is the control-plane authentication rollout state (feature flag +// bus-auth). It governs how the HTTP middleware treats a request whose signature +// is missing, invalid, replayed, skewed, or from an unregistered identity. +// +// AuthOff — do not verify anything (legacy behavior; default). +// AuthSoft — verify and LOG rejections, but let the request through. Lets +// clients migrate to signing without an outage. +// AuthEnforce — reject unauthenticated requests with 401. 
type AuthMode int

// Rollout states. AuthOff is the zero value, so an unset mode verifies nothing.
const (
	AuthOff     AuthMode = iota // no verification (legacy default)
	AuthSoft                    // verify and log failures, but still serve the request
	AuthEnforce                 // reject requests that fail verification
)

// String returns the flag spelling of m ("off", "soft", "enforce"), or
// "unknown" for a value outside the declared constants.
func (m AuthMode) String() string {
	switch m {
	case AuthOff:
		return "off"
	case AuthSoft:
		return "soft"
	case AuthEnforce:
		return "enforce"
	default:
		return "unknown"
	}
}

// ParseAuthMode maps the bus-auth flag string to an AuthMode. The empty string
// is treated as "off". Matching is exact (case-sensitive); any other value
// yields AuthOff together with a non-nil error naming the accepted spellings.
func ParseAuthMode(s string) (AuthMode, error) {
	switch s {
	case "off", "":
		return AuthOff, nil
	case "soft":
		return AuthSoft, nil
	case "enforce":
		return AuthEnforce, nil
	default:
		return AuthOff, fmt.Errorf("membership: invalid bus-auth mode %q (want off|soft|enforce)", s)
	}
}

// Control-plane signature headers. The client signs the canonical bytes of the
// request and presents these; the server reconstructs the canonical bytes and
// verifies. See CanonicalRequest for the exact byte layout.
const (
	hdrPub   = "X-Unibus-Pub"   // signer Ed25519 public key, lowercase hex
	hdrTs    = "X-Unibus-Ts"    // unix seconds (string)
	hdrNonce = "X-Unibus-Nonce" // 16 random bytes, std base64
	hdrSig   = "X-Unibus-Sig"   // Ed25519 signature over canonical, std base64
)

// Anti-replay parameters. A request is accepted only if its timestamp is within
// clockSkew of now; nonces are remembered for nonceTTL so a captured request
// cannot be replayed inside its acceptance window.
//
// nonceTTL must outlive the whole acceptance window. The skew check compares
// whole Unix seconds, so that window is not 2*clockSkew but up to one second
// longer: a request stamped clockSkew ahead and first seen at t still passes
// the timestamp check until just before t + 2*clockSkew + 1s. The extra second
// keeps the nonce remembered for that tail; with a TTL of exactly 2*clockSkew
// the entry expired first and the replay was accepted.
const (
	clockSkew = 30 * time.Second
	nonceTTL  = 2*clockSkew + time.Second
)

// CanonicalRequest returns the exact bytes that are signed and verified for a
// control-plane request:
//
//	method "\n" path "\n" ts "\n" nonce "\n" hex(sha256(body))
//
// path is the request URI (path plus raw query) so query parameters (endpoint,
// epoch) are covered by the signature. It is exported so the client library and
// tests sign with the identical construction — the one place this format lives.
+func CanonicalRequest(method, path, ts, nonce string, body []byte) []byte { + sum := sha256.Sum256(body) + return []byte(method + "\n" + path + "\n" + ts + "\n" + nonce + "\n" + hex.EncodeToString(sum[:])) +} + +// nonceCache remembers recently-seen nonces to reject replays. It is an +// in-memory map guarded by a mutex with lazy expiry — sufficient for a single +// membershipd process (the spec's chosen tradeoff over a server-issued nonce +// round-trip). A distributed deployment would need a shared store. +type nonceCache struct { + mu sync.Mutex + seen map[string]time.Time + ttl time.Duration +} + +func newNonceCache(ttl time.Duration) *nonceCache { + return &nonceCache{seen: make(map[string]time.Time), ttl: ttl} +} + +// rememberOrReject records nonce and returns true if it was unseen, or false if +// it is a replay (still live in the cache). Expired entries are pruned lazily on +// each call so the map cannot grow without bound under steady traffic. +func (n *nonceCache) rememberOrReject(nonce string, now time.Time) bool { + n.mu.Lock() + defer n.mu.Unlock() + for k, exp := range n.seen { + if exp.Before(now) { + delete(n.seen, k) + } + } + if exp, ok := n.seen[nonce]; ok && !exp.Before(now) { + return false + } + n.seen[nonce] = now.Add(n.ttl) + return true +} + +// authResult is what a successful authentication yields: the verified signing +// key (hex) and the authorized user record. Handlers may use it for fine-grained +// authorization (e.g. role checks) in later phases. +type authResult struct { + pubHex string + user User +} + +// authenticate verifies the signature headers on r against body and the user +// allowlist. It returns an error describing the first failing check; the +// middleware decides whether that error blocks (enforce) or only logs (soft). 
+// +// Order matters: cheap, non-cryptographic checks (header presence, key shape, +// clock skew) run first; the Ed25519 verification runs before the replay cache +// is touched so an attacker cannot poison the cache with unsigned nonces; the +// allowlist lookup runs last. +func (s *Server) authenticate(r *http.Request, body []byte, now time.Time) (authResult, error) { + pubHex := r.Header.Get(hdrPub) + ts := r.Header.Get(hdrTs) + nonce := r.Header.Get(hdrNonce) + sigB64 := r.Header.Get(hdrSig) + if pubHex == "" || ts == "" || nonce == "" || sigB64 == "" { + return authResult{}, fmt.Errorf("missing auth headers") + } + + pub, err := hex.DecodeString(pubHex) + if err != nil || len(pub) != 32 { + return authResult{}, fmt.Errorf("malformed %s (want 32-byte Ed25519 hex)", hdrPub) + } + + tsInt, err := strconv.ParseInt(ts, 10, 64) + if err != nil { + return authResult{}, fmt.Errorf("malformed %s", hdrTs) + } + if d := now.Unix() - tsInt; d > int64(clockSkew/time.Second) || d < -int64(clockSkew/time.Second) { + return authResult{}, fmt.Errorf("timestamp out of range (skew %ds)", d) + } + + sig, err := base64.StdEncoding.DecodeString(sigB64) + if err != nil { + return authResult{}, fmt.Errorf("malformed %s", hdrSig) + } + + canonical := CanonicalRequest(r.Method, r.URL.RequestURI(), ts, nonce, body) + if !cs.VerifyEd25519(pub, canonical, sig) { + return authResult{}, fmt.Errorf("invalid signature") + } + + if !s.nonces.rememberOrReject(nonce, now) { + return authResult{}, fmt.Errorf("replayed nonce") + } + + if !s.store.IsAuthorized(pubHex) { + return authResult{}, fmt.Errorf("identity not authorized") + } + + user, err := s.store.GetUser(pubHex) + if err != nil { + // IsAuthorized passed but the row vanished (race with revoke): fail closed. 
+ return authResult{}, fmt.Errorf("identity not authorized") + } + return authResult{pubHex: pubHex, user: user}, nil +} diff --git a/pkg/membership/server.go b/pkg/membership/server.go index 9ee5df2..47b9058 100644 --- a/pkg/membership/server.go +++ b/pkg/membership/server.go @@ -1,14 +1,17 @@ package membership import ( + "bytes" "database/sql" "encoding/json" "errors" "fmt" "io" + "log" "net/http" "strconv" "strings" + "time" cs "fn-registry/functions/cybersecurity" @@ -24,20 +27,66 @@ import ( // rate limiting, and read endpoints (GET) are unauthenticated. Hardening // (mTLS, capabilities, rate limits) is a later phase. type Server struct { - store *Store - blobs *blobstore.Store - mux *http.ServeMux + store *Store + blobs *blobstore.Store + mux *http.ServeMux + authMode AuthMode + nonces *nonceCache } -// NewServer wires the membership store and blob store into an http.Handler. -func NewServer(store *Store, blobs *blobstore.Store) *Server { - s := &Server{store: store, blobs: blobs, mux: http.NewServeMux()} +// NewServer wires the membership store and blob store into an http.Handler. The +// authMode selects the control-plane auth rollout state (AuthOff for callers and +// tests that have not migrated to signed requests yet). +func NewServer(store *Store, blobs *blobstore.Store, authMode AuthMode) *Server { + s := &Server{ + store: store, + blobs: blobs, + mux: http.NewServeMux(), + authMode: authMode, + nonces: newNonceCache(nonceTTL), + } s.routes() return s } -// ServeHTTP satisfies http.Handler. -func (s *Server) ServeHTTP(w http.ResponseWriter, r *http.Request) { s.mux.ServeHTTP(w, r) } +// ServeHTTP satisfies http.Handler. It runs the control-plane auth middleware +// (signature verification + anti-replay + allowlist) ahead of the router +// according to authMode, then dispatches to the matched handler. 
+func (s *Server) ServeHTTP(w http.ResponseWriter, r *http.Request) { + if s.authMode == AuthOff || isAuthExempt(r) { + s.mux.ServeHTTP(w, r) + return + } + + // Buffer the body so the signature can be verified over it and the handler + // still reads it. Bodies on the control plane are small (JSON metadata or a + // media blob already capped upstream), so full buffering is acceptable. + body, err := io.ReadAll(r.Body) + if err != nil { + writeErr(w, http.StatusBadRequest, "read body: "+err.Error()) + return + } + _ = r.Body.Close() + r.Body = io.NopCloser(bytes.NewReader(body)) + + if _, err := s.authenticate(r, body, time.Now()); err != nil { + if s.authMode == AuthSoft { + log.Printf("[auth] soft: would reject %s %s: %v", r.Method, r.URL.Path, err) + s.mux.ServeHTTP(w, r) + return + } + writeErr(w, http.StatusUnauthorized, "unauthorized: "+err.Error()) + return + } + s.mux.ServeHTTP(w, r) +} + +// isAuthExempt lists requests that bypass control-plane auth even under enforce. +// Only the unauthenticated health probe qualifies: it carries no data and is +// needed by load balancers / smoke checks / systemd before any identity exists. +func isAuthExempt(r *http.Request) bool { + return r.Method == http.MethodGet && r.URL.Path == "/healthz" +} func (s *Server) routes() { s.mux.HandleFunc("GET /healthz", s.handleHealth)