0aa2caae43
Three medium audit findings. H6 (owner spoof): handleCreateRoom now binds the body's declared owner to the authenticated signer — both the endpoint id and the signing key must be the signer's — so a registered peer cannot create rooms in another identity's name. Enforced only when an authenticated signer is present. H7 (nonce-cache poison pre-auth): IsAuthorized now runs BEFORE the replay cache is touched, so an unregistered identity (Ed25519 keys are free) can no longer seed nonces into it. The cache is rewritten with O(expired) pruning (insertion order equals expiry order under a constant TTL) instead of the previous O(n) full-map scan under the mutex, plus a size cap with oldest-eviction. This is the prerequisite the 0003 replicated nonce store builds on. H12 (error leak): internal store/blob errors are logged and replaced with a generic client message via writeServerErr, so SQL fragments and filesystem paths no longer reach the caller. Crafted 4xx messages (owner-sig, validation) are kept. Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
231 lines
7.9 KiB
Go
231 lines
7.9 KiB
Go
package membership
|
|
|
|
import (
|
|
"crypto/sha256"
|
|
"encoding/base64"
|
|
"encoding/hex"
|
|
"fmt"
|
|
"net/http"
|
|
"strconv"
|
|
"sync"
|
|
"time"
|
|
|
|
cs "fn-registry/functions/cybersecurity"
|
|
|
|
"github.com/enmanuel/unibus/pkg/frame"
|
|
)
|
|
|
|
// AuthMode selects how the control plane treats request signatures. It is the
// rollout state behind the bus-auth feature flag and decides what the HTTP
// middleware does with a request whose signature is missing, invalid,
// replayed, skewed, or made by an unregistered identity.
type AuthMode int

// The rollout states, from least to most strict.
const (
	// AuthOff verifies nothing (legacy behavior; the default).
	AuthOff AuthMode = iota
	// AuthSoft verifies and LOGS rejections but still lets the request
	// through, so clients can migrate to signing without an outage.
	AuthSoft
	// AuthEnforce rejects unauthenticated requests with 401.
	AuthEnforce
)

// String returns the flag spelling of m ("off", "soft" or "enforce"), or
// "unknown" for a value outside the declared modes.
func (m AuthMode) String() string {
	names := [...]string{AuthOff: "off", AuthSoft: "soft", AuthEnforce: "enforce"}
	if m < AuthOff || m > AuthEnforce {
		return "unknown"
	}
	return names[m]
}

// ParseAuthMode maps the bus-auth flag string to an AuthMode. The empty string
// means AuthOff; any spelling other than "off", "soft" or "enforce" yields
// AuthOff together with a non-nil error. Matching is exact (case-sensitive, no
// trimming).
func ParseAuthMode(s string) (AuthMode, error) {
	if s == "" {
		return AuthOff, nil
	}
	// String is the single source of the accepted spellings. "unknown" is never
	// produced for a mode inside this range, so it is rejected like any other
	// unrecognized input.
	for mode := AuthOff; mode <= AuthEnforce; mode++ {
		if mode.String() == s {
			return mode, nil
		}
	}
	return AuthOff, fmt.Errorf("membership: invalid bus-auth mode %q (want off|soft|enforce)", s)
}
|
|
|
|
// HTTP headers that carry a control-plane request signature. The client signs
// the canonical bytes of the request and sends these four headers; the server
// rebuilds the same canonical bytes and verifies the signature against them.
// CanonicalRequest defines the exact byte layout.
const (
	// hdrPub is the signer's Ed25519 public key, lowercase hex.
	hdrPub = "X-Unibus-Pub"
	// hdrTs is the signing time in unix seconds, as a decimal string.
	hdrTs = "X-Unibus-Ts"
	// hdrNonce is 16 random bytes, std base64.
	hdrNonce = "X-Unibus-Nonce"
	// hdrSig is the Ed25519 signature over the canonical bytes, std base64.
	hdrSig = "X-Unibus-Sig"
)
|
|
|
|
// Anti-replay parameters.
//
// clockSkew is how far a request timestamp may lie from the server clock and
// still be accepted. nonceTTL is how long an accepted nonce is remembered; it
// must cover the whole interval in which a captured request would still pass
// the timestamp check, otherwise a replay could outlive its memory.
//
// That interval is slightly wider than 2*clockSkew: authenticate compares whole
// seconds (now.Unix() against the header value), so a timestamp ts is accepted
// for any server time in [ts-clockSkew, ts+clockSkew+1s). nonceTTL is therefore
// derived as 2*clockSkew plus that extra second rather than written as an
// independent literal. A plain 60s TTL expired a nonce recorded at the early
// edge of the window up to one second before the request itself stopped being
// acceptable.
const (
	clockSkew = 30 * time.Second
	nonceTTL  = 2*clockSkew + time.Second

	// maxNonceCacheEntries bounds the replay cache so it cannot grow without
	// limit (audit H7). With IsAuthorized gating insertion, only authorized
	// traffic is cached, so this ceiling is only approached under a legitimate
	// burst; at the cap the oldest nonce is evicted (its TTL is nearly up
	// anyway).
	maxNonceCacheEntries = 100_000
)
|
|
|
|
// CanonicalRequest builds the byte string that is signed by the client and
// re-derived by the server for a control-plane request:
//
//	method "\n" path "\n" ts "\n" nonce "\n" hex(sha256(body))
//
// path is the request URI (path plus raw query), so query parameters such as
// endpoint and epoch are covered by the signature. The function is exported so
// the client library and the tests sign with the identical construction; this
// is the one place the format is defined.
func CanonicalRequest(method, path, ts, nonce string, body []byte) []byte {
	digest := sha256.Sum256(body)
	fields := [...]string{method, path, ts, nonce}

	// Each field is followed by one newline; the hex digest closes the message
	// without a trailing separator.
	size := hex.EncodedLen(len(digest))
	for _, field := range fields {
		size += len(field) + 1
	}

	out := make([]byte, 0, size)
	for _, field := range fields {
		out = append(out, field...)
		out = append(out, '\n')
	}
	return append(out, hex.EncodeToString(digest[:])...)
}
|
|
|
|
// nonceCache remembers recently-seen nonces so replays can be rejected. It is
// an in-memory store guarded by a mutex, which is sufficient for a single
// membershipd process (the spec's chosen tradeoff over a server-issued nonce
// round-trip). A distributed deployment would need a shared store (tracked for
// issue 0003).
//
// Because the TTL is constant, insertion order equals expiry order as long as
// callers pass non-decreasing times, so the entries at the front of order are
// the ones that expire first. Pruning therefore only walks the expired prefix
// and stops at the first live entry (audit H7: the earlier full-map scan under
// the mutex was a CPU-amplification vector). A size cap bounds the number of
// entries.
type nonceCache struct {
	mu    sync.Mutex
	seen  map[string]time.Time // nonce -> expiry
	order []string             // nonces in insertion order == expiry order
	ttl   time.Duration        // lifetime given to every recorded nonce
	cap   int                  // entry count at which the oldest nonce is evicted
}

// newNonceCache returns an empty cache that remembers each nonce for ttl and
// starts evicting its oldest entries once it holds capacity of them.
func newNonceCache(ttl time.Duration, capacity int) *nonceCache {
	return &nonceCache{seen: make(map[string]time.Time), ttl: ttl, cap: capacity}
}

// rememberOrReject records nonce with expiry now+ttl and returns true if it was
// unseen, or returns false without recording anything if it is a replay (still
// live in the cache). A nonce is live up to and including its expiry instant.
func (n *nonceCache) rememberOrReject(nonce string, now time.Time) bool {
	n.mu.Lock()
	defer n.mu.Unlock()

	// Drop the expired prefix (oldest first). The first live entry ends the
	// scan: everything behind it was inserted later.
	cut := 0
	for cut < len(n.order) {
		key := n.order[cut]
		if exp, ok := n.seen[key]; ok {
			if !exp.Before(now) {
				break
			}
			delete(n.seen, key)
		}
		// A key without a map entry is a stale duplicate: the nonce was recorded
		// again after an earlier copy, and the map entry is already gone.
		// Blank the slot so the backing array stops pinning the nonce string.
		n.order[cut] = ""
		cut++
	}
	// Re-slice instead of copying the live tail to the front: the copy moved
	// every live entry under the mutex on each prune, which is exactly the
	// O(n) work this cache is meant to avoid. The dropped prefix is reclaimed
	// when append next reallocates.
	n.order = n.order[cut:]

	if exp, ok := n.seen[nonce]; ok && !exp.Before(now) {
		return false // a live replay
	}

	// Bound memory: at capacity, evict the oldest entry (its TTL is nearly up).
	for len(n.seen) >= n.cap && len(n.order) > 0 {
		oldest := n.order[0]
		n.order[0] = ""
		n.order = n.order[1:]
		delete(n.seen, oldest)
	}

	n.seen[nonce] = now.Add(n.ttl)
	n.order = append(n.order, nonce)
	return true
}
|
|
|
|
// authResult is what a successful authentication yields: the verified signing
|
|
// key (hex), the endpoint id derived from it, and the authorized user record.
|
|
// Handlers use endpoint for membership authorization (only a member of a room
|
|
// may read its metadata/keys); user is available for role checks.
|
|
type authResult struct {
|
|
pubHex string
|
|
endpoint string
|
|
user User
|
|
}
|
|
|
|
// authenticate verifies the signature headers on r against body and the user
|
|
// allowlist. It returns an error describing the first failing check; the
|
|
// middleware decides whether that error blocks (enforce) or only logs (soft).
|
|
//
|
|
// Order matters: cheap, non-cryptographic checks (header presence, key shape,
|
|
// clock skew) run first; the Ed25519 verification runs before the replay cache
|
|
// is touched so an attacker cannot poison the cache with unsigned nonces; the
|
|
// allowlist lookup runs last.
|
|
func (s *Server) authenticate(r *http.Request, body []byte, now time.Time) (authResult, error) {
|
|
pubHex := r.Header.Get(hdrPub)
|
|
ts := r.Header.Get(hdrTs)
|
|
nonce := r.Header.Get(hdrNonce)
|
|
sigB64 := r.Header.Get(hdrSig)
|
|
if pubHex == "" || ts == "" || nonce == "" || sigB64 == "" {
|
|
return authResult{}, fmt.Errorf("missing auth headers")
|
|
}
|
|
|
|
pub, err := hex.DecodeString(pubHex)
|
|
if err != nil || len(pub) != 32 {
|
|
return authResult{}, fmt.Errorf("malformed %s (want 32-byte Ed25519 hex)", hdrPub)
|
|
}
|
|
|
|
tsInt, err := strconv.ParseInt(ts, 10, 64)
|
|
if err != nil {
|
|
return authResult{}, fmt.Errorf("malformed %s", hdrTs)
|
|
}
|
|
if d := now.Unix() - tsInt; d > int64(clockSkew/time.Second) || d < -int64(clockSkew/time.Second) {
|
|
return authResult{}, fmt.Errorf("timestamp out of range (skew %ds)", d)
|
|
}
|
|
|
|
sig, err := base64.StdEncoding.DecodeString(sigB64)
|
|
if err != nil {
|
|
return authResult{}, fmt.Errorf("malformed %s", hdrSig)
|
|
}
|
|
|
|
canonical := CanonicalRequest(r.Method, r.URL.RequestURI(), ts, nonce, body)
|
|
if !cs.VerifyEd25519(pub, canonical, sig) {
|
|
return authResult{}, fmt.Errorf("invalid signature")
|
|
}
|
|
|
|
// Authorize BEFORE touching the replay cache (audit H7): an unregistered
|
|
// identity can mint valid signatures for free, so caching its nonces would let
|
|
// it poison/grow the cache pre-auth. Only authorized identities are remembered.
|
|
if !s.store.IsAuthorized(pubHex) {
|
|
return authResult{}, fmt.Errorf("identity not authorized")
|
|
}
|
|
|
|
user, err := s.store.GetUser(pubHex)
|
|
if err != nil {
|
|
// IsAuthorized passed but the row vanished (race with revoke): fail closed.
|
|
return authResult{}, fmt.Errorf("identity not authorized")
|
|
}
|
|
|
|
// Anti-replay last: a replayed request from an authorized identity is still
|
|
// rejected here (the nonce is already live in the cache from its first use).
|
|
if !s.nonces.rememberOrReject(nonce, now) {
|
|
return authResult{}, fmt.Errorf("replayed nonce")
|
|
}
|
|
|
|
return authResult{pubHex: pubHex, endpoint: frame.EndpointID(pub), user: user}, nil
|
|
}
|