8b6a01d280
membershipd never called Server.UseReplicatedNonces, so every node kept a per-process anti-replay cache and a signed request accepted on node A could be replayed to node B (200+200). This wires the shared JetStream KV nonce bucket on any clustered node, closing the cross-node replay hole. Bootstrap: under enforce the service needs JetStream on its own embedded server, but the data plane only accepts allowlisted clients. Resolved with an ephemeral internal service identity the authenticator recognizes and grants full permissions (NewNkeyAuthenticatorACLInternal), connected over the in-process transport (no TLS/CA needed for the self-connection). Hard rule: --cluster-name != "" means the replicated nonce bucket is mandatory; if it cannot be created the node refuses to start (wireReplicatedNonces returns a fatal error) rather than run insecurely. Standalone nodes keep the in-memory cache unchanged (branch-by-abstraction: no JetStream dependency added). Changes: - busauth: NewNkeyAuthenticatorACLInternal + fullPermissions for the internal id. - cmd/membershipd: connectInternalJS (in-process, privileged) / connectExternalJS; wireReplicatedNonces helper; main wires it when clustered; --kv-replicas flag. Tests (regression of audit 0008 N3): - TestAttack0008_N3: 2 clustered nodes share the bucket, cross-node replay -> 401. - TestAttack0008_N3_StandaloneKeepsLocalCache: standalone needs no JetStream, same-node replay still 401. - TestAttack0008_N3_ClusteredRequiresJetStream: clustered + no JetStream -> fatal. - TestInternalConnPrivilegedUnderEnforce / ...OutsiderRejected: the privileged self-connection works under enforce and no other identity can claim it. CGO_ENABLED=0 go build/vet/test green; govulncheck 0 reachable. Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
222 lines
7.6 KiB
Go
222 lines
7.6 KiB
Go
package main
|
|
|
|
// Regression for audit report 0008, vector N3: the binary must wire the
// replicated nonce store on a clustered node so a signed request accepted on one
// node cannot be replayed to another. The auditor's ephemeral attack showed the
// OLD binary never called UseReplicatedNonces (each node kept a per-process
// cache), so a captured request replayed to a second node with 200+200. These
// tests drive the SAME helper the binary uses (wireReplicatedNonces) so they
// prove the WIRING, not just the underlying API.
|
|
|
|
import (
|
|
"bytes"
|
|
"crypto/rand"
|
|
"encoding/base64"
|
|
"encoding/hex"
|
|
"io"
|
|
"net"
|
|
"net/http"
|
|
"net/http/httptest"
|
|
"path/filepath"
|
|
"strconv"
|
|
"testing"
|
|
"time"
|
|
|
|
cs "fn-registry/functions/cybersecurity"
|
|
|
|
"github.com/enmanuel/unibus/pkg/blobstore"
|
|
"github.com/enmanuel/unibus/pkg/embeddednats"
|
|
"github.com/enmanuel/unibus/pkg/frame"
|
|
"github.com/enmanuel/unibus/pkg/membership"
|
|
"github.com/nats-io/nats.go"
|
|
"github.com/nats-io/nats.go/jetstream"
|
|
)
|
|
|
|
// freePort binds a TCP listener to 127.0.0.1 on port 0, reads back the port
// number the listener was given, and returns it. The listener is closed when
// the function returns, so the port is only a hint for the caller to reuse.
// A failed bind aborts the test.
func freePort(t *testing.T) int {
	t.Helper()

	listener, listenErr := net.Listen("tcp", "127.0.0.1:0")
	if listenErr != nil {
		t.Fatalf("free port: %v", listenErr)
	}
	defer listener.Close()

	tcpAddr := listener.Addr().(*net.TCPAddr)
	return tcpAddr.Port
}
|
|
|
|
// signed008 builds a transport-signed control-plane request with a caller-chosen
|
|
// ts+nonce, so a test can reuse the exact same signed bytes against two nodes to
|
|
// exercise replay.
|
|
func signed008(t *testing.T, baseURL, method, path string, body []byte, id cs.Identity, ts int64, nonce string) *http.Request {
|
|
t.Helper()
|
|
canonical := membership.CanonicalRequest(method, path, strconv.FormatInt(ts, 10), nonce, body)
|
|
sig := cs.SignEd25519(id.SignPriv, canonical)
|
|
var rdr io.Reader
|
|
if body != nil {
|
|
rdr = bytes.NewReader(body)
|
|
}
|
|
req, err := http.NewRequest(method, baseURL+path, rdr)
|
|
if err != nil {
|
|
t.Fatalf("new request: %v", err)
|
|
}
|
|
req.Header.Set("X-Unibus-Pub", hex.EncodeToString(id.SignPub))
|
|
req.Header.Set("X-Unibus-Ts", strconv.FormatInt(ts, 10))
|
|
req.Header.Set("X-Unibus-Nonce", nonce)
|
|
req.Header.Set("X-Unibus-Sig", base64.StdEncoding.EncodeToString(sig))
|
|
return req
|
|
}
|
|
|
|
// randNonce returns 16 bytes read from crypto/rand, encoded as standard
// (padded) base64. A failed read aborts the test.
func randNonce(t *testing.T) string {
	t.Helper()

	var buf [16]byte
	if _, readErr := rand.Read(buf[:]); readErr != nil {
		t.Fatalf("nonce: %v", readErr)
	}
	return base64.StdEncoding.EncodeToString(buf[:])
}
|
|
|
|
// TestAttack0008_N3 is the blocker regression: two clustered membershipd nodes
|
|
// wired through wireReplicatedNonces share a JetStream KV nonce bucket, so a
|
|
// request accepted on node A is rejected (401) when replayed to node B. Before
|
|
// the fix the binary never wired this and the replay returned 200.
|
|
func TestAttack0008_N3(t *testing.T) {
|
|
// One NATS+JetStream backing the shared nonce bucket (no client auth needed:
|
|
// the test drives the membership.Server's nonce store directly via HTTP).
|
|
ns, err := embeddednats.StartServer(embeddednats.ServerConfig{
|
|
StoreDir: t.TempDir(), Host: "127.0.0.1", Port: freePort(t),
|
|
})
|
|
if err != nil {
|
|
t.Fatalf("nats: %v", err)
|
|
}
|
|
t.Cleanup(func() { ns.Shutdown(); ns.WaitForShutdown() })
|
|
nc, err := nats.Connect(ns.ClientURL())
|
|
if err != nil {
|
|
t.Fatalf("connect: %v", err)
|
|
}
|
|
t.Cleanup(nc.Close)
|
|
js, err := jetstream.New(nc)
|
|
if err != nil {
|
|
t.Fatalf("jetstream: %v", err)
|
|
}
|
|
|
|
// Shared control-plane state (stand-in for the replicated store) + two nodes.
|
|
dir := t.TempDir()
|
|
store, err := membership.Open(filepath.Join(dir, "unibus.db"))
|
|
if err != nil {
|
|
t.Fatalf("store: %v", err)
|
|
}
|
|
t.Cleanup(func() { store.Close() })
|
|
alice, err := cs.GenerateIdentity()
|
|
if err != nil {
|
|
t.Fatalf("identity: %v", err)
|
|
}
|
|
if err := store.AddUser(hex.EncodeToString(alice.SignPub), "alice", membership.RoleAdmin); err != nil {
|
|
t.Fatalf("add alice: %v", err)
|
|
}
|
|
blobs, _ := blobstore.New(filepath.Join(dir, "blobs"))
|
|
|
|
// Each node is wired EXACTLY as the binary wires a clustered node.
|
|
mkNode := func() *httptest.Server {
|
|
srv := membership.NewServer(store, blobs, membership.AuthEnforce)
|
|
if err := wireReplicatedNonces(srv, js, true /*clustered*/, 1); err != nil {
|
|
t.Fatalf("wireReplicatedNonces: %v", err)
|
|
}
|
|
return httptest.NewServer(srv)
|
|
}
|
|
nodeA := mkNode()
|
|
t.Cleanup(nodeA.Close)
|
|
nodeB := mkNode()
|
|
t.Cleanup(nodeB.Close)
|
|
|
|
ts := time.Now().Unix()
|
|
nonce := randNonce(t)
|
|
path := "/members/" + frame.EndpointID(alice.SignPub) + "/rooms"
|
|
|
|
// Golden: alice's signed request is accepted on node A.
|
|
respA, err := http.DefaultClient.Do(signed008(t, nodeA.URL, "GET", path, nil, alice, ts, nonce))
|
|
if err != nil {
|
|
t.Fatalf("do A: %v", err)
|
|
}
|
|
respA.Body.Close()
|
|
if respA.StatusCode != http.StatusOK {
|
|
t.Fatalf("node A first use: status %d, want 200", respA.StatusCode)
|
|
}
|
|
|
|
// Error path (the attack): replay the SAME signed bytes to node B → 401.
|
|
respB, err := http.DefaultClient.Do(signed008(t, nodeB.URL, "GET", path, nil, alice, ts, nonce))
|
|
if err != nil {
|
|
t.Fatalf("do B: %v", err)
|
|
}
|
|
respB.Body.Close()
|
|
if respB.StatusCode != http.StatusUnauthorized {
|
|
t.Fatalf("cross-node replay to node B: status %d, want 401 (replayed nonce must be rejected)", respB.StatusCode)
|
|
}
|
|
}
|
|
|
|
// TestAttack0008_N3_StandaloneKeepsLocalCache is the edge: a NON-clustered node
|
|
// must NOT require JetStream — wireReplicatedNonces is a no-op and the node keeps
|
|
// its in-memory cache, which still rejects a same-node replay (the single-node
|
|
// guarantee is unchanged). This proves the fix does not add a JetStream
|
|
// dependency to standalone deployments.
|
|
func TestAttack0008_N3_StandaloneKeepsLocalCache(t *testing.T) {
|
|
dir := t.TempDir()
|
|
store, err := membership.Open(filepath.Join(dir, "unibus.db"))
|
|
if err != nil {
|
|
t.Fatalf("store: %v", err)
|
|
}
|
|
t.Cleanup(func() { store.Close() })
|
|
alice, err := cs.GenerateIdentity()
|
|
if err != nil {
|
|
t.Fatalf("identity: %v", err)
|
|
}
|
|
if err := store.AddUser(hex.EncodeToString(alice.SignPub), "alice", membership.RoleAdmin); err != nil {
|
|
t.Fatalf("add alice: %v", err)
|
|
}
|
|
blobs, _ := blobstore.New(filepath.Join(dir, "blobs"))
|
|
|
|
srv := membership.NewServer(store, blobs, membership.AuthEnforce)
|
|
// Standalone: clustered=false, js=nil. Must succeed (no JetStream needed).
|
|
if err := wireReplicatedNonces(srv, nil, false /*clustered*/, 1); err != nil {
|
|
t.Fatalf("standalone wireReplicatedNonces must be a no-op, got: %v", err)
|
|
}
|
|
node := httptest.NewServer(srv)
|
|
t.Cleanup(node.Close)
|
|
|
|
ts := time.Now().Unix()
|
|
nonce := randNonce(t)
|
|
path := "/members/" + frame.EndpointID(alice.SignPub) + "/rooms"
|
|
|
|
resp1, err := http.DefaultClient.Do(signed008(t, node.URL, "GET", path, nil, alice, ts, nonce))
|
|
if err != nil {
|
|
t.Fatalf("do 1: %v", err)
|
|
}
|
|
resp1.Body.Close()
|
|
if resp1.StatusCode != http.StatusOK {
|
|
t.Fatalf("first use: status %d, want 200", resp1.StatusCode)
|
|
}
|
|
// Same-node replay is still rejected by the in-memory cache.
|
|
resp2, err := http.DefaultClient.Do(signed008(t, node.URL, "GET", path, nil, alice, ts, nonce))
|
|
if err != nil {
|
|
t.Fatalf("do 2: %v", err)
|
|
}
|
|
resp2.Body.Close()
|
|
if resp2.StatusCode != http.StatusUnauthorized {
|
|
t.Fatalf("same-node replay: status %d, want 401", resp2.StatusCode)
|
|
}
|
|
}
|
|
|
|
// TestAttack0008_N3_ClusteredRequiresJetStream proves the hard rule: a clustered
|
|
// node with NO JetStream available refuses (error), so the binary fails fast
|
|
// instead of silently running with a per-process cache.
|
|
func TestAttack0008_N3_ClusteredRequiresJetStream(t *testing.T) {
|
|
dir := t.TempDir()
|
|
store, err := membership.Open(filepath.Join(dir, "unibus.db"))
|
|
if err != nil {
|
|
t.Fatalf("store: %v", err)
|
|
}
|
|
t.Cleanup(func() { store.Close() })
|
|
blobs, _ := blobstore.New(filepath.Join(dir, "blobs"))
|
|
srv := membership.NewServer(store, blobs, membership.AuthEnforce)
|
|
if err := wireReplicatedNonces(srv, nil, true /*clustered*/, 1); err == nil {
|
|
t.Fatalf("clustered node with no JetStream must fail, got nil")
|
|
}
|
|
}
|