Merge issue/0006d-posture: homogeneous cluster posture + /healthz posture (audit 0008 N1)

This commit is contained in:
2026-06-07 17:17:37 +02:00
5 changed files with 151 additions and 22 deletions
+18 -1
View File
@@ -83,7 +83,17 @@ func validateBootConfig(bind string, mode membership.AuthMode, tlsCert, tlsKey s
// The three route-TLS paths are all-or-nothing (mutual TLS needs the node cert,
// its key, and the CA together), independent of the bind, so a partial TLS
// config never silently degrades to plaintext routes.
func validateClusterConfig(clusterName, bind, user, pass, rtCert, rtKey, rtCA string) error {
//
// Homogeneous posture (issue 0006d, audit 0008 N1): a cluster is only as secure
// as its weakest node — the data plane forwards every subject between nodes, so a
// single node running without enforced auth lets an unauthenticated peer
// Subscribe(">") on it and harvest the traffic forwarded from the ACL'd nodes.
// This node therefore REFUSES to join a cluster unless it runs --bus-auth enforce,
// regardless of bind: a clustered node is a production node, and there is no safe
// "dev cluster without auth". (A peer running a tampered binary is out of this
// node's control; /healthz exposes each node's posture so a monitor can detect
// one that is not enforce+ACL — see Server.Posture.)
func validateClusterConfig(clusterName, bind, user, pass, rtCert, rtKey, rtCA string, mode membership.AuthMode) error {
rtAny := rtCert != "" || rtKey != "" || rtCA != ""
rtAll := rtCert != "" && rtKey != "" && rtCA != ""
if rtAny && !rtAll {
@@ -93,6 +103,13 @@ func validateClusterConfig(clusterName, bind, user, pass, rtCert, rtKey, rtCA st
if clusterName == "" {
return nil // standalone: no route layer to secure
}
// A clustered node MUST enforce auth (homogeneous posture). Checked before the
// loopback shortcut so even a loopback cluster cannot form without enforce.
if mode != membership.AuthEnforce {
return fmt.Errorf(
"refusing to start: cluster %q requires --bus-auth enforce; a cluster node without enforced auth+ACL lets an unauthenticated peer harvest the traffic forwarded from the other nodes (audit 0008 N1) — every node must run the same enforce+ACL+TLS posture",
clusterName)
}
if isLoopbackBind(bind) {
return nil // loopback cluster is dev-only and unreachable from outside
}
+44 -19
View File
@@ -108,31 +108,40 @@ func TestBootConfigPolicy(t *testing.T) {
// route-TLS flags are all-or-nothing regardless of bind.
func TestClusterConfigPolicy(t *testing.T) {
const c, k, ca = "node.crt", "node.key", "ca.crt"
en := membership.AuthEnforce
off := membership.AuthOff
soft := membership.AuthSoft
cases := []struct {
name string
clusterName, bind string
user, pass string
rtCert, rtKey, rtCA string
wantErr bool
name string
clusterName, bind string
user, pass string
rtCert, rtKey, rtCA string
mode membership.AuthMode
wantErr bool
}{
// Standalone (no cluster name) is always allowed, even on a public bind.
{"standalone-public", "", "0.0.0.0", "", "", "", "", "", false},
// Loopback dev cluster: unguarded (unreachable from outside).
{"loopback-cluster-bare", "unibus", "127.0.0.1", "", "", "", "", "", false},
// Golden: full public HA config.
{"public-full", "unibus", "0.0.0.0", "u", "p", c, k, ca, false},
// Error: public cluster without a route secret.
{"public-no-secret", "unibus", "0.0.0.0", "", "", c, k, ca, true},
{"public-half-secret", "unibus", "0.0.0.0", "u", "", c, k, ca, true},
// Standalone (no cluster name) is always allowed, even on a public bind and
// without enforce — the cluster posture rule does not apply to a single node.
{"standalone-public-off", "", "0.0.0.0", "", "", "", "", "", off, false},
// Loopback dev cluster WITH enforce: allowed (unreachable from outside).
{"loopback-cluster-enforce", "unibus", "127.0.0.1", "", "", "", "", "", en, false},
// Golden: full public HA config under enforce.
{"public-full-enforce", "unibus", "0.0.0.0", "u", "p", c, k, ca, en, false},
// N1 (audit 0008): a clustered node WITHOUT enforce is refused — even on
// loopback — so no weak node can join the cluster.
{"cluster-off-refused", "unibus", "127.0.0.1", "", "", "", "", "", off, true},
{"cluster-soft-refused", "unibus", "0.0.0.0", "u", "p", c, k, ca, soft, true},
// Error: public cluster without a route secret (enforce on, fails on secret).
{"public-no-secret", "unibus", "0.0.0.0", "", "", c, k, ca, en, true},
{"public-half-secret", "unibus", "0.0.0.0", "u", "", c, k, ca, en, true},
// Error: public cluster without mutual route TLS.
{"public-no-tls", "unibus", "10.0.0.1", "u", "p", "", "", "", true},
// Error: partial route-TLS flags trip regardless of bind.
{"loopback-partial-tls", "unibus", "127.0.0.1", "", "", c, "", "", true},
{"standalone-partial-tls", "", "127.0.0.1", "", "", c, k, "", true},
{"public-no-tls", "unibus", "10.0.0.1", "u", "p", "", "", "", en, true},
// Error: partial route-TLS flags trip regardless of bind/mode.
{"loopback-partial-tls", "unibus", "127.0.0.1", "", "", c, "", "", en, true},
{"standalone-partial-tls", "", "127.0.0.1", "", "", c, k, "", off, true},
}
for _, tc := range cases {
t.Run(tc.name, func(t *testing.T) {
err := validateClusterConfig(tc.clusterName, tc.bind, tc.user, tc.pass, tc.rtCert, tc.rtKey, tc.rtCA)
err := validateClusterConfig(tc.clusterName, tc.bind, tc.user, tc.pass, tc.rtCert, tc.rtKey, tc.rtCA, tc.mode)
if tc.wantErr && err == nil {
t.Fatalf("cluster config %+v should be refused", tc)
}
@@ -143,6 +152,22 @@ func TestClusterConfigPolicy(t *testing.T) {
}
}
// TestAttack0008_N1 is the regression for audit 0008 N1 scenario 2: a node
// configured to join a cluster while NOT enforcing auth (the weak node that lets
// an unauthenticated peer harvest the cluster's forwarded traffic) must be refused
// at startup. The homogeneous-posture rule makes this binary unable to BE that
// weak node.
func TestAttack0008_N1(t *testing.T) {
// Weak node: clustered but --bus-auth off -> refused.
if err := validateClusterConfig("unibus", "0.0.0.0", "u", "p", "n.crt", "n.key", "ca.crt", membership.AuthOff); err == nil {
t.Fatalf("a clustered node without enforce must be refused (audit 0008 N1)")
}
// Same node WITH enforce + full route security -> allowed.
if err := validateClusterConfig("unibus", "0.0.0.0", "u", "p", "n.crt", "n.key", "ca.crt", membership.AuthEnforce); err != nil {
t.Fatalf("a clustered enforce node with full route security must be allowed, got: %v", err)
}
}
func TestSplitRoutes(t *testing.T) {
cases := []struct {
in string
+12 -1
View File
@@ -97,7 +97,7 @@ func main() {
}
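// Cluster route guard (issue 0003a): a public cluster needs a route secret
// and mutual route TLS, and the route-TLS flags are all-or-nothing. Since
// issue 0006d (audit 0008 N1) it also refuses any clustered node that is not
// running --bus-auth enforce, regardless of bind.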
if err := validateClusterConfig(*clusterName, *bind, *clusterUser, *clusterPass, *routeTLSCert, *routeTLSKey, *routeTLSCA); err != nil {
if err := validateClusterConfig(*clusterName, *bind, *clusterUser, *clusterPass, *routeTLSCert, *routeTLSKey, *routeTLSCA, authMode); err != nil {
log.Fatalf("%v", err)
}
@@ -274,6 +274,17 @@ func main() {
srv.RequireEncryptedRooms = true
log.Printf("cleartext rooms: DISABLED (public bind requires end-to-end encryption)")
}
// Publish this node's posture on /healthz so a monitor (or a peer) can detect a
// cluster member not running the homogeneous enforce+ACL+TLS posture (audit
// 0008 N1). enforce implies the per-subject ACL in this binary (they are wired
// together above).
srv.Posture = membership.Posture{
Enforce: enforce,
ACL: enforce,
TLS: *tlsCert != "",
Cluster: clustered,
Store: *storeBackend,
}
// Replicated anti-replay (issue 0006a, audit 0008 N3): a clustered node MUST
// share its nonce store across the cluster, or a request accepted on one node
+57
View File
@@ -0,0 +1,57 @@
package membership_test
import (
"encoding/json"
"io"
"net/http"
"net/http/httptest"
"path/filepath"
"testing"
"github.com/enmanuel/unibus/pkg/blobstore"
"github.com/enmanuel/unibus/pkg/membership"
)
// TestHealthExposesPosture verifies that /healthz publishes the node's security
// posture, so a monitor (or a peer) can detect a cluster member that is not
// enforce+ACL+TLS (audit 0008 N1). The request carries no credentials: the
// probe must stay unauthenticated.
func TestHealthExposesPosture(t *testing.T) {
	dir := t.TempDir()

	store, err := membership.Open(filepath.Join(dir, "unibus.db"))
	if err != nil {
		t.Fatalf("store: %v", err)
	}
	t.Cleanup(func() { store.Close() })

	// Fail fast on a broken blob store instead of handing the server a value
	// that is meaningless after an error.
	blobs, err := blobstore.New(filepath.Join(dir, "blobs"))
	if err != nil {
		t.Fatalf("blobstore: %v", err)
	}

	srv := membership.NewServer(store, blobs, membership.AuthEnforce)
	srv.Posture = membership.Posture{Enforce: true, ACL: true, TLS: true, Cluster: true, Store: "kv"}

	ts := httptest.NewServer(srv)
	t.Cleanup(ts.Close)

	resp, err := http.Get(ts.URL + "/healthz")
	if err != nil {
		t.Fatalf("get healthz: %v", err)
	}
	defer resp.Body.Close()
	if resp.StatusCode != http.StatusOK {
		t.Fatalf("healthz status %d, want 200", resp.StatusCode)
	}

	// A truncated read must be reported as such, not as a confusing JSON
	// decode failure on a partial body.
	body, err := io.ReadAll(resp.Body)
	if err != nil {
		t.Fatalf("read healthz body: %v", err)
	}

	var got struct {
		Status  string             `json:"status"`
		Posture membership.Posture `json:"posture"`
	}
	if err := json.Unmarshal(body, &got); err != nil {
		t.Fatalf("decode healthz %q: %v", string(body), err)
	}
	if got.Status != "ok" {
		t.Fatalf("status = %q, want ok", got.Status)
	}
	if !got.Posture.Enforce || !got.Posture.ACL || !got.Posture.TLS || !got.Posture.Cluster {
		t.Fatalf("posture not surfaced correctly: %+v", got.Posture)
	}
	if got.Posture.Store != "kv" {
		t.Fatalf("posture.store = %q, want kv", got.Posture.Store)
	}
}
+20 -1
View File
@@ -81,6 +81,25 @@ type Server struct {
// (non-loopback) bind. See dev/0004d-dataplane-acl.md for the full rationale
// and the residual metadata exposure this does NOT close.
RequireEncryptedRooms bool
// Posture is the node's security posture, surfaced on /healthz so an operator
// or a peer can detect a node NOT running the homogeneous enforce+ACL+TLS
// posture a secure cluster requires (audit 0008 N1). It is set by the command;
// the zero value (every flag false, empty Store) reflects an unsecured dev node.
Posture Posture
}
// Posture describes the security posture a membershipd node runs with. It is
// non-secret operational metadata (booleans + the store backend name), published
// on /healthz so a monitor can flag a cluster member that is not enforce+ACL+TLS
// — the weak node that would let an unauthenticated peer harvest the cluster's
// forwarded traffic (audit 0008 N1).
//
// The struct is serialized as the "posture" object of the /healthz JSON body;
// the field tags below are that wire format.
type Posture struct {
// Enforce reports whether the node enforces auth. The command fills it from
// its enforce flag (presumably --bus-auth enforce — confirm in main).
Enforce bool `json:"enforce"`
// ACL reports whether the per-subject ACL is active. The command sets it from
// the same enforce flag as Enforce, since the two are wired together there.
ACL bool `json:"acl"`
// TLS reports whether a TLS certificate was configured; the command sets it
// when its TLS cert flag is non-empty.
TLS bool `json:"tls"`
// Cluster reports whether the node runs as a cluster member.
Cluster bool `json:"cluster"`
// Store is the name of the store backend the command was started with.
Store string `json:"store"` // "sqlite" | "kv"
}
// NewServer wires the membership store and blob store into an http.Handler. The
@@ -390,7 +409,7 @@ func (s *Server) verifyOwnerSig(roomID, by string, sig, canonical []byte) (Membe
// ---- handlers -------------------------------------------------------------
// handleHealth answers the health probe. It ignores the request and hands
// writeJSON an http.StatusOK response whose payload carries "status": "ok"
// together with the node's Posture under the "posture" key.
//
// NOTE(review): this view interleaves the pre-change response line (status
// only) with its replacement (status + posture). Confirm that only the
// posture-carrying call remains in the real file, since two consecutive
// writes would emit two JSON documents.
func (s *Server) handleHealth(w http.ResponseWriter, _ *http.Request) {
writeJSON(w, http.StatusOK, map[string]string{"status": "ok"})
writeJSON(w, http.StatusOK, map[string]any{"status": "ok", "posture": s.Posture})
}
func (s *Server) handleCreateRoom(w http.ResponseWriter, r *http.Request) {