Merge issue/0006d-posture: homogeneous cluster posture + /healthz posture (audit 0008 N1)
This commit is contained in:
@@ -83,7 +83,17 @@ func validateBootConfig(bind string, mode membership.AuthMode, tlsCert, tlsKey s
|
||||
// The three route-TLS paths are all-or-nothing (mutual TLS needs the node cert,
|
||||
// its key, and the CA together), independent of the bind, so a partial TLS
|
||||
// config never silently degrades to plaintext routes.
|
||||
func validateClusterConfig(clusterName, bind, user, pass, rtCert, rtKey, rtCA string) error {
|
||||
//
|
||||
// Homogeneous posture (issue 0006d, audit 0008 N1): a cluster is only as secure
|
||||
// as its weakest node — the data plane forwards every subject between nodes, so a
|
||||
// single node running without enforced auth lets an unauthenticated peer
|
||||
// Subscribe(">") on it and harvest the traffic forwarded from the ACL'd nodes.
|
||||
// This node therefore REFUSES to join a cluster unless it runs --bus-auth enforce,
|
||||
// regardless of bind: a clustered node is a production node, and there is no safe
|
||||
// "dev cluster without auth". (A peer running a tampered binary is out of this
|
||||
// node's control; /healthz exposes each node's posture so a monitor can detect
|
||||
// one that is not enforce+ACL — see Server.Posture.)
|
||||
func validateClusterConfig(clusterName, bind, user, pass, rtCert, rtKey, rtCA string, mode membership.AuthMode) error {
|
||||
rtAny := rtCert != "" || rtKey != "" || rtCA != ""
|
||||
rtAll := rtCert != "" && rtKey != "" && rtCA != ""
|
||||
if rtAny && !rtAll {
|
||||
@@ -93,6 +103,13 @@ func validateClusterConfig(clusterName, bind, user, pass, rtCert, rtKey, rtCA st
|
||||
if clusterName == "" {
|
||||
return nil // standalone: no route layer to secure
|
||||
}
|
||||
// A clustered node MUST enforce auth (homogeneous posture). Checked before the
|
||||
// loopback shortcut so even a loopback cluster cannot form without enforce.
|
||||
if mode != membership.AuthEnforce {
|
||||
return fmt.Errorf(
|
||||
"refusing to start: cluster %q requires --bus-auth enforce; a cluster node without enforced auth+ACL lets an unauthenticated peer harvest the traffic forwarded from the other nodes (audit 0008 N1) — every node must run the same enforce+ACL+TLS posture",
|
||||
clusterName)
|
||||
}
|
||||
if isLoopbackBind(bind) {
|
||||
return nil // loopback cluster is dev-only and unreachable from outside
|
||||
}
|
||||
|
||||
@@ -108,31 +108,40 @@ func TestBootConfigPolicy(t *testing.T) {
|
||||
// route-TLS flags are all-or-nothing regardless of bind.
|
||||
func TestClusterConfigPolicy(t *testing.T) {
|
||||
const c, k, ca = "node.crt", "node.key", "ca.crt"
|
||||
en := membership.AuthEnforce
|
||||
off := membership.AuthOff
|
||||
soft := membership.AuthSoft
|
||||
cases := []struct {
|
||||
name string
|
||||
clusterName, bind string
|
||||
user, pass string
|
||||
rtCert, rtKey, rtCA string
|
||||
wantErr bool
|
||||
name string
|
||||
clusterName, bind string
|
||||
user, pass string
|
||||
rtCert, rtKey, rtCA string
|
||||
mode membership.AuthMode
|
||||
wantErr bool
|
||||
}{
|
||||
// Standalone (no cluster name) is always allowed, even on a public bind.
|
||||
{"standalone-public", "", "0.0.0.0", "", "", "", "", "", false},
|
||||
// Loopback dev cluster: unguarded (unreachable from outside).
|
||||
{"loopback-cluster-bare", "unibus", "127.0.0.1", "", "", "", "", "", false},
|
||||
// Golden: full public HA config.
|
||||
{"public-full", "unibus", "0.0.0.0", "u", "p", c, k, ca, false},
|
||||
// Error: public cluster without a route secret.
|
||||
{"public-no-secret", "unibus", "0.0.0.0", "", "", c, k, ca, true},
|
||||
{"public-half-secret", "unibus", "0.0.0.0", "u", "", c, k, ca, true},
|
||||
// Standalone (no cluster name) is always allowed, even on a public bind and
|
||||
// without enforce — the cluster posture rule does not apply to a single node.
|
||||
{"standalone-public-off", "", "0.0.0.0", "", "", "", "", "", off, false},
|
||||
// Loopback dev cluster WITH enforce: allowed (unreachable from outside).
|
||||
{"loopback-cluster-enforce", "unibus", "127.0.0.1", "", "", "", "", "", en, false},
|
||||
// Golden: full public HA config under enforce.
|
||||
{"public-full-enforce", "unibus", "0.0.0.0", "u", "p", c, k, ca, en, false},
|
||||
// N1 (audit 0008): a clustered node WITHOUT enforce is refused — even on
|
||||
// loopback — so no weak node can join the cluster.
|
||||
{"cluster-off-refused", "unibus", "127.0.0.1", "", "", "", "", "", off, true},
|
||||
{"cluster-soft-refused", "unibus", "0.0.0.0", "u", "p", c, k, ca, soft, true},
|
||||
// Error: public cluster without a route secret (enforce on, fails on secret).
|
||||
{"public-no-secret", "unibus", "0.0.0.0", "", "", c, k, ca, en, true},
|
||||
{"public-half-secret", "unibus", "0.0.0.0", "u", "", c, k, ca, en, true},
|
||||
// Error: public cluster without mutual route TLS.
|
||||
{"public-no-tls", "unibus", "10.0.0.1", "u", "p", "", "", "", true},
|
||||
// Error: partial route-TLS flags trip regardless of bind.
|
||||
{"loopback-partial-tls", "unibus", "127.0.0.1", "", "", c, "", "", true},
|
||||
{"standalone-partial-tls", "", "127.0.0.1", "", "", c, k, "", true},
|
||||
{"public-no-tls", "unibus", "10.0.0.1", "u", "p", "", "", "", en, true},
|
||||
// Error: partial route-TLS flags trip regardless of bind/mode.
|
||||
{"loopback-partial-tls", "unibus", "127.0.0.1", "", "", c, "", "", en, true},
|
||||
{"standalone-partial-tls", "", "127.0.0.1", "", "", c, k, "", off, true},
|
||||
}
|
||||
for _, tc := range cases {
|
||||
t.Run(tc.name, func(t *testing.T) {
|
||||
err := validateClusterConfig(tc.clusterName, tc.bind, tc.user, tc.pass, tc.rtCert, tc.rtKey, tc.rtCA)
|
||||
err := validateClusterConfig(tc.clusterName, tc.bind, tc.user, tc.pass, tc.rtCert, tc.rtKey, tc.rtCA, tc.mode)
|
||||
if tc.wantErr && err == nil {
|
||||
t.Fatalf("cluster config %+v should be refused", tc)
|
||||
}
|
||||
@@ -143,6 +152,22 @@ func TestClusterConfigPolicy(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
// TestAttack0008_N1 is the regression for audit 0008 N1 scenario 2: a node
|
||||
// configured to join a cluster while NOT enforcing auth (the weak node that lets
|
||||
// an unauthenticated peer harvest the cluster's forwarded traffic) must be refused
|
||||
// at startup. The homogeneous-posture rule makes this binary unable to BE that
|
||||
// weak node.
|
||||
func TestAttack0008_N1(t *testing.T) {
|
||||
// Weak node: clustered but --bus-auth off -> refused.
|
||||
if err := validateClusterConfig("unibus", "0.0.0.0", "u", "p", "n.crt", "n.key", "ca.crt", membership.AuthOff); err == nil {
|
||||
t.Fatalf("a clustered node without enforce must be refused (audit 0008 N1)")
|
||||
}
|
||||
// Same node WITH enforce + full route security -> allowed.
|
||||
if err := validateClusterConfig("unibus", "0.0.0.0", "u", "p", "n.crt", "n.key", "ca.crt", membership.AuthEnforce); err != nil {
|
||||
t.Fatalf("a clustered enforce node with full route security must be allowed, got: %v", err)
|
||||
}
|
||||
}
|
||||
|
||||
func TestSplitRoutes(t *testing.T) {
|
||||
cases := []struct {
|
||||
in string
|
||||
|
||||
+12
-1
@@ -97,7 +97,7 @@ func main() {
|
||||
}
|
||||
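// Cluster guard (issues 0003a, 0006d): a clustered node must run --bus-auth
|
||||
// enforce (homogeneous posture, audit 0008 N1); a public cluster also needs a
|
||||
// route secret and mutual route TLS, and the route-TLS flags are all-or-nothing.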
|
||||
if err := validateClusterConfig(*clusterName, *bind, *clusterUser, *clusterPass, *routeTLSCert, *routeTLSKey, *routeTLSCA); err != nil {
|
||||
if err := validateClusterConfig(*clusterName, *bind, *clusterUser, *clusterPass, *routeTLSCert, *routeTLSKey, *routeTLSCA, authMode); err != nil {
|
||||
log.Fatalf("%v", err)
|
||||
}
|
||||
|
||||
@@ -274,6 +274,17 @@ func main() {
|
||||
srv.RequireEncryptedRooms = true
|
||||
log.Printf("cleartext rooms: DISABLED (public bind requires end-to-end encryption)")
|
||||
}
|
||||
// Publish this node's posture on /healthz so a monitor (or a peer) can detect a
|
||||
// cluster member not running the homogeneous enforce+ACL+TLS posture (audit
|
||||
// 0008 N1). enforce implies the per-subject ACL in this binary (they are wired
|
||||
// together above).
|
||||
srv.Posture = membership.Posture{
|
||||
Enforce: enforce,
|
||||
ACL: enforce,
|
||||
TLS: *tlsCert != "",
|
||||
Cluster: clustered,
|
||||
Store: *storeBackend,
|
||||
}
|
||||
|
||||
// Replicated anti-replay (issue 0006a, audit 0008 N3): a clustered node MUST
|
||||
// share its nonce store across the cluster, or a request accepted on one node
|
||||
|
||||
@@ -0,0 +1,57 @@
|
||||
package membership_test
|
||||
|
||||
import (
|
||||
"encoding/json"
|
||||
"io"
|
||||
"net/http"
|
||||
"net/http/httptest"
|
||||
"path/filepath"
|
||||
"testing"
|
||||
|
||||
"github.com/enmanuel/unibus/pkg/blobstore"
|
||||
"github.com/enmanuel/unibus/pkg/membership"
|
||||
)
|
||||
|
||||
// TestHealthExposesPosture: /healthz publishes the node's security posture so a
|
||||
// monitor (or a peer) can detect a cluster member that is not enforce+ACL+TLS
|
||||
// (audit 0008 N1). The probe stays unauthenticated.
|
||||
func TestHealthExposesPosture(t *testing.T) {
|
||||
dir := t.TempDir()
|
||||
store, err := membership.Open(filepath.Join(dir, "unibus.db"))
|
||||
if err != nil {
|
||||
t.Fatalf("store: %v", err)
|
||||
}
|
||||
t.Cleanup(func() { store.Close() })
|
||||
blobs, _ := blobstore.New(filepath.Join(dir, "blobs"))
|
||||
|
||||
srv := membership.NewServer(store, blobs, membership.AuthEnforce)
|
||||
srv.Posture = membership.Posture{Enforce: true, ACL: true, TLS: true, Cluster: true, Store: "kv"}
|
||||
ts := httptest.NewServer(srv)
|
||||
t.Cleanup(ts.Close)
|
||||
|
||||
resp, err := http.Get(ts.URL + "/healthz")
|
||||
if err != nil {
|
||||
t.Fatalf("get healthz: %v", err)
|
||||
}
|
||||
defer resp.Body.Close()
|
||||
if resp.StatusCode != http.StatusOK {
|
||||
t.Fatalf("healthz status %d, want 200", resp.StatusCode)
|
||||
}
|
||||
body, _ := io.ReadAll(resp.Body)
|
||||
var got struct {
|
||||
Status string `json:"status"`
|
||||
Posture membership.Posture `json:"posture"`
|
||||
}
|
||||
if err := json.Unmarshal(body, &got); err != nil {
|
||||
t.Fatalf("decode healthz %q: %v", string(body), err)
|
||||
}
|
||||
if got.Status != "ok" {
|
||||
t.Fatalf("status = %q, want ok", got.Status)
|
||||
}
|
||||
if !got.Posture.Enforce || !got.Posture.ACL || !got.Posture.TLS || !got.Posture.Cluster {
|
||||
t.Fatalf("posture not surfaced correctly: %+v", got.Posture)
|
||||
}
|
||||
if got.Posture.Store != "kv" {
|
||||
t.Fatalf("posture.store = %q, want kv", got.Posture.Store)
|
||||
}
|
||||
}
|
||||
@@ -81,6 +81,25 @@ type Server struct {
|
||||
// (non-loopback) bind. See dev/0004d-dataplane-acl.md for the full rationale
|
||||
// and the residual metadata exposure this does NOT close.
|
||||
RequireEncryptedRooms bool
|
||||
|
||||
// Posture is the node's security posture, surfaced on /healthz so an operator
|
||||
// or a peer can detect a node NOT running the homogeneous enforce+ACL+TLS
|
||||
// posture a secure cluster requires (audit 0008 N1). It is set by the command;
|
||||
// the zero value (all false) reflects an unsecured dev node.
|
||||
Posture Posture
|
||||
}
|
||||
|
||||
// Posture describes the security posture a membershipd node runs with. It is
|
||||
// non-secret operational metadata (booleans + the store backend name), published
|
||||
// on /healthz so a monitor can flag a cluster member that is not enforce+ACL+TLS
|
||||
// — the weak node that would let an unauthenticated peer harvest the cluster's
|
||||
// forwarded traffic (audit 0008 N1).
|
||||
type Posture struct {
|
||||
Enforce bool `json:"enforce"` // auth enforcement is on; the command fills this from its enforce setting
|
||||
ACL bool `json:"acl"` // per-subject ACL is active; the command sets it to the same value as Enforce
|
||||
TLS bool `json:"tls"` // a TLS certificate is configured; the command sets it from tlsCert != ""
|
||||
Cluster bool `json:"cluster"` // the node runs as a cluster member
|
||||
Store string `json:"store"` // store backend name: "sqlite" | "kv"
|
||||
}
|
||||
|
||||
// NewServer wires the membership store and blob store into an http.Handler. The
|
||||
@@ -390,7 +409,7 @@ func (s *Server) verifyOwnerSig(roomID, by string, sig, canonical []byte) (Membe
|
||||
// ---- handlers -------------------------------------------------------------
|
||||
|
||||
func (s *Server) handleHealth(w http.ResponseWriter, _ *http.Request) {
|
||||
writeJSON(w, http.StatusOK, map[string]string{"status": "ok"})
|
||||
writeJSON(w, http.StatusOK, map[string]any{"status": "ok", "posture": s.Posture})
|
||||
}
|
||||
|
||||
func (s *Server) handleCreateRoom(w http.ResponseWriter, r *http.Request) {
|
||||
|
||||
Reference in New Issue
Block a user