9b96537aa6
A cluster is only as secure as its weakest node: the data plane forwards every
subject between nodes, so one node running without enforced auth lets an
unauthenticated peer Subscribe(">") on it and harvest the traffic forwarded from
the ACL'd nodes.
- validateClusterConfig now takes the auth mode and REFUSES to join a cluster
unless --bus-auth enforce, regardless of bind (a clustered node is a production
node; there is no safe dev cluster without auth). This binary therefore cannot
BE the weak node.
- Server.Posture {enforce,acl,tls,cluster,store} is published on /healthz
(non-secret operational metadata; the probe stays unauthenticated) so a monitor or peer
can detect a cluster member not running enforce+ACL+TLS — covering a peer that
runs a tampered/old binary outside this node's control.
Tests:
- TestAttack0008_N1: a clustered node with --bus-auth off is refused; the same
node with enforce + full route security is allowed.
- TestClusterConfigPolicy: extended with off/soft clustered cases (refused) and
the mode parameter throughout.
- TestHealthExposesPosture: /healthz returns the posture booleans + store backend.
CGO_ENABLED=0 go build/vet/test green; govulncheck 0 reachable.
Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
129 lines
6.0 KiB
Go
129 lines
6.0 KiB
Go
package main
|
|
|
|
import (
|
|
"fmt"
|
|
"net"
|
|
"strings"
|
|
|
|
"github.com/enmanuel/unibus/pkg/membership"
|
|
)
|
|
|
|
// splitRoutes turns the comma-separated --routes flag value into a slice of
// route URLs. Each entry is whitespace-trimmed and entries that end up empty
// are discarded, so a trailing comma or a list written with spaces never
// produces a bogus empty route. When nothing usable remains the result is nil.
func splitRoutes(csv string) []string {
	fields := strings.Split(csv, ",")

	var routes []string
	for i := range fields {
		route := strings.TrimSpace(fields[i])
		if route == "" {
			continue // blank entry (e.g. from a trailing comma)
		}
		routes = append(routes, route)
	}
	return routes
}
|
|
|
|
// isLoopbackBind reports whether the --bind value restricts the service to this
// host only. The classification is deliberately conservative so an unusual bind
// never silently slips past the guard:
//
//   - "" means every interface, hence public;
//   - the exact literal "localhost" is loopback;
//   - anything else must parse as an IP literal and be a loopback address;
//   - a value that is not an IP literal (any other hostname) counts as public.
func isLoopbackBind(bind string) bool {
	if bind == "" {
		return false // empty binds every interface
	}
	if bind == "localhost" {
		return true
	}
	if addr := net.ParseIP(bind); addr != nil {
		return addr.IsLoopback()
	}
	return false // a hostname we can't classify: assume public
}
|
|
|
|
// validateBootConfig is the fail-open guard (audit H2). It refuses any startup
|
|
// configuration that would expose the bus without enforced authentication:
|
|
//
|
|
// - a non-loopback --bind without --bus-auth enforce (the data plane and
|
|
// control plane would both accept anyone),
|
|
// - --tls-cert/--tls-key without --bus-auth enforce (TLS encrypts the channel
|
|
// but authenticates no one — encrypted access for everybody is still open), and
|
|
// - a non-loopback --bind WITHOUT --tls-cert/--tls-key (the control plane would
|
|
// serve metadata over plaintext HTTP publicly — audit H5 reappearing, the N4
|
|
// gap the re-audit found: TLS was available but not mandatory).
|
|
//
|
|
// It is a pure function of the parsed flags so the command can fail fast at
|
|
// startup and tests can assert the policy without booting a server.
|
|
func validateBootConfig(bind string, mode membership.AuthMode, tlsCert, tlsKey string) error {
|
|
if !isLoopbackBind(bind) && mode != membership.AuthEnforce {
|
|
return fmt.Errorf(
|
|
"refusing to start: --bind %q is not loopback but --bus-auth is %q; a public bind requires --bus-auth enforce (or bind 127.0.0.1 for local dev)",
|
|
bind, mode)
|
|
}
|
|
if (tlsCert != "" || tlsKey != "") && mode != membership.AuthEnforce {
|
|
return fmt.Errorf(
|
|
"refusing to start: --tls-cert/--tls-key set but --bus-auth is %q; TLS without enforced auth is fail-open (encrypted channel, no authentication) — set --bus-auth enforce",
|
|
mode)
|
|
}
|
|
if !isLoopbackBind(bind) && (tlsCert == "" || tlsKey == "") {
|
|
return fmt.Errorf(
|
|
"refusing to start: --bind %q is not loopback but --tls-cert/--tls-key are not both set; a public control plane must serve HTTPS or its metadata (subjects, pubkeys, sealed keys, the social graph) travels in cleartext to a network MITM (audit H5/N4) — provide a CA-signed --tls-cert/--tls-key, or bind 127.0.0.1 for local dev",
|
|
bind)
|
|
}
|
|
return nil
|
|
}
|
|
|
|
// validateClusterConfig guards the cluster route layer (issue 0003a). The route
|
|
// layer is a server-to-server trust boundary distinct from the client data
|
|
// plane: leaving it open lets anyone who reaches the route port join the cluster
|
|
// or inject messages into the whole bus (audit 0004, "auth of the cluster
|
|
// routes"). So on a public (non-loopback) bind, a cluster MUST carry both a
|
|
// shared route secret AND mutual route TLS. It is a pure function of the parsed
|
|
// flags. An empty clusterName means "no cluster" (standalone) and is always
|
|
// allowed.
|
|
//
|
|
// The three route-TLS paths are all-or-nothing (mutual TLS needs the node cert,
|
|
// its key, and the CA together), independent of the bind, so a partial TLS
|
|
// config never silently degrades to plaintext routes.
|
|
//
|
|
// Homogeneous posture (issue 0006d, audit 0008 N1): a cluster is only as secure
|
|
// as its weakest node — the data plane forwards every subject between nodes, so a
|
|
// single node running without enforced auth lets an unauthenticated peer
|
|
// Subscribe(">") on it and harvest the traffic forwarded from the ACL'd nodes.
|
|
// This node therefore REFUSES to join a cluster unless it runs --bus-auth enforce,
|
|
// regardless of bind: a clustered node is a production node, and there is no safe
|
|
// "dev cluster without auth". (A peer running a tampered binary is out of this
|
|
// node's control; /healthz exposes each node's posture so a monitor can detect
|
|
// one that is not enforce+ACL — see Server.Posture.)
|
|
func validateClusterConfig(clusterName, bind, user, pass, rtCert, rtKey, rtCA string, mode membership.AuthMode) error {
|
|
rtAny := rtCert != "" || rtKey != "" || rtCA != ""
|
|
rtAll := rtCert != "" && rtKey != "" && rtCA != ""
|
|
if rtAny && !rtAll {
|
|
return fmt.Errorf(
|
|
"refusing to start: --route-tls-cert/--route-tls-key/--route-tls-ca must be set together (mutual route TLS needs all three)")
|
|
}
|
|
if clusterName == "" {
|
|
return nil // standalone: no route layer to secure
|
|
}
|
|
// A clustered node MUST enforce auth (homogeneous posture). Checked before the
|
|
// loopback shortcut so even a loopback cluster cannot form without enforce.
|
|
if mode != membership.AuthEnforce {
|
|
return fmt.Errorf(
|
|
"refusing to start: cluster %q requires --bus-auth enforce; a cluster node without enforced auth+ACL lets an unauthenticated peer harvest the traffic forwarded from the other nodes (audit 0008 N1) — every node must run the same enforce+ACL+TLS posture",
|
|
clusterName)
|
|
}
|
|
if isLoopbackBind(bind) {
|
|
return nil // loopback cluster is dev-only and unreachable from outside
|
|
}
|
|
// Public cluster: demand a route secret and mutual route TLS.
|
|
if user == "" || pass == "" {
|
|
return fmt.Errorf(
|
|
"refusing to start: cluster %q on public bind %q requires --cluster-user and --cluster-pass; an unauthenticated route port lets anyone join the cluster",
|
|
clusterName, bind)
|
|
}
|
|
if !rtAll {
|
|
return fmt.Errorf(
|
|
"refusing to start: cluster %q on public bind %q requires mutual route TLS (--route-tls-cert/--route-tls-key/--route-tls-ca); plaintext routes expose server-to-server traffic and admit unsigned nodes",
|
|
clusterName, bind)
|
|
}
|
|
return nil
|
|
}
|