feat(0003a): NATS cluster routes with shared-secret auth + mutual route TLS
Add high-availability cluster support to the embedded NATS server (issue 0003a, first phase of decentralization). pkg/embeddednats: - ServerConfig gains ServerName (unique per node, required by JetStream RAFT) and an optional *ClusterConfig (cluster name, route host/port, peer route URLs, shared-secret Username/Password, and a mutual-TLS *tls.Config). applyClusterOpts maps it onto server.Options.Cluster + Routes. Nil Cluster keeps the legacy standalone server. pkg/busauth: - RouteTLSConfig builds the route layer's mutual-TLS config: the node presents its CA-signed certificate AND verifies the peer's certificate against the bus CA (RequireAndVerifyClientCert), reusing the issue-0001 CA. Routes authenticate NODES, never the client nkey authenticator. cmd/membershipd: - Cluster flags (--cluster-name/--server-name/--cluster-port/--routes/ --cluster-user/--cluster-pass/--route-tls-cert/-key/-ca) wire a node into the cluster. validateClusterConfig refuses a public cluster without a route secret and complete mutual route TLS, and rejects partial route-TLS flags (all-or-nothing). splitRoutes parses the CSV. Tests (DoD: golden + 2 edge + error path): - TestClusterForwardsAcrossNodes: 2-node cluster forwards a client subject from one node to a subscriber on the other. - TestClusterThreeNodesForward: 3-node (HA shape) cross-node forwarding. - TestClusterMutualTLSForwards: forwarding over mutual-TLS routes. - TestClusterRejectsBadRouteAuth: wrong cluster password -> no route. - TestClusterRejectsUnsignedNode: cert not signed by the bus CA -> no route. - TestClusterConfigPolicy / TestSplitRoutes: boot-guard + CSV parsing. Master stays green: standalone (no --cluster-name) is unchanged.
This commit is contained in:
@@ -8,25 +8,76 @@ package embeddednats
|
||||
import (
|
||||
"crypto/tls"
|
||||
"fmt"
|
||||
"net/url"
|
||||
"time"
|
||||
|
||||
server "github.com/nats-io/nats-server/v2/server"
|
||||
)
|
||||
|
||||
// ClusterConfig describes the route layer that joins several embedded NATS
// servers into one cluster (issue 0003a). This is the data-plane half of high
// availability: once clustered, a subject published by a client on one node is
// forwarded to subscribers attached to any other node, and (with JetStream
// replicas > 1) streams/KV are RAFT-replicated across nodes, so losing a single
// node does not lose the bus.
//
// Routes form a trust boundary SEPARATE from the client data plane. They carry
// server-to-server traffic and therefore authenticate NODES, not bus users; the
// nkey client authenticator must never be reused here. Routes are protected by
// their own shared secret (Username/Password -> NATS Cluster.Authorization) and
// their own mutual TLS (TLS, built from the bus CA with
// busauth.RouteTLSConfig). A node lacking the cluster secret and a CA-signed
// node certificate can neither join the cluster nor inject messages into it.
type ClusterConfig struct {
	// Name is the cluster name. Every node MUST use the identical value,
	// otherwise the servers refuse to gossip routes to each other.
	Name string

	// Host and Port define the route (server-to-server) listener, which is
	// distinct from the client Host/Port. Pick a free, non-client port
	// (e.g. 6250).
	Host string
	Port int

	// Routes lists the nats-route URLs of the OTHER nodes, for example
	// "nats://user:pass@10.0.0.2:6250". When the route layer is password
	// protected, each URL must carry the same userinfo as the local
	// Username/Password so that this node authenticates outbound to its peers.
	Routes []string

	// Username and Password guard the route listener (NATS
	// Cluster.Authorization). A peer (or impostor) connecting to this node's
	// route port without these credentials is rejected and never becomes a
	// route. Leaving them empty disables route auth (dev / trusted-network
	// only).
	Username, Password string

	// TLS, when non-nil, protects route connections with mutual TLS. Build it
	// with busauth.RouteTLSConfig(cert, key, ca): the server presents its node
	// certificate AND requires+verifies the connecting node's certificate
	// against the bus CA, so an unsigned impostor cannot establish a route even
	// with the right password. Nil leaves routes in plaintext (dev /
	// WireGuard-only).
	TLS *tls.Config
}
|
||||
|
||||
// ServerConfig is the full set of knobs for the embedded NATS server. The zero
|
||||
// value (empty StoreDir aside) yields a dev-friendly server: JetStream on, bound
|
||||
// to all interfaces, no client auth, no TLS. Secured deployments set Auth and
|
||||
// TLS; tests set Host to loopback and a free Port.
|
||||
// to all interfaces, no client auth, no TLS, standalone (no cluster). Secured
|
||||
// deployments set Auth and TLS; HA deployments set ServerName + Cluster; tests
|
||||
// set Host to loopback and a free Port.
|
||||
type ServerConfig struct {
|
||||
StoreDir string // JetStream store directory
|
||||
Host string // bind interface; "" = nats-server default ("0.0.0.0")
|
||||
Port int // listen port
|
||||
// ServerName is this node's unique name within the cluster. JetStream's RAFT
|
||||
// layer requires a stable, unique name per node to form its meta-group; leave
|
||||
// it empty for a standalone server (nats-server then auto-generates one).
|
||||
ServerName string
|
||||
// Auth, when non-nil, is installed as CustomClientAuthentication so the data
|
||||
// plane only accepts approved clients (nkey signature + bus allowlist).
|
||||
Auth server.Authentication
|
||||
// TLS, when non-nil, makes the server present a certificate and require TLS
|
||||
// on the data plane. Clients must trust the issuing CA (see busauth).
|
||||
TLS *tls.Config
|
||||
// Cluster, when non-nil, joins this server to a route cluster for high
|
||||
// availability (issue 0003a). Nil keeps the server standalone (the legacy
|
||||
// single-node behavior).
|
||||
Cluster *ClusterConfig
|
||||
}
|
||||
|
||||
// Start is a thin backward-compatible wrapper: embedded JetStream server on the
|
||||
@@ -60,6 +111,7 @@ func StartServer(cfg ServerConfig) (*server.Server, error) {
|
||||
StoreDir: cfg.StoreDir,
|
||||
Host: cfg.Host,
|
||||
Port: cfg.Port,
|
||||
ServerName: cfg.ServerName,
|
||||
DontListen: false,
|
||||
// Keep the embedded server quiet by default; the host app logs the URLs.
|
||||
NoLog: true,
|
||||
@@ -78,6 +130,12 @@ func StartServer(cfg ServerConfig) (*server.Server, error) {
|
||||
opts.TLS = true
|
||||
}
|
||||
|
||||
if cfg.Cluster != nil {
|
||||
if err := applyClusterOpts(opts, cfg.Cluster); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
}
|
||||
|
||||
ns, err := server.NewServer(opts)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("embeddednats: new server: %w", err)
|
||||
@@ -93,6 +151,34 @@ func StartServer(cfg ServerConfig) (*server.Server, error) {
|
||||
return ns, nil
|
||||
}
|
||||
|
||||
// applyClusterOpts translates a ClusterConfig into the nats-server route options
|
||||
// on opts: the cluster listener (name + host/port + shared-secret auth + mutual
|
||||
// TLS) and the outbound routes to the other nodes. A malformed route URL is a
|
||||
// configuration error and aborts startup rather than silently dropping a peer.
|
||||
func applyClusterOpts(opts *server.Options, c *ClusterConfig) error {
|
||||
opts.Cluster = server.ClusterOpts{
|
||||
Name: c.Name,
|
||||
Host: c.Host,
|
||||
Port: c.Port,
|
||||
Username: c.Username,
|
||||
Password: c.Password,
|
||||
}
|
||||
if c.TLS != nil {
|
||||
opts.Cluster.TLSConfig = c.TLS
|
||||
// A generous handshake budget: route TLS does a mutual handshake and the
|
||||
// peer may still be booting. The default 2s can flap on a cold cluster.
|
||||
opts.Cluster.TLSTimeout = 5.0
|
||||
}
|
||||
for _, r := range c.Routes {
|
||||
u, err := url.Parse(r)
|
||||
if err != nil {
|
||||
return fmt.Errorf("embeddednats: parse route %q: %w", r, err)
|
||||
}
|
||||
opts.Routes = append(opts.Routes, u)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// ClientURL returns a NATS connection URL for the running embedded server.
|
||||
func ClientURL(ns *server.Server) string {
|
||||
return ns.ClientURL()
|
||||
|
||||
Reference in New Issue
Block a user