package main

import (
	"fmt"
	"net"
	"net/url"
	"os"
	"strings"

	"github.com/enmanuel/unibus/pkg/membership"
)

// splitRoutes parses the comma-separated --routes flag into a clean slice of
// route URLs. Each entry is whitespace-trimmed and empty entries are dropped,
// so a trailing comma or a spaced list does not yield a bogus empty route.
// It returns a nil slice when csv contains no usable entry.
func splitRoutes(csv string) []string {
	var out []string
	for _, r := range strings.Split(csv, ",") {
		if r = strings.TrimSpace(r); r != "" {
			out = append(out, r)
		}
	}
	return out
}

// resolveClusterPass resolves the cluster route secret WITHOUT leaking it through
// argv (audit 0008 N1-low: --cluster-pass in argv is visible in ps/journald).
//
// Precedence:
//  1. passFile (--cluster-pass-file): the file is read and surrounding
//     whitespace is trimmed;
//  2. env (the value of UNIBUS_CLUSTER_PASS, injected by the caller so the
//     function stays testable);
//  3. passFlag (the legacy --cluster-pass flag, argv-visible, kept for
//     dev/compat).
//
// It returns the secret and a short source label for logging ("file", "env",
// "flag" or "none") — never log the secret itself. An error is returned when
// passFile is set but cannot be read, or when it contains nothing but
// whitespace: an explicitly configured secret file that yields no secret is a
// configuration bug, not a request to run without one.
func resolveClusterPass(passFlag, passFile, env string) (secret, source string, err error) {
	if passFile != "" {
		b, rerr := os.ReadFile(passFile)
		if rerr != nil {
			return "", "", fmt.Errorf("read --cluster-pass-file %q: %w", passFile, rerr)
		}
		fromFile := strings.TrimSpace(string(b))
		if fromFile == "" {
			// Do not fall through to env/flag: the operator asked for the file,
			// so silently picking another source would hide the mistake.
			return "", "", fmt.Errorf("--cluster-pass-file %q is empty", passFile)
		}
		return fromFile, "file", nil
	}
	switch {
	case env != "":
		return env, "env", nil
	case passFlag != "":
		return passFlag, "flag", nil
	}
	return "", "none", nil
}

// injectRouteCreds rewrites each route URL that carries NO userinfo to embed
// user:pass, so the cluster secret is supplied once (via file/env) instead of
// repeated in every --routes argv entry where ps/journald would expose it. A route
// that already carries userinfo is left untouched (operator override). With an
// empty user it is a no-op. A malformed route URL is an error (configuration bug)
// rather than a silently dropped peer.
func injectRouteCreds(routes []string, user, pass string) ([]string, error) {
	if user == "" {
		// No credentials to inject: hand the caller's slice back unchanged.
		return routes, nil
	}
	out := make([]string, 0, len(routes))
	for _, r := range routes {
		u, err := url.Parse(r)
		if err != nil {
			return nil, fmt.Errorf("parse route %q: %w", r, err)
		}
		if u.User != nil {
			// Operator override: keep the route exactly as written. Going
			// through u.String() would re-serialise it (lowercased scheme,
			// re-escaped password), which is not "untouched".
			out = append(out, r)
			continue
		}
		u.User = url.UserPassword(user, pass)
		out = append(out, u.String())
	}
	return out, nil
}

// isLoopbackURL reports whether a NATS url targets this host only (loopback). Used
// to guard migrate-to-kv (audit 0008 N6): pushing the allowlist to a REMOTE NATS
// without TLS would send handles/roles/sign-pubs in cleartext, so a remote target
// must be TLS-pinned (--ca). A url we cannot classify — one that fails to parse,
// or that has no host — is treated as NON-loopback (conservative: it then
// requires --ca).
func isLoopbackURL(natsURL string) bool {
	u, err := url.Parse(natsURL)
	if err != nil {
		return false
	}
	// Hostname strips the port and IPv6 brackets, leaving exactly the kind of
	// value isLoopbackBind classifies ("localhost", an IP literal, or "").
	return isLoopbackBind(u.Hostname())
}

// isLoopbackBind reports whether the --bind value keeps the service reachable
// only from this host. It is true for the literal "localhost" and for any IP
// literal in a loopback range. An empty bind means "all interfaces" (public),
// and any other value that is not an IP literal is treated as public — the
// conservative choice, so an unusual bind never silently slips past the guard.
func isLoopbackBind(bind string) bool {
	if bind == "localhost" {
		return true
	}
	// net.ParseIP returns nil for "" and for hostnames, so both fall out as
	// non-loopback here.
	ip := net.ParseIP(bind)
	return ip != nil && ip.IsLoopback()
}

// validateBootConfig is the fail-open guard (audit H2).
It refuses any startup // configuration that would expose the bus without enforced authentication: // // - a non-loopback --bind without --bus-auth enforce (the data plane and // control plane would both accept anyone), // - --tls-cert/--tls-key without --bus-auth enforce (TLS encrypts the channel // but authenticates no one — encrypted access for everybody is still open), and // - a non-loopback --bind WITHOUT --tls-cert/--tls-key (the control plane would // serve metadata over plaintext HTTP publicly — audit H5 reappearing, the N4 // gap the re-audit found: TLS was available but not mandatory). // // It is a pure function of the parsed flags so the command can fail fast at // startup and tests can assert the policy without booting a server. func validateBootConfig(bind string, mode membership.AuthMode, tlsCert, tlsKey string) error { if !isLoopbackBind(bind) && mode != membership.AuthEnforce { return fmt.Errorf( "refusing to start: --bind %q is not loopback but --bus-auth is %q; a public bind requires --bus-auth enforce (or bind 127.0.0.1 for local dev)", bind, mode) } if (tlsCert != "" || tlsKey != "") && mode != membership.AuthEnforce { return fmt.Errorf( "refusing to start: --tls-cert/--tls-key set but --bus-auth is %q; TLS without enforced auth is fail-open (encrypted channel, no authentication) — set --bus-auth enforce", mode) } if !isLoopbackBind(bind) && (tlsCert == "" || tlsKey == "") { return fmt.Errorf( "refusing to start: --bind %q is not loopback but --tls-cert/--tls-key are not both set; a public control plane must serve HTTPS or its metadata (subjects, pubkeys, sealed keys, the social graph) travels in cleartext to a network MITM (audit H5/N4) — provide a CA-signed --tls-cert/--tls-key, or bind 127.0.0.1 for local dev", bind) } return nil } // validateClusterConfig guards the cluster route layer (issue 0003a). 
The route // layer is a server-to-server trust boundary distinct from the client data // plane: leaving it open lets anyone who reaches the route port join the cluster // or inject messages into the whole bus (audit 0004, "auth of the cluster // routes"). So on a public (non-loopback) bind, a cluster MUST carry both a // shared route secret AND mutual route TLS. It is a pure function of the parsed // flags. An empty clusterName means "no cluster" (standalone) and is always // allowed. // // The three route-TLS paths are all-or-nothing (mutual TLS needs the node cert, // its key, and the CA together), independent of the bind, so a partial TLS // config never silently degrades to plaintext routes. // // Homogeneous posture (issue 0006d, audit 0008 N1): a cluster is only as secure // as its weakest node — the data plane forwards every subject between nodes, so a // single node running without enforced auth lets an unauthenticated peer // Subscribe(">") on it and harvest the traffic forwarded from the ACL'd nodes. // This node therefore REFUSES to join a cluster unless it runs --bus-auth enforce, // regardless of bind: a clustered node is a production node, and there is no safe // "dev cluster without auth". (A peer running a tampered binary is out of this // node's control; /healthz exposes each node's posture so a monitor can detect // one that is not enforce+ACL — see Server.Posture.) func validateClusterConfig(clusterName, bind, user, pass, rtCert, rtKey, rtCA string, mode membership.AuthMode) error { rtAny := rtCert != "" || rtKey != "" || rtCA != "" rtAll := rtCert != "" && rtKey != "" && rtCA != "" if rtAny && !rtAll { return fmt.Errorf( "refusing to start: --route-tls-cert/--route-tls-key/--route-tls-ca must be set together (mutual route TLS needs all three)") } if clusterName == "" { return nil // standalone: no route layer to secure } // A clustered node MUST enforce auth (homogeneous posture). 
Checked before the // loopback shortcut so even a loopback cluster cannot form without enforce. if mode != membership.AuthEnforce { return fmt.Errorf( "refusing to start: cluster %q requires --bus-auth enforce; a cluster node without enforced auth+ACL lets an unauthenticated peer harvest the traffic forwarded from the other nodes (audit 0008 N1) — every node must run the same enforce+ACL+TLS posture", clusterName) } if isLoopbackBind(bind) { return nil // loopback cluster is dev-only and unreachable from outside } // Public cluster: demand a route secret and mutual route TLS. if user == "" || pass == "" { return fmt.Errorf( "refusing to start: cluster %q on public bind %q requires --cluster-user and --cluster-pass; an unauthenticated route port lets anyone join the cluster", clusterName, bind) } if !rtAll { return fmt.Errorf( "refusing to start: cluster %q on public bind %q requires mutual route TLS (--route-tls-cert/--route-tls-key/--route-tls-ca); plaintext routes expose server-to-server traffic and admit unsigned nodes", clusterName, bind) } return nil }