48a3d6be33
Parameterized, NO-VPS-touched material to bring up unibus as a 3-node cluster. The authoring agent ran none of it on a host; every remote-changing step is marked HUMAN and deploy-cluster.sh defaults to a dry run. deploy/cluster/: - nodes.env — topology (cluster name, ports, per-node rows). Public IPs known (homer 141.94.69.66, datardos 51.91.100.142) pre-filled; magnus public IP and all WireGuard IPs are <PLACEHOLDER> for the human; scripts refuse to run while any remain. - generate-cluster-certs.sh — mints a SEPARATE cluster route CA + a route cert per node (server+clientAuth, mutual routes) and a data-plane server cert per node signed by the reused client CA (../tls/ca.*); SAN = public + WG + hostname. - membershipd-cluster.service — one unit, parameterized per node via /opt/unibus/cluster.env: enforce + per-subject ACL + TLS + --store kv, --cluster-pass-file (secret out of argv), Restart=always. - deploy-cluster.sh — cross-build linux/amd64, generate each node's cluster.env (routes to the other two on the WG mesh, no userinfo), rsync + install (only with --yes); staggered start is manual. - README.md — runbook: prerequisites, loopback bootstrap to seed the first admin into the KV (works around the user-CLI/KV chicken-and-egg), staggered bring-up, verify posture+quorum, scale R1->R3 in place, and the chaos test (left to 0003f on the real VPS). - .gitignore — out/, build/, secrets/, *.key never committed. bash -n passes on both scripts; go build/test unchanged. Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
121 lines
4.6 KiB
Bash
Executable File
121 lines
4.6 KiB
Bash
Executable File
#!/usr/bin/env bash
|
|
#
# generate-cluster-certs.sh — mint the TLS material for a unibus 3-node cluster
# (issue 0006g). Run ONCE on a trusted machine (e.g. om, which custodies the bus
# CA); distribute the per-node output to each node over a secure channel. This
# script touches NO remote host.
#
# It produces two trust roots, kept SEPARATE on purpose (audit 0008 N1-low):
#
#   1. The CLUSTER route CA (cluster-ca.crt/key, generated here): signs each
#      node's ROUTE certificate. The route layer authenticates NODES, not bus
#      users, so it must NOT share the client data-plane CA — a client cert can
#      then never be presented to the route port.
#   2. The CLIENT data-plane CA (../tls/ca.crt/key, the one clients pin): signs
#      each node's DATA-PLANE server certificate. Reused, not regenerated, so
#      existing clients keep trusting the bus.
#
# Per node it emits, under out/<name>/:
#   route-<name>.crt/key   route cert (cluster CA), EKU server+clientAuth
#                          (each node is BOTH server and dialer to its peers)
#   server-<name>.crt/key  data-plane cert (client CA), EKU serverAuth
#   cluster-ca.crt         the route CA cert (for --route-tls-ca)
#   ca.crt                 the client CA cert (for clients / control-plane TLS)
#
# SANs per node = its public IP + its WireGuard IP + its hostname + localhost
# (both DNS:localhost and IP:127.0.0.1).
#
# Key material: EC P-256 (Go crypto/tls + nats-server friendly), matching
# ../tls/generate-certs.sh.
|
|
set -euo pipefail

# Work from the script's own directory so that nodes.env, ../tls/ and out/
# resolve the same way regardless of the caller's current directory.
DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
cd "$DIR"

# Fail early with a clear message instead of raw grep/source errors.
if [[ ! -f nodes.env ]]; then
  echo "ERROR: nodes.env not found in ${DIR}." >&2
  exit 2
fi

# Refuse to run while any placeholder remains (HUMAN must fill nodes.env first).
# This guard runs BEFORE nodes.env is sourced: an unquoted <PLACEHOLDER> in that
# file would otherwise be parsed by bash as a redirection and abort the script
# with a confusing error instead of the message below.
# -E (extended regex) is used because '\+' is only a GNU extension in basic regex.
if grep -Eq '<[A-Z_]+>' nodes.env; then
  echo "ERROR: nodes.env still has <PLACEHOLDER> values — fill them in first." >&2
  grep -En '<[A-Z_]+>' nodes.env >&2
  exit 2
fi

# shellcheck source=/dev/null
source ./nodes.env
|
|
|
|
# Client data-plane CA: this script only reuses it, so both the certificate
# and the private key must already be present before anything is minted.
CLIENT_CA_CRT="../tls/ca.crt"
CLIENT_CA_KEY="../tls/ca.key"
if ! [[ -f "$CLIENT_CA_CRT" && -f "$CLIENT_CA_KEY" ]]; then
  {
    printf '%s\n' "ERROR: client data-plane CA not found at ../tls/ca.{crt,key}."
    printf '%s\n' " Run ../tls/generate-certs.sh first (it mints the client CA)."
  } >&2
  exit 2
fi
|
|
|
|
# Validity periods in days: DAYS_CA for the cluster route CA certificate,
# DAYS_CRT for every certificate issued by mint_cert.
DAYS_CA=3650
DAYS_CRT=825

# Forced regeneration is enabled only by a literal --force as the first argument.
force=0
case "${1:-}" in
  --force) force=1 ;;
esac
|
|
|
|
# --- cluster route CA (separate trust root) ---
# Generated when either half (certificate or key) is missing, or when --force
# was given; otherwise the files already on disk are reused untouched.
if [[ ! -f cluster-ca.crt || ! -f cluster-ca.key || $force -eq 1 ]]; then
  echo "==> generating cluster route CA (separate from the client CA)"
  # Create the private key under a restrictive umask so it is never readable
  # by other users, not even briefly before the chmod below. The subshell keeps
  # the umask change from leaking into the rest of the script.
  (
    umask 077
    openssl ecparam -name prime256v1 -genkey -noout -out cluster-ca.key
  )
  # Still needed: the umask does not tighten a key file that already existed
  # (e.g. when regenerating with --force).
  chmod 600 cluster-ca.key
  openssl req -x509 -new -key cluster-ca.key -sha256 -days "$DAYS_CA" \
    -subj "/CN=unibus-cluster-ca" -out cluster-ca.crt
else
  echo "==> reusing existing cluster route CA (pass --force to regenerate)"
fi
|
|
|
|
# mint <out_key> <out_crt> <subject_cn> <san> <eku> <ca_crt> <ca_key>
|
|
#######################################
# Generate an EC P-256 key and a certificate for it, signed by the given CA.
# Globals:
#   DAYS_CRT (read) - validity of the issued certificate, in days
# Arguments:
#   $1 - path of the private key to write (set to mode 600)
#   $2 - path of the certificate to write
#   $3 - subject common name
#   $4 - subjectAltName value, e.g. "IP:10.0.0.1,DNS:node"
#   $5 - extendedKeyUsage value, e.g. "serverAuth,clientAuth"
#   $6 - signing CA certificate
#   $7 - signing CA private key
# Returns:
#   0 on success; otherwise the status of the first step that failed.
#   The temporary CSR and extension files are removed on every path.
#######################################
mint_cert() {
  local out_key="$1" out_crt="$2" cn="$3" san="$4" eku="$5" ca_crt="$6" ca_key="$7"
  local csr ext rc=0

  csr="$(mktemp)" || return 1
  ext="$(mktemp)" || {
    rm -f -- "$csr"
    return 1
  }

  # The steps are chained with && and the status is captured, so a failing
  # openssl call no longer aborts the script (set -e) before the temp files
  # are cleaned up. The key is created under umask 077 in a subshell so it is
  # never readable by others, not even briefly before the chmod.
  {
    (
      umask 077
      openssl ecparam -name prime256v1 -genkey -noout -out "$out_key"
    ) &&
      chmod 600 "$out_key" &&
      openssl req -new -key "$out_key" -subj "/CN=${cn}" -out "$csr" &&
      printf '%s\n' \
        "subjectAltName=${san}" \
        "extendedKeyUsage=${eku}" \
        "keyUsage=digitalSignature,keyEncipherment" >"$ext" &&
      openssl x509 -req -in "$csr" -CA "$ca_crt" -CAkey "$ca_key" -CAcreateserial \
        -sha256 -days "$DAYS_CRT" -extfile "$ext" -out "$out_crt"
  } || rc=$?

  rm -f -- "$csr" "$ext"
  return "$rc"
}
|
|
|
|
# Mint the route and data-plane certificates for every row of CLUSTER_NODES
# (defined by nodes.env). The first four whitespace-separated fields of a row
# are read as: name, a second field unused here (_ssh), public IP, WireGuard IP.
for row in "${CLUSTER_NODES[@]}"; do
  # _extra absorbs any further columns: read assigns the whole remainder of
  # the line to its last variable, which would otherwise corrupt the SAN.
  read -r name _ssh pub wg _extra <<<"$row"
  if [[ -z "$name" || -z "$pub" || -z "$wg" ]]; then
    echo "ERROR: malformed CLUSTER_NODES row (need: name ssh public-ip wg-ip): '${row}'" >&2
    exit 2
  fi

  echo "==> node ${name}: SAN IP:${pub}, IP:${wg}, DNS:${name}, localhost, 127.0.0.1"
  nodedir="out/${name}"
  mkdir -p "$nodedir"
  # Both certificates of a node carry the same SAN list.
  san="IP:${pub},IP:${wg},DNS:${name},DNS:localhost,IP:127.0.0.1"

  # Route cert: signed by the cluster CA; server+client auth (mutual routes).
  mint_cert "${nodedir}/route-${name}.key" "${nodedir}/route-${name}.crt" \
    "unibus-route-${name}" "$san" "serverAuth,clientAuth" \
    cluster-ca.crt cluster-ca.key

  # Data-plane server cert: signed by the client CA; serverAuth only.
  mint_cert "${nodedir}/server-${name}.key" "${nodedir}/server-${name}.crt" \
    "unibus-${name}" "$san" "serverAuth" \
    "$CLIENT_CA_CRT" "$CLIENT_CA_KEY"

  # Co-locate the two CA certs each node needs.
  cp cluster-ca.crt "${nodedir}/cluster-ca.crt"
  cp "$CLIENT_CA_CRT" "${nodedir}/ca.crt"
done
|
|
|
|
# Best-effort removal of the serial-number files left beside the two CA
# certificates by -CAcreateserial; a failure here must not fail the run.
rm -f cluster-ca.srl ../tls/ca.srl 2>/dev/null || true

# Final summary: one line per node directory, then a verification hint.
printf '\n'
printf '%s\n' "==> done. Per-node material under out/<name>/ (KEYS ARE SECRET — never git):"
for row in "${CLUSTER_NODES[@]}"; do
  # Only the first field (the node name) is needed here.
  read -r name _tail <<<"$row"
  printf '%s\n' " out/${name}/ (route-${name}.*, server-${name}.*, cluster-ca.crt, ca.crt)"
done
printf '\n'
printf '%s\n' "verify a SAN with:"
printf '%s\n' " openssl x509 -in out/<name>/server-<name>.crt -noout -text | grep -A1 'Subject Alternative Name'"
|