#!/usr/bin/env bash
#
# deploy-cluster.sh — cross-build membershipd and stage it onto the three cluster
# nodes (issue 0006g). DEFAULT IS DRY-RUN: it prints the plan and touches nothing.
# Pass --yes to actually rsync + run remote commands. Steps that a HUMAN must run
# (or confirm) are marked "HUMAN:".
#
# Prerequisites (HUMAN, once):
#   1. Fill nodes.env (no <PLACEHOLDER> left).
#   2. ./generate-cluster-certs.sh   (mints out/<node>/ TLS material)
#   3. Create the route secret locally:
#        mkdir -p secrets && openssl rand -hex 32 > secrets/cluster.pass
#      (secrets/ is gitignored; it is rsynced to each node as cluster.pass)
#   4. SSH access to every node's SSH_HOST with sudo-less root (SSH_USER=root).
#
# What it does per node (with --yes):
#   - rsync the membershipd binary, the node's TLS material, the unit, the
#     generated cluster.env and the route secret into REMOTE_DIR.
#   - install + daemon-reload the systemd unit.
# Starting the services is left to the human: follow the "HUMAN — bring up"
# steps this script prints when it finishes (and the README).
# NOTE(review): this header used to describe a staggered start (seed node first);
# the printed bring-up steps say start order does not matter — confirm against
# the README which one is current.
#
# Exit status: 2 when a prerequisite is not met; otherwise the status of the
# first failing command (set -e).
set -euo pipefail

# Work from the script's own directory so every relative path below
# (nodes.env, secrets/, build/, out/) resolves regardless of the caller's cwd.
DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
cd "$DIR"

# Validate nodes.env BEFORE sourcing it: an unquoted KEY=<VALUE> line is a shell
# syntax error (it parses as redirections), so sourcing first would abort with
# a parse error instead of the message below.
if [[ ! -f nodes.env ]]; then
  echo "ERROR: nodes.env missing in $DIR — create and fill it in first." >&2
  exit 2
fi
# -E: '+' is standard in extended regexes; '\+' in a basic regex is a GNU
# extension and is not guaranteed to work with other grep implementations.
if grep -Eq '<[A-Z_]+>' nodes.env; then
  echo "ERROR: nodes.env still has <PLACEHOLDER> values — fill them in first." >&2
  exit 2
fi

# shellcheck source=/dev/null
source ./nodes.env

# APPLY=1 only when the first argument is exactly --yes; anything else
# (including no argument) stays a dry-run. Set after sourcing nodes.env so the
# config file cannot flip it.
APPLY=0
[[ "${1:-}" == "--yes" ]] && APPLY=1

# -s rather than -f: a zero-byte file (e.g. left behind when openssl failed
# after the redirect had already created it) must not be used as the secret.
SECRET_FILE="secrets/cluster.pass"
if [[ ! -s "$SECRET_FILE" ]]; then
  echo "ERROR: $SECRET_FILE missing or empty. HUMAN: mkdir -p secrets && openssl rand -hex 32 > $SECRET_FILE" >&2
  exit 2
fi

# run CMD [ARG...]
#   Print the command, then execute it only when APPLY=1 (--yes).
#   Globals:  APPLY (read)
#   Outputs:  the command line on stdout, prefixed with " + "
#   Returns:  0 in dry-run mode; otherwise the command's exit status
run() {
  echo " + $*"
  if [[ $APPLY -eq 1 ]]; then
    "$@"
  fi
}

echo "==> [1/3] cross-build membershipd (linux/amd64, CGO disabled)"
run env CGO_ENABLED=0 GOOS=linux GOARCH=amd64 go build -o build/membershipd ../../cmd/membershipd

# Build the comma-separated route list for a node = the OTHER nodes' addresses on
# the chosen network, with NO userinfo (the secret is injected by membershipd from
# the file). Writes only the route list to stdout — no secret and no other
# output — so the value can be captured with $(...).
# routes_for NODE_NAME
#   Globals read: CLUSTER_NODES (array of whitespace-separated rows),
#                 ROUTE_NETWORK, NATS_ROUTE_PORT
#   Arguments:    $1 - name of the node the list is built for; its own row is
#                 skipped
#   Outputs:      one line on stdout: "nats://ADDR:PORT" entries joined by commas
#                 (an empty line when no other node exists)
routes_for() {
  local self="$1" out=""
  local row name _ssh pub wg addr
  for row in "${CLUSTER_NODES[@]}"; do
    # A row splits into four fields: name, ssh host (unused here), then the two
    # candidate route addresses.
    read -r name _ssh pub wg <<<"$row"
    [[ "$name" == "$self" ]] && continue
    # "public" selects the third field; any other ROUTE_NETWORK value selects
    # the fourth (presumably the WireGuard address — confirm in nodes.env).
    if [[ "$ROUTE_NETWORK" == "public" ]]; then addr="$pub"; else addr="$wg"; fi
    out+="nats://${addr}:${NATS_ROUTE_PORT},"
  done
  # Drop the trailing comma left by the loop.
  echo "${out%,}"
}

# Step 2: for every row in CLUSTER_NODES, check that the node's out/<name>
# directory exists, compute its route list and generate its cluster.env under
# build/.
echo "==> [2/3] stage each node (REMOTE_DIR=$REMOTE_DIR)"
for row in "${CLUSTER_NODES[@]}"; do
  # Same row layout as in routes_for; only the name and ssh host are used here.
  read -r name ssh _pub _wg <<<"$row"
  # presumably the destination for the rsync/ssh steps, which are not visible
  # in this copy
  target="${SSH_USER}@${ssh}"
  nodedir="out/${name}"
  if [[ ! -d "$nodedir" ]]; then
    echo "ERROR: $nodedir missing — run ./generate-cluster-certs.sh first." >&2
    exit 2
  fi
  routes="$(routes_for "$name")"
  echo "-- node ${name} (ssh ${ssh}) routes=${routes}"
  # Generate this node's cluster.env locally, then ship it.
  envfile="build/cluster-${name}.env"
  mkdir -p build
  # NOTE(review): the text from here up to the "[3/3] staged." message is
  # damaged in the reviewed copy. The heredoc body written to "$envfile", the
  # per-node rsync/ssh steps described in the file header, the loop's closing
  # `done` and the start of the step-3 echo are not visible. The fragment below
  # is kept verbatim and is not valid shell as shown — restore this section
  # from version control before running.
  cat > "$envfile" < [3/3] staged."
# Step 3 footer: remind the operator when this was only a dry-run.
if [[ $APPLY -eq 0 ]]; then
  echo " DRY-RUN: nothing was sent. Re-run with --yes to apply."
fi
# Quoted delimiter: the text below is printed literally, so "$h" and the
# backticks are shown to the operator rather than expanded.
cat <<'NEXT'

HUMAN — bring up (see README "Bring up" — a LONE node has no quorum and never
serves healthz, so do NOT gate the next node on the previous one going green):
  1. Seed the FIRST admin into the KV via the loopback bootstrap (README
     "Seed the first admin"); this is needed only for the chicken-and-egg admin.
  2. Start all three so a 2/3 quorum forms (order does not matter); healthz turns
     ok only once the meta-group elects a leader (~10-30s cold):
       for h in magnus homer datardos; do ssh "$h" 'sudo systemctl enable --now membershipd-cluster'; done
  3. Verify posture + quorum (README "Verify").
  4. Ensure R3 on every control-plane stream (README "Replication: go straight
     to R3"); R1 is a SPOF, not a milestone.
  5. Add further users with the cluster LIVE — no restart — via
     `membershipd user add --store kv` (README "Add users to the live cluster").
NEXT