Compare commits
4 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
| b983e43090 | |||
| ff580ac031 | |||
| 9fbff79df4 | |||
| 33746d9962 |
@@ -2,10 +2,10 @@
|
|||||||
#
|
#
|
||||||
# This file is SOURCED by generate-cluster-certs.sh and deploy-cluster.sh.
|
# This file is SOURCED by generate-cluster-certs.sh and deploy-cluster.sh.
|
||||||
#
|
#
|
||||||
# HUMAN: fill in every <PLACEHOLDER> with the real value before running the
|
# HUMAN: fill in every placeholder with the real value before running the
|
||||||
# scripts. The public IPs known at authoring time are pre-filled; the WireGuard
|
# scripts. The public IPs known at authoring time are pre-filled; the WireGuard
|
||||||
# mesh IPs and magnus's public IP must be supplied. The scripts refuse to run
|
# mesh IPs and magnus's public IP must be supplied. The scripts refuse to run
|
||||||
# while any <PLACEHOLDER> remains.
|
# while any unfilled placeholder remains.
|
||||||
|
|
||||||
# Cluster identity (must be identical on every node).
|
# Cluster identity (must be identical on every node).
|
||||||
CLUSTER_NAME="unibus"
|
CLUSTER_NAME="unibus"
|
||||||
@@ -16,7 +16,7 @@ CLUSTER_USER="unibus-cluster"
|
|||||||
# KV/nonce replication factor. START AT 1 for the initial 1->3 rollout, then raise
|
# KV/nonce replication factor. START AT 1 for the initial 1->3 rollout, then raise
|
||||||
# to 3 IN PLACE (see README "Scale to R3") once all three nodes have joined. Only
|
# to 3 IN PLACE (see README "Scale to R3") once all three nodes have joined. Only
|
||||||
# set this to 3 here after the third node is up and you re-run the KV update.
|
# set this to 3 here after the third node is up and you re-run the KV update.
|
||||||
KV_REPLICAS=1
|
KV_REPLICAS=3
|
||||||
|
|
||||||
# Ports (same on every node; the route port is server-to-server only).
|
# Ports (same on every node; the route port is server-to-server only).
|
||||||
NATS_CLIENT_PORT=4250
|
NATS_CLIENT_PORT=4250
|
||||||
@@ -30,15 +30,28 @@ SSH_USER="root"
|
|||||||
# Which address family the inter-node routes use. "wg" builds --routes from the
|
# Which address family the inter-node routes use. "wg" builds --routes from the
|
||||||
# WireGuard mesh IPs (private server-to-server links, preferred); "public" uses
|
# WireGuard mesh IPs (private server-to-server links, preferred); "public" uses
|
||||||
# the public IPs. The route layer is always mutual-TLS regardless.
|
# the public IPs. The route layer is always mutual-TLS regardless.
|
||||||
ROUTE_NETWORK="wg"
|
#
|
||||||
|
# DEPLOY DECISION (2026-06-07): set to "public". No WireGuard mesh exists between
|
||||||
|
# the three cluster nodes — homer and datardos do not even have the `wg` binary
|
||||||
|
# installed, and om's only WG peers are the operator's personal PCs, not the VPS.
|
||||||
|
# Rather than stand up a fresh mesh blindly, the routes go over the public IPs,
|
||||||
|
# still protected by the separate cluster route CA (mutual-TLS). On magnus (the
|
||||||
|
# only node with ufw active) the route port 6250 is restricted to the homer and
|
||||||
|
# datardos public IPs; homer/datardos run ufw inactive (Docker hosts) and rely on
|
||||||
|
# the route mutual-TLS for 6250.
|
||||||
|
ROUTE_NETWORK="public"
|
||||||
|
|
||||||
# One row per node: NAME SSH_HOST PUBLIC_IP WG_IP
|
# One row per node: NAME SSH_HOST PUBLIC_IP WG_IP
|
||||||
# NAME -> --server-name and the per-node cert filenames (unique).
|
# NAME -> --server-name and the per-node cert filenames (unique).
|
||||||
# SSH_HOST -> the `ssh <SSH_HOST>` alias (see ~/.ssh/config).
|
# SSH_HOST -> the SSH host alias, used as `ssh ALIAS` (see ~/.ssh/config).
|
||||||
# PUBLIC_IP -> public address; goes in the cert SANs (client-facing data plane).
|
# PUBLIC_IP -> public address; goes in the cert SANs (client-facing data plane).
|
||||||
# WG_IP -> WireGuard mesh address; cert SAN + route target when ROUTE_NETWORK=wg.
|
# WG_IP -> WireGuard mesh address; cert SAN + route target when ROUTE_NETWORK=wg.
|
||||||
|
# NOTE: with ROUTE_NETWORK=public and no WireGuard mesh, the WG_IP column is set to
|
||||||
|
# each node's public IP so the cert SAN covers the address actually used by the
|
||||||
|
# public routes and no unfilled placeholder remains (scripts refuse to run otherwise).
|
||||||
|
# magnus == organic-machine.com == om (135.125.201.30); SSH alias `magnus` logs in as root.
|
||||||
CLUSTER_NODES=(
|
CLUSTER_NODES=(
|
||||||
"magnus magnus <MAGNUS_PUBLIC_IP> <MAGNUS_WG_IP>"
|
"magnus magnus 135.125.201.30 135.125.201.30"
|
||||||
"homer homer 141.94.69.66 <HOMER_WG_IP>"
|
"homer homer 141.94.69.66 141.94.69.66"
|
||||||
"datardos dd 51.91.100.142 <DATARDOS_WG_IP>"
|
"datardos dd 51.91.100.142 51.91.100.142"
|
||||||
)
|
)
|
||||||
|
|||||||
@@ -0,0 +1,78 @@
|
|||||||
|
---
|
||||||
|
issue: 0007
|
||||||
|
title: Cifrado at-rest del control plane (JetStream KV / SQLite en disco)
|
||||||
|
status: spec
|
||||||
|
created: 2026-06-07
|
||||||
|
domain: security
|
||||||
|
scope: unibus (pkg/embeddednats, cmd/membershipd, deploy/cluster) + procedimiento de migración del store existente
|
||||||
|
---
|
||||||
|
|
||||||
|
# Objetivo
|
||||||
|
|
||||||
|
Cifrar en reposo el almacenamiento del plano de control para que un nodo comprometido
|
||||||
|
(root en el VPS) o un disco robado no exponga los metadatos de control en claro.
|
||||||
|
|
||||||
|
Estado actual (auditado el 07/06/2026, report 0012 y siguientes):
|
||||||
|
|
||||||
|
- **Contenido de los mensajes**: cifrado E2E por room (megolm/olm). El servidor nunca ve el
|
||||||
|
plaintext; no vive en el plano de control. **No es el objeto de este issue.**
|
||||||
|
- **Claves de room** (`UNIBUS_room_keys`): guardadas **selladas** (sealed box X25519, cifradas
|
||||||
|
para cada miembro). El servidor las almacena y reparte pero no puede abrirlas. **Ya protegidas.**
|
||||||
|
- **Metadatos de control** (`UNIBUS_rooms`, `UNIBUS_members`, `UNIBUS_rooms_by_member`,
|
||||||
|
`UNIBUS_users`): se serializan con `json.Marshal` y se escriben **en claro** en el store. En
|
||||||
|
cluster ese store es el directorio `local_files/jetstream/` de cada nodo; en single-node es el
|
||||||
|
archivo SQLite `local_files/unibus.db`. Hoy **no hay cifrado at-rest**: con root en un nodo se
|
||||||
|
pueden leer subjects de salas, la pertenencia (quién está en qué sala con qué rol), los handles
|
||||||
|
y roles de los usuarios, y las claves públicas (signPub/kexPub). No se exponen mensajes (E2E) ni
|
||||||
|
se pueden descifrar salas (claves selladas), pero sí toda la topología.
|
||||||
|
|
||||||
|
Tras este issue, los buckets/archivos del control plane quedan cifrados en disco con una clave por
|
||||||
|
nodo gestionada fuera de git. El modelo de amenaza pasa de "root del nodo ve la topología" a "root
|
||||||
|
del nodo necesita además la clave at-rest (que puede vivir en un secreto separado / TPM / variable
|
||||||
|
de entorno inyectada) para leer cualquier cosa".
|
||||||
|
|
||||||
|
# Contexto técnico
|
||||||
|
|
||||||
|
- NATS Server / JetStream soporta **encryption at-rest** nativo: se configura un cifrado
|
||||||
|
(`aes` o `chacha20`) y una clave; JetStream cifra los ficheros de los streams/KV en disco. El
|
||||||
|
bus usa un NATS **embebido** (`pkg/embeddednats`), así que la activación es por opciones del
|
||||||
|
servidor embebido, no por un `nats-server.conf` externo.
|
||||||
|
- Para el backend SQLite (single-node) el equivalente sería SQLCipher o cifrado a nivel de
|
||||||
|
archivo/FS; queda como sub-tarea de menor prioridad porque el despliegue real es cluster (KV).
|
||||||
|
|
||||||
|
# Tareas
|
||||||
|
|
||||||
|
1. Confirmar la API de encryption-at-rest del NATS embebido en la versión usada (opción de
|
||||||
|
servidor para cipher + clave; cómo se pasa la clave de forma que no quede en argv ni en git).
|
||||||
|
2. Activar el cifrado en `pkg/embeddednats` detrás de una opción de configuración. La clave se
|
||||||
|
inyecta por archivo (`--jetstream-encryption-key-file`, 0600, junto a las claves TLS del nodo)
|
||||||
|
o variable de entorno desde el unit systemd; nunca en argv ni commiteada.
|
||||||
|
3. `cmd/membershipd`: flag/env para la clave + reflejar el estado en la posture publicada en
|
||||||
|
`/healthz` (p.ej. `"at_rest":true`) para que el monitor lo verifique.
|
||||||
|
4. `deploy/cluster`: provisionar la clave at-rest por nodo (generación + `pass`/secrets gitignored)
|
||||||
|
y cablearla en `cluster.env` + el unit. Documentar en el runbook.
|
||||||
|
5. **Migración del store existente** (gotcha crítico): JetStream no re-cifra retroactivamente los
|
||||||
|
datos ya escritos en claro. Diseñar y documentar el procedimiento seguro para el cluster en
|
||||||
|
producción (probable: backup → exportar snapshot del control plane → parar nodo → recrear el
|
||||||
|
store con la clave activa → re-importar; o rotación nodo a nodo aprovechando la replicación R3).
|
||||||
|
Respetar la regla de migraciones (aditivo, sin pérdida de datos).
|
||||||
|
6. Tests: arrancar un nodo con clave at-rest, escribir un user/room, y verificar que el fichero en
|
||||||
|
disco **no** contiene en claro un subject/handle conocido (grep negativo), y que el nodo sigue
|
||||||
|
leyéndolos con la clave. Verificar que sin la clave el store no se abre.
|
||||||
|
|
||||||
|
# Definition of Done
|
||||||
|
|
||||||
|
- Cifrado at-rest activo en los 3 nodos del cluster; `/healthz` lo refleja en la posture.
|
||||||
|
- Evidencia ejecutable: un valor conocido (subject de sala / handle de usuario) **no** aparece en
|
||||||
|
claro al hacer `grep` sobre `local_files/jetstream/`; el nodo lo sigue sirviendo con la clave.
|
||||||
|
- Procedimiento de migración probado sobre datos reales sin pérdida (snapshot/restore verificado).
|
||||||
|
- La clave at-rest nunca está en git ni en argv; vive en archivo 0600 / secreto inyectado.
|
||||||
|
- No baja ninguna otra capa de seguridad (enforce + ACL + TLS + E2E + sealed keys intactas).
|
||||||
|
|
||||||
|
# Notas
|
||||||
|
|
||||||
|
Aditivo y ortogonal al resto de la seguridad: TLS protege en tránsito, E2E el contenido, las claves
|
||||||
|
de room van selladas; este issue cierra el último hueco (metadatos de control en claro en disco)
|
||||||
|
para el modelo de amenaza "VPS comprometido / disco robado". Prioridad media: el despliegue ya es
|
||||||
|
seguro frente a ataques de red (enforce+TLS+ACL); esto endurece frente a compromiso físico/root del
|
||||||
|
host. Relacionado con el endurecimiento de los issues 0004/0005/0006.
|
||||||
@@ -9,6 +9,7 @@ import (
|
|||||||
"crypto/tls"
|
"crypto/tls"
|
||||||
"fmt"
|
"fmt"
|
||||||
"net/url"
|
"net/url"
|
||||||
|
"os"
|
||||||
"time"
|
"time"
|
||||||
|
|
||||||
server "github.com/nats-io/nats-server/v2/server"
|
server "github.com/nats-io/nats-server/v2/server"
|
||||||
@@ -106,6 +107,13 @@ func StartHostAuth(storeDir, host string, port int, auth server.Authentication)
|
|||||||
// blocks until the server is ready to accept connections (up to 5s) and returns
|
// blocks until the server is ready to accept connections (up to 5s) and returns
|
||||||
// the running server; the caller must Shutdown it.
|
// the running server; the caller must Shutdown it.
|
||||||
func StartServer(cfg ServerConfig) (*server.Server, error) {
|
func StartServer(cfg ServerConfig) (*server.Server, error) {
|
||||||
|
// Diagnostic toggle: UNIBUS_NATS_DEBUG=1 (or 2, which additionally sets Trace) enables the embedded nats-server's own
|
||||||
|
// logger (route/RAFT/JetStream errors), which is otherwise silenced. Off by
|
||||||
|
// default so production behavior is unchanged; only set it when debugging the
|
||||||
|
// cluster route layer.
|
||||||
|
debugLevel := os.Getenv("UNIBUS_NATS_DEBUG")
|
||||||
|
debugNATS := debugLevel == "1" || debugLevel == "2"
|
||||||
|
traceNATS := debugLevel == "2"
|
||||||
opts := &server.Options{
|
opts := &server.Options{
|
||||||
JetStream: true,
|
JetStream: true,
|
||||||
StoreDir: cfg.StoreDir,
|
StoreDir: cfg.StoreDir,
|
||||||
@@ -114,9 +122,18 @@ func StartServer(cfg ServerConfig) (*server.Server, error) {
|
|||||||
ServerName: cfg.ServerName,
|
ServerName: cfg.ServerName,
|
||||||
DontListen: false,
|
DontListen: false,
|
||||||
// Keep the embedded server quiet by default; the host app logs the URLs.
|
// Keep the embedded server quiet by default; the host app logs the URLs.
|
||||||
NoLog: true,
|
NoLog: !debugNATS,
|
||||||
|
Debug: debugNATS,
|
||||||
|
Trace: traceNATS,
|
||||||
|
Logtime: true,
|
||||||
NoSigs: true,
|
NoSigs: true,
|
||||||
}
|
}
|
||||||
|
if debugNATS {
|
||||||
|
// Expose the nats-server monitoring endpoint (loopback) so the operator can
|
||||||
|
// inspect /jsz, /routez, /varz while debugging the cluster meta-group.
|
||||||
|
opts.HTTPHost = "127.0.0.1"
|
||||||
|
opts.HTTPPort = 8222
|
||||||
|
}
|
||||||
if cfg.Auth != nil {
|
if cfg.Auth != nil {
|
||||||
opts.CustomClientAuthentication = cfg.Auth
|
opts.CustomClientAuthentication = cfg.Auth
|
||||||
// A CustomClientAuthentication alone does not make the server advertise a
|
// A CustomClientAuthentication alone does not make the server advertise a
|
||||||
@@ -141,6 +158,10 @@ func StartServer(cfg ServerConfig) (*server.Server, error) {
|
|||||||
return nil, fmt.Errorf("embeddednats: new server: %w", err)
|
return nil, fmt.Errorf("embeddednats: new server: %w", err)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if debugNATS {
|
||||||
|
ns.ConfigureLogger()
|
||||||
|
}
|
||||||
|
|
||||||
go ns.Start()
|
go ns.Start()
|
||||||
|
|
||||||
if !ns.ReadyForConnections(5 * time.Second) {
|
if !ns.ReadyForConnections(5 * time.Second) {
|
||||||
@@ -162,6 +183,21 @@ func applyClusterOpts(opts *server.Options, c *ClusterConfig) error {
|
|||||||
Port: c.Port,
|
Port: c.Port,
|
||||||
Username: c.Username,
|
Username: c.Username,
|
||||||
Password: c.Password,
|
Password: c.Password,
|
||||||
|
// Disable route connection pooling (nats-server 2.10+ defaults to a pool of
|
||||||
|
// 3 connections per peer). On a small cluster the pool churns with
|
||||||
|
// "duplicate route"/"client closed" reconnects that interrupt the meta-group
|
||||||
|
// RAFT heartbeats, causing perpetual leader re-elections so the JetStream
|
||||||
|
// meta never becomes current and stream/KV creation hangs (issue 0006g).
|
||||||
|
// PoolSize=-1 forces the classic single route per peer, which is stable for
|
||||||
|
// the 3-node unibus cluster.
|
||||||
|
PoolSize: -1,
|
||||||
|
// NoAdvertise stops the server from gossiping its locally-discovered IPs to
|
||||||
|
// peers. The cluster nodes are Docker hosts, so without this NATS advertises
|
||||||
|
// the docker bridge addresses (172.x / 10.0.x) as reachable routes; peers
|
||||||
|
// then try to dial those private, mutually-unreachable IPs, churning the
|
||||||
|
// route layer and destabilizing the JetStream meta-group. With NoAdvertise
|
||||||
|
// the nodes use ONLY the explicit public-IP routes we configure (issue 0006g).
|
||||||
|
NoAdvertise: true,
|
||||||
}
|
}
|
||||||
if c.TLS != nil {
|
if c.TLS != nil {
|
||||||
opts.Cluster.TLSConfig = c.TLS
|
opts.Cluster.TLSConfig = c.TLS
|
||||||
|
|||||||
@@ -85,8 +85,18 @@ func OpenJetStream(js jetstream.JetStream, cfg JetStreamConfig) (Store, error) {
|
|||||||
if opTimeout <= 0 {
|
if opTimeout <= 0 {
|
||||||
opTimeout = defaultKVOpTime
|
opTimeout = defaultKVOpTime
|
||||||
}
|
}
|
||||||
ctx, cancel := context.WithTimeout(context.Background(), 15*time.Second)
|
// Bootstrap budget for creating/opening the buckets. On a single node JetStream
|
||||||
defer cancel()
|
// is ready the instant the server starts, so the first attempt succeeds. On a
|
||||||
|
// COLD multi-node cluster the JetStream meta-group must first elect a leader and
|
||||||
|
// each node must establish contact with it before its $JS.API responds. A KV
|
||||||
|
// op is a NATS request/reply: if it is published before the node's JetStream is
|
||||||
|
// ready the request is dropped (not queued), and a single long-context call then
|
||||||
|
// just blocks until it times out (issue 0006g). So we RETRY each bucket op with
|
||||||
|
// short per-attempt contexts until it succeeds or the overall bootstrap budget
|
||||||
|
// is exhausted; once the cluster is ready the next retry lands and the buckets
|
||||||
|
// are created, after which they persist and every node opens them quickly.
|
||||||
|
bootstrapBudget := 120 * time.Second
|
||||||
|
deadline := time.Now().Add(bootstrapBudget)
|
||||||
|
|
||||||
s := &jetstreamStore{opTimeout: opTimeout}
|
s := &jetstreamStore{opTimeout: opTimeout}
|
||||||
for _, b := range []struct {
|
for _, b := range []struct {
|
||||||
@@ -99,14 +109,27 @@ func OpenJetStream(js jetstream.JetStream, cfg JetStreamConfig) (Store, error) {
|
|||||||
{bucketRoomKeys, &s.keys},
|
{bucketRoomKeys, &s.keys},
|
||||||
{bucketUsers, &s.users},
|
{bucketUsers, &s.users},
|
||||||
} {
|
} {
|
||||||
kv, err := js.CreateOrUpdateKeyValue(ctx, jetstream.KeyValueConfig{
|
var kv jetstream.KeyValue
|
||||||
|
var lastErr error
|
||||||
|
for {
|
||||||
|
opCtx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
|
||||||
|
kv, lastErr = js.CreateOrUpdateKeyValue(opCtx, jetstream.KeyValueConfig{
|
||||||
Bucket: b.name,
|
Bucket: b.name,
|
||||||
Replicas: cfg.Replicas,
|
Replicas: cfg.Replicas,
|
||||||
History: 1,
|
History: 1,
|
||||||
Storage: jetstream.FileStorage,
|
Storage: jetstream.FileStorage,
|
||||||
})
|
})
|
||||||
if err != nil {
|
cancel()
|
||||||
return nil, fmt.Errorf("membership: open KV bucket %q (replicas=%d): %w", b.name, cfg.Replicas, err)
|
if lastErr == nil {
|
||||||
|
break
|
||||||
|
}
|
||||||
|
if time.Now().After(deadline) {
|
||||||
|
return nil, fmt.Errorf("membership: open KV bucket %q (replicas=%d) after %s: %w", b.name, cfg.Replicas, bootstrapBudget, lastErr)
|
||||||
|
}
|
||||||
|
// JetStream not ready yet (no meta leader / request dropped). Wait and
|
||||||
|
// re-publish the op; in a cluster cold start this lands once the meta
|
||||||
|
// group settles.
|
||||||
|
time.Sleep(1 * time.Second)
|
||||||
}
|
}
|
||||||
*b.dst = kv
|
*b.dst = kv
|
||||||
}
|
}
|
||||||
|
|||||||
Reference in New Issue
Block a user