9013ea5e33
The one-time data move decentralization needs (issue 0003c): copy the entire control-plane state from the local SQLite database into the replicated JetStream KV buckets, with a backup taken first. pkg/membership: - Snapshot / SealedKeyRecord: a backend-agnostic dump of the whole control plane (rooms with their real epoch, members, every sealed-key row across epochs, users with status). - (*sqliteStore).ExportSnapshot and (*jetstreamStore).ExportSnapshot read a full Snapshot from each backend; (*jetstreamStore).importSnapshot writes one with raw Puts (preserving epoch/status, not resetting to defaults) so the migration is faithful and idempotent (every write is an overwrite, so re-running converges). - MigrateSQLiteToKV orchestrates export -> import; BackupSQLite makes a consistent copy via SQLite's VACUUM INTO before any migration. cmd/membershipd: - `membershipd migrate-to-kv --db <path> --nats-url <url> [--replicas N] [--ca <cert>] [--no-backup]` backs up the SQLite file, connects to the cluster's NATS, and migrates. Dispatched on the host like `user`. Tests (DoD: golden + edge + parity): - TestMigrateSQLiteToKVParity: seed a representative SQLite (two rooms, one rekeyed to epoch 2, members, a revoked user); after migration the KV ExportSnapshot equals the SQLite ExportSnapshot. - TestMigrateSQLiteToKVIdempotent: running the migration twice yields the same KV state. - TestBackupSQLiteCreatesConsistentCopy: the backup reopens with identical data. Plus a binary smoke (seed user -> run server -> migrate-to-kv -> re-run): backup written, 1 user migrated, second run identical.
177 lines
5.7 KiB
Go
177 lines
5.7 KiB
Go
package membership
|
|
|
|
// Migration from the local SQLite control plane to replicated JetStream KV
|
|
// (issue 0003c). It is the one-time, idempotent data move that decentralization
|
|
// needs: read the entire SQLite state, write it into the KV buckets. Re-running
|
|
// it is safe (every KV write is an overwrite), so a partial/interrupted run is
|
|
// recovered by running again, and a parity test can assert the two stores hold
|
|
// the same state before and after.
|
|
|
|
import (
	"database/sql"
	"fmt"
	"os"
	"strings"
	"time"

	"github.com/nats-io/nats.go/jetstream"
)
|
|
|
|
// SealedKeyRecord carries a single room_keys row: the sealed room key held by
// one endpoint at one epoch. Snapshots move keys at this granularity so that an
// import can reproduce a room's exact epoch history, which replaying
// CreateRoom/AddMember alone could not do for a multi-epoch room.
type SealedKeyRecord struct {
	RoomID, Endpoint string // room the key belongs to, and the endpoint it is sealed for
	Epoch            int    // key epoch of this row
	Sealed           []byte // the sealed key bytes, carried opaquely
}
|
|
|
|
// Snapshot is the complete control-plane state, backend-agnostic. It is what
// ExportSnapshot produces and importSnapshot consumes, so the SQLite->KV
// migration and the parity test both work in terms of it.
type Snapshot struct {
	// Rooms holds one RoomInfo per room.
	Rooms []RoomInfo
	// Members groups each room's members under that room's ID.
	Members map[string][]Member // roomID -> members
	// Keys holds the sealed-key rows, one SealedKeyRecord per row.
	Keys []SealedKeyRecord
	// Users holds the user records.
	Users []User
}
|
|
|
|
// MigrateReport is the operator-facing summary of one migration run: where the
// backup was written and how many records of each kind were moved.
type MigrateReport struct {
	// BackupPath is the location of the pre-migration SQLite backup.
	// MigrateSQLiteToKV leaves it empty; presumably the caller fills it in from
	// BackupSQLite's result — confirm against the call site.
	BackupPath string

	// Record counts taken from the exported snapshot. Members is the total
	// across all rooms.
	Rooms, Members, Keys, Users int
}
|
|
|
|
// MigrateSQLiteToKV reads the SQLite store at sqlitePath and writes its entire
|
|
// state into the JetStream KV buckets on js (created with cfg.Replicas). It is
|
|
// idempotent: re-running converges to the same state. The caller is responsible
|
|
// for backing up the SQLite file first (BackupSQLite) — this function only
|
|
// reads it.
|
|
func MigrateSQLiteToKV(sqlitePath string, js jetstream.JetStream, cfg JetStreamConfig) (*MigrateReport, error) {
|
|
src, err := openSQLite(sqlitePath)
|
|
if err != nil {
|
|
return nil, fmt.Errorf("migrate: open sqlite %q: %w", sqlitePath, err)
|
|
}
|
|
defer src.Close()
|
|
|
|
snap, err := src.ExportSnapshot()
|
|
if err != nil {
|
|
return nil, fmt.Errorf("migrate: export sqlite: %w", err)
|
|
}
|
|
|
|
dst, err := OpenJetStream(js, cfg)
|
|
if err != nil {
|
|
return nil, fmt.Errorf("migrate: open kv: %w", err)
|
|
}
|
|
kv := dst.(*jetstreamStore)
|
|
if err := kv.importSnapshot(snap); err != nil {
|
|
return nil, fmt.Errorf("migrate: import to kv: %w", err)
|
|
}
|
|
|
|
members := 0
|
|
for _, ms := range snap.Members {
|
|
members += len(ms)
|
|
}
|
|
return &MigrateReport{
|
|
Rooms: len(snap.Rooms),
|
|
Members: members,
|
|
Keys: len(snap.Keys),
|
|
Users: len(snap.Users),
|
|
}, nil
|
|
}
|
|
|
|
// BackupSQLite makes a consistent copy of the SQLite database next to it,
// named "<path>.bak.<unixnano>", using SQLite's own VACUUM INTO (which writes a
// transactionally-consistent snapshot even with a live WAL). It returns the
// backup path. Always call this before MigrateSQLiteToKV so a botched migration
// can be undone.
//
// The source file must already exist. A missing or unreadable path is reported
// as an error before the database is opened, so a mistyped path cannot produce
// a "successful" backup of a brand-new empty database. On any error the
// returned path is empty.
func BackupSQLite(path string) (string, error) {
	// SQLite's default open mode is read-write-create, so opening a path that
	// does not exist may silently create it. Refuse up front instead.
	if _, err := os.Stat(path); err != nil {
		return "", fmt.Errorf("backup: stat %q: %w", path, err)
	}

	// The nanosecond suffix keeps repeated backups from colliding; VACUUM INTO
	// refuses to write over an existing file.
	dst := fmt.Sprintf("%s.bak.%d", path, time.Now().UnixNano())

	db, err := sql.Open("sqlite", "file:"+path+"?_pragma=busy_timeout(5000)")
	if err != nil {
		return "", fmt.Errorf("backup: open %q: %w", path, err)
	}
	defer db.Close()

	// sql.Open is lazy; Ping forces a real connection so open problems are
	// reported as such rather than as a VACUUM failure.
	if err := db.Ping(); err != nil {
		return "", fmt.Errorf("backup: ping %q: %w", path, err)
	}

	// VACUUM INTO writes a fresh, consistent database file; the literal path is
	// safely single-quoted (it is operator-supplied, never network input).
	if _, err := db.Exec("VACUUM INTO '" + strings.ReplaceAll(dst, "'", "''") + "'"); err != nil {
		return "", fmt.Errorf("backup: VACUUM INTO %q: %w", dst, err)
	}
	return dst, nil
}
|
|
|
|
// ---- SQLite export --------------------------------------------------------
|
|
|
|
// ExportSnapshot reads the entire SQLite control-plane state into a Snapshot.
|
|
func (s *sqliteStore) ExportSnapshot() (*Snapshot, error) {
|
|
snap := &Snapshot{Members: map[string][]Member{}}
|
|
|
|
rows, err := s.db.Query(`SELECT room_id, subject, key_epoch, encrypt, persist, sign_msgs, owner_endpoint FROM rooms ORDER BY room_id`)
|
|
if err != nil {
|
|
return nil, fmt.Errorf("export: query rooms: %w", err)
|
|
}
|
|
for rows.Next() {
|
|
var r RoomInfo
|
|
var enc, per, sgn int
|
|
if err := rows.Scan(&r.RoomID, &r.Subject, &r.Epoch, &enc, &per, &sgn, &r.OwnerEndpoint); err != nil {
|
|
rows.Close()
|
|
return nil, fmt.Errorf("export: scan room: %w", err)
|
|
}
|
|
r.Encrypt, r.Persist, r.SignMsgs = enc != 0, per != 0, sgn != 0
|
|
snap.Rooms = append(snap.Rooms, r)
|
|
}
|
|
rows.Close()
|
|
if err := rows.Err(); err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
mrows, err := s.db.Query(`SELECT room_id, endpoint, role, sign_pub, kex_pub FROM members ORDER BY room_id, endpoint`)
|
|
if err != nil {
|
|
return nil, fmt.Errorf("export: query members: %w", err)
|
|
}
|
|
for mrows.Next() {
|
|
var roomID string
|
|
var m Member
|
|
if err := mrows.Scan(&roomID, &m.Endpoint, &m.Role, &m.SignPub, &m.KexPub); err != nil {
|
|
mrows.Close()
|
|
return nil, fmt.Errorf("export: scan member: %w", err)
|
|
}
|
|
snap.Members[roomID] = append(snap.Members[roomID], m)
|
|
}
|
|
mrows.Close()
|
|
if err := mrows.Err(); err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
krows, err := s.db.Query(`SELECT room_id, epoch, endpoint, sealed_key FROM room_keys ORDER BY room_id, endpoint, epoch`)
|
|
if err != nil {
|
|
return nil, fmt.Errorf("export: query room_keys: %w", err)
|
|
}
|
|
for krows.Next() {
|
|
var rec SealedKeyRecord
|
|
if err := krows.Scan(&rec.RoomID, &rec.Epoch, &rec.Endpoint, &rec.Sealed); err != nil {
|
|
krows.Close()
|
|
return nil, fmt.Errorf("export: scan room_key: %w", err)
|
|
}
|
|
snap.Keys = append(snap.Keys, rec)
|
|
}
|
|
krows.Close()
|
|
if err := krows.Err(); err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
users, err := s.ListUsers()
|
|
if err != nil {
|
|
return nil, fmt.Errorf("export: list users: %w", err)
|
|
}
|
|
snap.Users = users
|
|
return snap, nil
|
|
}
|