feat(0003c): membershipd migrate-to-kv (idempotent SQLite -> JetStream KV)
The one-time data move decentralization needs (issue 0003c): copy the entire control-plane state from the local SQLite database into the replicated JetStream KV buckets, with a backup taken first. pkg/membership: - Snapshot / SealedKeyRecord: a backend-agnostic dump of the whole control plane (rooms with their real epoch, members, every sealed-key row across epochs, users with status). - (*sqliteStore).ExportSnapshot and (*jetstreamStore).ExportSnapshot read a full Snapshot from each backend; (*jetstreamStore).importSnapshot writes one with raw Puts (preserving epoch/status, not resetting to defaults) so the migration is faithful and idempotent (every write is an overwrite, so re-running converges). - MigrateSQLiteToKV orchestrates export -> import; BackupSQLite makes a consistent copy via SQLite's VACUUM INTO before any migration. cmd/membershipd: - `membershipd migrate-to-kv --db <path> --nats-url <url> [--replicas N] [--ca <cert>] [--no-backup]` backs up the SQLite file, connects to the cluster's NATS, and migrates. Dispatched on the host like `user`. Tests (DoD: golden + edge + parity): - TestMigrateSQLiteToKVParity: seed a representative SQLite (two rooms, one rekeyed to epoch 2, members, a revoked user); after migration the KV ExportSnapshot equals the SQLite ExportSnapshot. - TestMigrateSQLiteToKVIdempotent: running the migration twice yields the same KV state. - TestBackupSQLiteCreatesConsistentCopy: the backup reopens with identical data. Plus a binary smoke (seed user -> run server -> migrate-to-kv -> re-run): backup written, 1 user migrated, second run identical.
This commit is contained in:
@@ -0,0 +1,176 @@
|
||||
package membership
|
||||
|
||||
// Migration from the local SQLite control plane to replicated JetStream KV
|
||||
// (issue 0003c). It is the one-time, idempotent data move that decentralization
|
||||
// needs: read the entire SQLite state, write it into the KV buckets. Re-running
|
||||
// it is safe (every KV write is an overwrite), so a partial/interrupted run is
|
||||
// recovered by running again, and a parity test can assert the two stores hold
|
||||
// the same state before and after.
|
||||
|
||||
import (
	"database/sql"
	"fmt"
	"os"
	"strings"
	"time"

	"github.com/nats-io/nats.go/jetstream"
)
|
||||
|
||||
// SealedKeyRecord is one row of room_keys: the sealed room key for an endpoint
// at a given epoch. It is the unit the snapshot carries so a backend can be
// imported with the exact epoch history (CreateRoom/AddMember alone could not
// reproduce a multi-epoch room).
type SealedKeyRecord struct {
	RoomID   string // room_keys.room_id
	Endpoint string // room_keys.endpoint
	Epoch    int    // room_keys.epoch
	Sealed   []byte // room_keys.sealed_key, carried verbatim
}
|
||||
|
||||
// Snapshot is the complete control-plane state, backend-agnostic. It is what
|
||||
// ExportSnapshot produces and importSnapshot consumes, so the SQLite->KV
|
||||
// migration and the parity test both work in terms of it.
|
||||
type Snapshot struct {
|
||||
Rooms []RoomInfo
|
||||
Members map[string][]Member // roomID -> members
|
||||
Keys []SealedKeyRecord
|
||||
Users []User
|
||||
}
|
||||
|
||||
// MigrateReport summarizes what a migration moved, for the operator log.
type MigrateReport struct {
	// BackupPath is the path of the SQLite backup taken before migrating.
	// MigrateSQLiteToKV does not set it; presumably the caller stores the
	// path returned by BackupSQLite here (confirm against the command code).
	BackupPath string

	Rooms   int // number of rooms in the exported snapshot
	Members int // total members, summed over every room
	Keys    int // number of sealed-key records
	Users   int // number of users
}
|
||||
|
||||
// MigrateSQLiteToKV reads the SQLite store at sqlitePath and writes its entire
|
||||
// state into the JetStream KV buckets on js (created with cfg.Replicas). It is
|
||||
// idempotent: re-running converges to the same state. The caller is responsible
|
||||
// for backing up the SQLite file first (BackupSQLite) — this function only
|
||||
// reads it.
|
||||
func MigrateSQLiteToKV(sqlitePath string, js jetstream.JetStream, cfg JetStreamConfig) (*MigrateReport, error) {
|
||||
src, err := openSQLite(sqlitePath)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("migrate: open sqlite %q: %w", sqlitePath, err)
|
||||
}
|
||||
defer src.Close()
|
||||
|
||||
snap, err := src.ExportSnapshot()
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("migrate: export sqlite: %w", err)
|
||||
}
|
||||
|
||||
dst, err := OpenJetStream(js, cfg)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("migrate: open kv: %w", err)
|
||||
}
|
||||
kv := dst.(*jetstreamStore)
|
||||
if err := kv.importSnapshot(snap); err != nil {
|
||||
return nil, fmt.Errorf("migrate: import to kv: %w", err)
|
||||
}
|
||||
|
||||
members := 0
|
||||
for _, ms := range snap.Members {
|
||||
members += len(ms)
|
||||
}
|
||||
return &MigrateReport{
|
||||
Rooms: len(snap.Rooms),
|
||||
Members: members,
|
||||
Keys: len(snap.Keys),
|
||||
Users: len(snap.Users),
|
||||
}, nil
|
||||
}
|
||||
|
||||
// BackupSQLite makes a consistent copy of the SQLite database next to it,
// named "<path>.bak.<unixnano>", using SQLite's own VACUUM INTO (which writes a
// transactionally-consistent snapshot even with a live WAL). It returns the
// backup path. Always call this before MigrateSQLiteToKV so a botched migration
// can be undone.
//
// path must name an existing file. A missing path or a directory is rejected
// up front, and the returned error wraps the underlying os error (so
// errors.Is(err, os.ErrNotExist) works). On any failure the returned path is
// empty.
func BackupSQLite(path string) (string, error) {
	// SQLite normally creates a database file that does not exist yet, so
	// without this check a mistyped path would be turned into a fresh empty
	// database and then "backed up" successfully.
	info, err := os.Stat(path)
	if err != nil {
		return "", fmt.Errorf("backup: stat %q: %w", path, err)
	}
	if info.IsDir() {
		return "", fmt.Errorf("backup: %q is a directory, not a database file", path)
	}

	dst := fmt.Sprintf("%s.bak.%d", path, time.Now().UnixNano())
	db, err := sql.Open("sqlite", "file:"+path+"?_pragma=busy_timeout(5000)")
	if err != nil {
		return "", fmt.Errorf("backup: open %q: %w", path, err)
	}
	defer db.Close()
	// sql.Open is lazy; Ping forces a real connection so open failures are
	// reported here rather than by the VACUUM below.
	if err := db.Ping(); err != nil {
		return "", fmt.Errorf("backup: ping %q: %w", path, err)
	}
	// VACUUM INTO writes a fresh, consistent database file; the literal path is
	// safely single-quoted (it is operator-supplied, never network input).
	if _, err := db.Exec("VACUUM INTO '" + strings.ReplaceAll(dst, "'", "''") + "'"); err != nil {
		return "", fmt.Errorf("backup: VACUUM INTO %q: %w", dst, err)
	}
	return dst, nil
}
|
||||
|
||||
// ---- SQLite export --------------------------------------------------------
|
||||
|
||||
// ExportSnapshot reads the entire SQLite control-plane state into a Snapshot.
|
||||
func (s *sqliteStore) ExportSnapshot() (*Snapshot, error) {
|
||||
snap := &Snapshot{Members: map[string][]Member{}}
|
||||
|
||||
rows, err := s.db.Query(`SELECT room_id, subject, key_epoch, encrypt, persist, sign_msgs, owner_endpoint FROM rooms ORDER BY room_id`)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("export: query rooms: %w", err)
|
||||
}
|
||||
for rows.Next() {
|
||||
var r RoomInfo
|
||||
var enc, per, sgn int
|
||||
if err := rows.Scan(&r.RoomID, &r.Subject, &r.Epoch, &enc, &per, &sgn, &r.OwnerEndpoint); err != nil {
|
||||
rows.Close()
|
||||
return nil, fmt.Errorf("export: scan room: %w", err)
|
||||
}
|
||||
r.Encrypt, r.Persist, r.SignMsgs = enc != 0, per != 0, sgn != 0
|
||||
snap.Rooms = append(snap.Rooms, r)
|
||||
}
|
||||
rows.Close()
|
||||
if err := rows.Err(); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
mrows, err := s.db.Query(`SELECT room_id, endpoint, role, sign_pub, kex_pub FROM members ORDER BY room_id, endpoint`)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("export: query members: %w", err)
|
||||
}
|
||||
for mrows.Next() {
|
||||
var roomID string
|
||||
var m Member
|
||||
if err := mrows.Scan(&roomID, &m.Endpoint, &m.Role, &m.SignPub, &m.KexPub); err != nil {
|
||||
mrows.Close()
|
||||
return nil, fmt.Errorf("export: scan member: %w", err)
|
||||
}
|
||||
snap.Members[roomID] = append(snap.Members[roomID], m)
|
||||
}
|
||||
mrows.Close()
|
||||
if err := mrows.Err(); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
krows, err := s.db.Query(`SELECT room_id, epoch, endpoint, sealed_key FROM room_keys ORDER BY room_id, endpoint, epoch`)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("export: query room_keys: %w", err)
|
||||
}
|
||||
for krows.Next() {
|
||||
var rec SealedKeyRecord
|
||||
if err := krows.Scan(&rec.RoomID, &rec.Epoch, &rec.Endpoint, &rec.Sealed); err != nil {
|
||||
krows.Close()
|
||||
return nil, fmt.Errorf("export: scan room_key: %w", err)
|
||||
}
|
||||
snap.Keys = append(snap.Keys, rec)
|
||||
}
|
||||
krows.Close()
|
||||
if err := krows.Err(); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
users, err := s.ListUsers()
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("export: list users: %w", err)
|
||||
}
|
||||
snap.Users = users
|
||||
return snap, nil
|
||||
}
|
||||
Reference in New Issue
Block a user