feat(0003c): membershipd migrate-to-kv (idempotent SQLite -> JetStream KV)

The one-time data move that decentralization needs (issue 0003c): copy the
entire control-plane state from the local SQLite database into the
replicated JetStream KV buckets, with a backup taken first.

pkg/membership:
- Snapshot / SealedKeyRecord: a backend-agnostic dump of the whole
  control plane (rooms with their real epoch, members, every sealed-key
  row across epochs, users with status).
- (*sqliteStore).ExportSnapshot and (*jetstreamStore).ExportSnapshot read
  a full Snapshot from each backend; (*jetstreamStore).importSnapshot
  writes one with raw Puts (preserving epoch/status, not resetting to
  defaults) so the migration is faithful and idempotent (every write is
  an overwrite, so re-running converges).
- MigrateSQLiteToKV orchestrates export -> import; BackupSQLite makes a
  consistent copy via SQLite's VACUUM INTO before any migration.

cmd/membershipd:
- `membershipd migrate-to-kv --db <path> --nats-url <url> [--replicas N]
  [--ca <cert>] [--no-backup]` backs up the SQLite file, connects to the
  cluster's NATS, and migrates. Dispatched on the host like `user`.

Tests (DoD: golden + edge + parity):
- TestMigrateSQLiteToKVParity: seed a representative SQLite (two rooms,
  one rekeyed to epoch 2, members, a revoked user); after migration the
  KV ExportSnapshot equals the SQLite ExportSnapshot.
- TestMigrateSQLiteToKVIdempotent: running the migration twice yields the
  same KV state.
- TestBackupSQLiteCreatesConsistentCopy: the backup reopens with
  identical data.
Plus a binary smoke (seed user -> run server -> migrate-to-kv -> re-run):
backup written, 1 user migrated, second run identical.
This commit is contained in:
agent
2026-06-07 15:09:56 +02:00
parent b8c9b2b652
commit 9013ea5e33
5 changed files with 588 additions and 0 deletions
+123
View File
@@ -37,6 +37,7 @@ import (
"fmt"
"sort"
"strconv"
"strings"
"time"
"github.com/nats-io/nats.go/jetstream"
@@ -508,3 +509,125 @@ func (s *jetstreamStore) HasAdmin() bool {
}
return false
}
// ---- snapshot import / export (issue 0003c migration) ---------------------
// importSnapshot replays a full Snapshot into the KV buckets using plain Puts.
//
// Rooms and users are stored as their JSON encoding, members go through
// putMember, and sealed keys are stored as raw bytes under the key built by
// sealedKey. Because records are written as-is (rather than via
// CreateRoom/AddUser), each room keeps its epoch and each user keeps its status.
// Every write is an overwrite, so running the import again converges on the same
// state.
//
// The first failing marshal or Put aborts the import and is returned wrapped
// with an "import:" prefix; records written before the failure stay in place.
func (s *jetstreamStore) importSnapshot(snap *Snapshot) error {
	ctx, cancel := s.ctx()
	defer cancel()

	// Rooms: keyed by room ID, value is the JSON-encoded room record.
	for i := range snap.Rooms {
		room := snap.Rooms[i]
		payload, err := json.Marshal(room)
		if err != nil {
			return fmt.Errorf("import: marshal room %q: %w", room.RoomID, err)
		}
		_, err = s.rooms.Put(ctx, room.RoomID, payload)
		if err != nil {
			return fmt.Errorf("import: put room %q: %w", room.RoomID, err)
		}
	}

	// Members: grouped per room in the snapshot, written one by one.
	for room, roster := range snap.Members {
		for i := range roster {
			err := s.putMember(ctx, room, roster[i])
			if err != nil {
				return fmt.Errorf("import: %w", err)
			}
		}
	}

	// Sealed keys: one entry per (room, endpoint, epoch), stored verbatim.
	for i := range snap.Keys {
		k := snap.Keys[i]
		name := sealedKey(k.RoomID, k.Endpoint, k.Epoch)
		_, err := s.keys.Put(ctx, name, k.Sealed)
		if err != nil {
			return fmt.Errorf("import: put key %q/%q@%d: %w", k.RoomID, k.Endpoint, k.Epoch, err)
		}
	}

	// Users: keyed by the normalized signing public key.
	for i := range snap.Users {
		user := snap.Users[i]
		payload, err := json.Marshal(user)
		if err != nil {
			return fmt.Errorf("import: marshal user %q: %w", user.SignPub, err)
		}
		_, err = s.users.Put(ctx, normalizeSignPub(user.SignPub), payload)
		if err != nil {
			return fmt.Errorf("import: put user %q: %w", user.SignPub, err)
		}
	}

	return nil
}
// ExportSnapshot dumps the KV-backed control-plane state into a Snapshot so it
// can be compared against the SQLite source (the migration's parity check).
//
// Rooms and members are decoded from the JSON values of their buckets, sealed
// keys are rebuilt from the key name plus the raw value, and users come from
// ListUsers. Any watch, decode or listing failure is returned wrapped with an
// "export kv:" prefix. Sealed-key entries whose name is not exactly
// "<roomID>.<endpoint>.<epoch>" with an integer epoch are skipped silently.
func (s *jetstreamStore) ExportSnapshot() (*Snapshot, error) {
	out := &Snapshot{Members: map[string][]Member{}}

	// Rooms.
	rooms, err := s.watchAll(s.rooms)
	if err != nil {
		return nil, fmt.Errorf("export kv: rooms: %w", err)
	}
	for _, entry := range rooms {
		var info RoomInfo
		if decErr := json.Unmarshal(entry.Value(), &info); decErr != nil {
			return nil, fmt.Errorf("export kv: unmarshal room: %w", decErr)
		}
		out.Rooms = append(out.Rooms, info)
	}

	// Members, grouped by the room ID that prefixes each key.
	roster, err := s.watchAll(s.members)
	if err != nil {
		return nil, fmt.Errorf("export kv: members: %w", err)
	}
	for _, entry := range roster {
		var member Member
		if decErr := json.Unmarshal(entry.Value(), &member); decErr != nil {
			return nil, fmt.Errorf("export kv: unmarshal member: %w", decErr)
		}
		// Key is "<roomID>.<endpoint>"; neither segment contains a dot, so
		// everything before the first dot is the room ID.
		room, _, _ := strings.Cut(entry.Key(), ".")
		out.Members[room] = append(out.Members[room], member)
	}

	// Sealed keys.
	sealed, err := s.watchAll(s.keys)
	if err != nil {
		return nil, fmt.Errorf("export kv: keys: %w", err)
	}
	for _, entry := range sealed {
		// Key is "<roomID>.<endpoint>.<epoch>"; anything else is ignored.
		segs := strings.Split(entry.Key(), ".")
		if len(segs) == 3 {
			if epoch, convErr := strconv.Atoi(segs[2]); convErr == nil {
				out.Keys = append(out.Keys, SealedKeyRecord{
					RoomID:   segs[0],
					Endpoint: segs[1],
					Epoch:    epoch,
					Sealed:   entry.Value(),
				})
			}
		}
	}

	// Users.
	accounts, err := s.ListUsers()
	if err != nil {
		return nil, fmt.Errorf("export kv: users: %w", err)
	}
	out.Users = accounts

	return out, nil
}
// watchAll collects every current entry of a bucket (no key filter, delete
// markers ignored), draining the watcher up to the nil entry that marks the end
// of its initial snapshot.
//
// It returns an error if the watcher cannot be created, if the store's context
// ends first, or if the updates channel is closed before the marker is seen. A
// receive on a closed channel also yields nil, so without the ok check a stopped
// watcher would be indistinguishable from a completed snapshot and a partial
// result would be returned as if it were the whole bucket.
func (s *jetstreamStore) watchAll(kv jetstream.KeyValue) ([]jetstream.KeyValueEntry, error) {
	ctx, cancel := s.ctx()
	defer cancel()

	w, err := kv.WatchAll(ctx, jetstream.IgnoreDeletes())
	if err != nil {
		return nil, err
	}
	defer w.Stop()

	var out []jetstream.KeyValueEntry
	for {
		select {
		case e, ok := <-w.Updates():
			if !ok {
				// Channel closed without the end-of-snapshot marker: report
				// the context error when that is the cause, otherwise say so
				// explicitly rather than returning a truncated list.
				if ctxErr := ctx.Err(); ctxErr != nil {
					return nil, ctxErr
				}
				return nil, fmt.Errorf("watch %q: closed before initial snapshot completed", kv.Bucket())
			}
			if e == nil {
				// nil entry on an open channel: initial values fully delivered.
				return out, nil
			}
			out = append(out, e)
		case <-ctx.Done():
			return nil, ctx.Err()
		}
	}
}