Files
unibus/pkg/membership/migrate.go
T
agent 9013ea5e33 feat(0003c): membershipd migrate-to-kv (idempotent SQLite -> JetStream KV)
The one-time data move decentralization needs (issue 0003c): copy the
entire control-plane state from the local SQLite database into the
replicated JetStream KV buckets, with a backup taken first.

pkg/membership:
- Snapshot / SealedKeyRecord: a backend-agnostic dump of the whole
  control plane (rooms with their real epoch, members, every sealed-key
  row across epochs, users with status).
- (*sqliteStore).ExportSnapshot and (*jetstreamStore).ExportSnapshot read
  a full Snapshot from each backend; (*jetstreamStore).importSnapshot
  writes one with raw Puts (preserving epoch/status, not resetting to
  defaults) so the migration is faithful and idempotent (every write is
  an overwrite, so re-running converges).
- MigrateSQLiteToKV orchestrates export -> import; BackupSQLite makes a
  consistent copy via SQLite's VACUUM INTO before any migration.

cmd/membershipd:
- `membershipd migrate-to-kv --db <path> --nats-url <url> [--replicas N]
  [--ca <cert>] [--no-backup]` backs up the SQLite file, connects to the
  cluster's NATS, and migrates. Dispatched on the host like `user`.

Tests (DoD: golden + edge + parity):
- TestMigrateSQLiteToKVParity: seed a representative SQLite (two rooms,
  one rekeyed to epoch 2, members, a revoked user); after migration the
  KV ExportSnapshot equals the SQLite ExportSnapshot.
- TestMigrateSQLiteToKVIdempotent: running the migration twice yields the
  same KV state.
- TestBackupSQLiteCreatesConsistentCopy: the backup reopens with
  identical data.
Plus a binary smoke (seed user -> run server -> migrate-to-kv -> re-run):
backup written, 1 user migrated, second run identical.
2026-06-07 15:09:56 +02:00

177 lines
5.7 KiB
Go

package membership
// Migration from the local SQLite control plane to replicated JetStream KV
// (issue 0003c). It is the one-time, idempotent data move that decentralization
// needs: read the entire SQLite state, write it into the KV buckets. Re-running
// it is safe (every KV write is an overwrite), so a partial/interrupted run is
// recovered by running again, and a parity test can assert the two stores hold
// the same state before and after.
import (
	"database/sql"
	"fmt"
	"os"
	"strings"
	"time"

	"github.com/nats-io/nats.go/jetstream"
)
// SealedKeyRecord mirrors a single room_keys row: the room key sealed for one
// endpoint at one epoch. Snapshots carry these rows verbatim so that an import
// can rebuild a room's exact epoch history, which replaying CreateRoom/AddMember
// alone could not do for a multi-epoch room.
type SealedKeyRecord struct {
	// RoomID names the room the key belongs to; Endpoint names the endpoint
	// the key is sealed for.
	RoomID, Endpoint string
	// Epoch is the key epoch this row was written for.
	Epoch int
	// Sealed is the sealed key blob exactly as read from the sealed_key column.
	Sealed []byte
}
// Snapshot is the complete control-plane state, backend-agnostic. It is what
// ExportSnapshot produces and importSnapshot consumes, so the SQLite->KV
// migration and the parity test both work in terms of it.
type Snapshot struct {
	// Rooms holds one entry per room. The SQLite export fills it in room_id
	// order and leaves it nil when there are no rooms.
	Rooms []RoomInfo
	// Members groups member rows by room. The SQLite export always allocates
	// the map (possibly empty) and appends members in (room_id, endpoint) order.
	Members map[string][]Member // roomID -> members
	// Keys holds every sealed-key row across all epochs. The SQLite export
	// orders them by (room_id, endpoint, epoch).
	Keys []SealedKeyRecord
	// Users is the user list; the SQLite export takes it from ListUsers.
	Users []User
}
// MigrateReport is the tally of one migration run, meant for the operator log.
type MigrateReport struct {
	// BackupPath is left empty by MigrateSQLiteToKV; presumably the caller
	// fills it with the path returned by BackupSQLite (verify at the call site).
	BackupPath string
	// Rooms, Members, Keys and Users count the rooms, member rows, sealed-key
	// rows and users contained in the migrated snapshot.
	Rooms, Members, Keys, Users int
}
// MigrateSQLiteToKV reads the SQLite store at sqlitePath and writes its entire
// state into the JetStream KV store opened on js with cfg. The flow is:
// open SQLite -> ExportSnapshot -> OpenJetStream -> importSnapshot.
//
// The migration is designed to be idempotent: importSnapshot is expected to
// overwrite existing keys, so re-running after a partial or interrupted run
// converges to the same state. The caller is responsible for backing up the
// SQLite file first (BackupSQLite) — this function only reads it.
//
// On success it returns a report with the number of rooms, member rows,
// sealed-key rows and users in the migrated snapshot; BackupPath is left empty.
// Any failure is returned wrapped with a "migrate: ..." prefix naming the step.
func MigrateSQLiteToKV(sqlitePath string, js jetstream.JetStream, cfg JetStreamConfig) (*MigrateReport, error) {
	src, err := openSQLite(sqlitePath)
	if err != nil {
		return nil, fmt.Errorf("migrate: open sqlite %q: %w", sqlitePath, err)
	}
	defer src.Close()

	snap, err := src.ExportSnapshot()
	if err != nil {
		return nil, fmt.Errorf("migrate: export sqlite: %w", err)
	}

	dst, err := OpenJetStream(js, cfg)
	if err != nil {
		return nil, fmt.Errorf("migrate: open kv: %w", err)
	}
	// importSnapshot lives on the concrete KV store, not on the interface that
	// OpenJetStream returns. Check the assertion so an unexpected
	// implementation surfaces as an error instead of a panic.
	kv, ok := dst.(*jetstreamStore)
	if !ok {
		return nil, fmt.Errorf("migrate: open kv: unexpected store type %T", dst)
	}
	if err := kv.importSnapshot(snap); err != nil {
		return nil, fmt.Errorf("migrate: import to kv: %w", err)
	}

	// Members is keyed by room; the report wants the total across all rooms.
	members := 0
	for _, ms := range snap.Members {
		members += len(ms)
	}
	return &MigrateReport{
		Rooms:   len(snap.Rooms),
		Members: members,
		Keys:    len(snap.Keys),
		Users:   len(snap.Users),
	}, nil
}
// BackupSQLite makes a consistent copy of the SQLite database next to it,
// named "<path>.bak.<unixnano>", using SQLite's own VACUUM INTO (which writes a
// transactionally-consistent snapshot even with a live WAL). Always call this
// before MigrateSQLiteToKV so a botched migration can be undone.
//
// It returns the backup path on success. If path does not exist (or cannot be
// stat'ed) it returns an error wrapping the underlying filesystem error and
// touches nothing; open, ping and VACUUM failures are likewise wrapped with a
// "backup: ..." prefix, and the returned path is empty on every error.
func BackupSQLite(path string) (string, error) {
	// Fail fast on a missing source. Opening a nonexistent path through the
	// driver may create a fresh empty database, which VACUUM INTO would then
	// "back up" and report as success — hiding a mistyped --db path.
	if _, err := os.Stat(path); err != nil {
		return "", fmt.Errorf("backup: stat %q: %w", path, err)
	}

	dst := fmt.Sprintf("%s.bak.%d", path, time.Now().UnixNano())
	db, err := sql.Open("sqlite", "file:"+path+"?_pragma=busy_timeout(5000)")
	if err != nil {
		return "", fmt.Errorf("backup: open %q: %w", path, err)
	}
	defer db.Close()
	// sql.Open is lazy; Ping forces a real connection so open errors show up
	// here rather than inside the VACUUM statement.
	if err := db.Ping(); err != nil {
		return "", fmt.Errorf("backup: ping %q: %w", path, err)
	}
	// VACUUM INTO writes a fresh, consistent database file; the literal path is
	// safely single-quoted (it is operator-supplied, never network input).
	if _, err := db.Exec("VACUUM INTO '" + strings.ReplaceAll(dst, "'", "''") + "'"); err != nil {
		return "", fmt.Errorf("backup: VACUUM INTO %q: %w", dst, err)
	}
	return dst, nil
}
// ---- SQLite export --------------------------------------------------------
// ExportSnapshot reads the entire SQLite control-plane state into a Snapshot:
// rooms, members grouped by room, every sealed-key row, and the user list
// (via ListUsers). Each table is read by its own helper, which closes its
// cursor before the next query starts. Every failure — query, scan or row
// iteration — is returned wrapped with an "export: ..." prefix naming the step.
func (s *sqliteStore) ExportSnapshot() (*Snapshot, error) {
	rooms, err := s.exportRooms()
	if err != nil {
		return nil, err
	}
	members, err := s.exportMembers()
	if err != nil {
		return nil, err
	}
	keys, err := s.exportRoomKeys()
	if err != nil {
		return nil, err
	}
	users, err := s.ListUsers()
	if err != nil {
		return nil, fmt.Errorf("export: list users: %w", err)
	}
	return &Snapshot{Rooms: rooms, Members: members, Keys: keys, Users: users}, nil
}

// exportRooms reads every row of rooms, ordered by room_id. The result is nil
// when the table is empty.
func (s *sqliteStore) exportRooms() ([]RoomInfo, error) {
	rows, err := s.db.Query(`SELECT room_id, subject, key_epoch, encrypt, persist, sign_msgs, owner_endpoint FROM rooms ORDER BY room_id`)
	if err != nil {
		return nil, fmt.Errorf("export: query rooms: %w", err)
	}
	defer rows.Close()

	var rooms []RoomInfo
	for rows.Next() {
		var r RoomInfo
		// The flag columns are scanned as integers; non-zero means true.
		var enc, per, sgn int
		if err := rows.Scan(&r.RoomID, &r.Subject, &r.Epoch, &enc, &per, &sgn, &r.OwnerEndpoint); err != nil {
			return nil, fmt.Errorf("export: scan room: %w", err)
		}
		r.Encrypt, r.Persist, r.SignMsgs = enc != 0, per != 0, sgn != 0
		rooms = append(rooms, r)
	}
	if err := rows.Err(); err != nil {
		return nil, fmt.Errorf("export: iterate rooms: %w", err)
	}
	return rooms, nil
}

// exportMembers reads every row of members, ordered by (room_id, endpoint),
// and groups them by room ID. The returned map is never nil.
func (s *sqliteStore) exportMembers() (map[string][]Member, error) {
	rows, err := s.db.Query(`SELECT room_id, endpoint, role, sign_pub, kex_pub FROM members ORDER BY room_id, endpoint`)
	if err != nil {
		return nil, fmt.Errorf("export: query members: %w", err)
	}
	defer rows.Close()

	members := map[string][]Member{}
	for rows.Next() {
		var roomID string
		var m Member
		if err := rows.Scan(&roomID, &m.Endpoint, &m.Role, &m.SignPub, &m.KexPub); err != nil {
			return nil, fmt.Errorf("export: scan member: %w", err)
		}
		members[roomID] = append(members[roomID], m)
	}
	if err := rows.Err(); err != nil {
		return nil, fmt.Errorf("export: iterate members: %w", err)
	}
	return members, nil
}

// exportRoomKeys reads every row of room_keys, ordered by
// (room_id, endpoint, epoch). The result is nil when the table is empty.
func (s *sqliteStore) exportRoomKeys() ([]SealedKeyRecord, error) {
	rows, err := s.db.Query(`SELECT room_id, epoch, endpoint, sealed_key FROM room_keys ORDER BY room_id, endpoint, epoch`)
	if err != nil {
		return nil, fmt.Errorf("export: query room_keys: %w", err)
	}
	defer rows.Close()

	var keys []SealedKeyRecord
	for rows.Next() {
		var rec SealedKeyRecord
		if err := rows.Scan(&rec.RoomID, &rec.Epoch, &rec.Endpoint, &rec.Sealed); err != nil {
			return nil, fmt.Errorf("export: scan room_key: %w", err)
		}
		keys = append(keys, rec)
	}
	if err := rows.Err(); err != nil {
		return nil, fmt.Errorf("export: iterate room_keys: %w", err)
	}
	return keys, nil
}