feat(0003b): membership.Store interface + JetStream KV implementation

Branch-by-abstraction for the control-plane store (issue 0003b), so the
membership state can move off process-local SQLite onto replicated
JetStream KV without rewriting callers and without breaking master.

pkg/membership:
- Store is now an interface (rooms/members/keys + user allowlist +
  Close). The existing SQLite implementation is renamed sqliteStore and
  stays the default: Open(path) still returns it. openSQLite keeps the
  concrete type for internal callers (the 0003c migration).
- ErrNotFound is a storage-agnostic "no such record" sentinel; both
  backends return it (the SQLite store maps sql.ErrNoRows to it). The
  control plane now branches on ErrNotFound instead of sql.ErrNoRows, so
  server.go no longer imports database/sql.
- jetstreamStore (new) implements Store over five replicated KV buckets:
  rooms, members, rooms_by_member (reverse index for ListRoomsForEndpoint),
  room_keys, users. Replication factor is configurable (R1..R5) for the
  R1->R3 rollout. Every read is bounded by OpTimeout, and IsAuthorized /
  HasAdmin FAIL CLOSED on any backend error (a KV quorum loss denies,
  never admits), per the audit's requirement for the decentralized store.

dev/feature_flags.json:
- Add the `decentralized` flag (OFF): sqliteStore default while off,
  jetstreamStore behind it. The membershipd boot wiring that selects the
  KV store is deliberately deferred to 0003e/0003f (the embedded-NATS
  authenticator<->store bootstrap is part of the session/deploy redesign);
  OFF keeps the single-node SQLite control plane unchanged.

Tests (DoD: golden + edges + error path):
- TestJetStreamStoreRoomsCRUD: encrypted room + owner + invited member
  round-trip through every room/member/key method, including latest-epoch
  resolution and rekey.
- TestJetStreamStoreUsers: add/get/authorize/list/revoke + admin gate,
  with case-insensitive key normalization and duplicate rejection.
- TestJetStreamStoreNotFound: ErrNotFound mapping for misses.
- TestJetStreamStoreIsAuthorizedFailClosed: NATS backend shut down ->
  IsAuthorized and HasAdmin both DENY within the bounded timeout.

The full existing suite stays green: sqliteStore's behavior is unchanged.
This commit is contained in:
agent
2026-06-07 15:04:52 +02:00
parent 3230b31ade
commit 6b3ace1d39
10 changed files with 883 additions and 33 deletions
+77 -18
View File
@@ -13,6 +13,7 @@ package membership
import (
"database/sql"
"embed"
"errors"
"fmt"
"io/fs"
"sort"
@@ -26,6 +27,14 @@ import (
//go:embed migrations/*.sql
var migrationsFS embed.FS
// ErrNotFound is the store-agnostic "no such record" sentinel. Both backends
// (SQLite and JetStream KV) return it, wrapped, when a lookup misses, so callers
// distinguish "not invited / no key yet" from a genuine backend failure without
// depending on a specific driver's error (the SQLite store maps sql.ErrNoRows to
// it; the KV store maps a missing key to it). This is what lets the control
// plane stay storage-agnostic under the branch-by-abstraction of issue 0003b.
var ErrNotFound = errors.New("membership: not found")
// Member is a participant of a room with their published public keys.
type Member struct {
Endpoint string `json:"endpoint"`
@@ -45,14 +54,58 @@ type RoomInfo struct {
OwnerEndpoint string
}
// Store is the SQLite-backed membership/key store.
type Store struct {
// Store is the membership/key control-plane store: the authoritative source of
// room metadata, the member directory, per-epoch sealed room keys, and the bus
// user allowlist. It is an interface (branch-by-abstraction, issue 0003b) with
// two implementations: sqliteStore (the default, single-node, local SQLite) and
// jetstreamStore (rooms/members/keys/users on replicated JetStream KV, selected
// when the `decentralized` flag is on). Every lookup miss returns ErrNotFound
// (wrapped); every implementation MUST fail closed (IsAuthorized returns false
// on any backend error), so a KV quorum loss denies rather than admits.
type Store interface {
// Rooms / members / keys.
// CreateRoom creates a room and registers its owner together with the
// owner's public keys and sealed room key. The SQLite backend starts the
// room at epoch 1 and returns an error for a duplicate room id.
CreateRoom(info RoomInfo, ownerSignPub, ownerKexPub, ownerSealedKey []byte) error
// GetRoom returns room metadata, including the current epoch.
GetRoom(roomID string) (RoomInfo, error)
// AddMember inserts a member and stores their sealed key for the supplied
// epoch.
AddMember(roomID string, m Member, epoch int, sealedKey []byte) error
// GetMember returns a single member of a room.
GetMember(roomID, endpoint string) (Member, error)
// ListMembers returns all members of a room (the SQLite backend orders them
// by endpoint).
ListMembers(roomID string) ([]Member, error)
// ListRoomsForEndpoint returns every room the endpoint is a member of, with
// the room's current metadata and the endpoint's role. An endpoint that is in
// no rooms yields an empty slice, not an error.
ListRoomsForEndpoint(endpoint string) ([]RoomMembership, error)
// GetSealedKey returns the epoch and sealed room key for an endpoint. If
// epoch <= 0, the latest epoch for that endpoint is returned. A miss is
// reported as a wrapped ErrNotFound.
GetSealedKey(roomID, endpoint string, epoch int) (int, []byte, error)
// PutSealedKeys stores a batch of sealed keys (endpoint -> sealed bytes) for
// the given epoch; the SQLite backend upserts so a rekey can overwrite stale
// entries.
PutSealedKeys(roomID string, epoch int, keys map[string][]byte) error
// BumpEpoch sets the room's current key epoch to newEpoch.
BumpEpoch(roomID string, newEpoch int) error
// RemoveMember deletes a member from a room. The SQLite backend leaves the
// member's sealed keys for past epochs intact.
RemoveMember(roomID, endpoint string) error
// Users (the bus allowlist).
// AddUser adds a user to the allowlist under signPub with the given handle
// and role.
AddUser(signPub, handle, role string) error
// GetUser looks up a single allowlist entry by signPub.
GetUser(signPub string) (User, error)
// ListUsers returns the allowlist entries.
ListUsers() ([]User, error)
// RevokeUser revokes the allowlist entry for signPub.
RevokeUser(signPub string) error
// IsAuthorized reports whether signPub is authorized. Implementations MUST
// fail closed: any backend error yields false.
IsAuthorized(signPub string) bool
// HasAdmin is the admin gate. NOTE(review): presumably reports whether at
// least one admin user exists — confirm against the implementations. The
// JetStream backend fails closed here too (a backend error yields false).
HasAdmin() bool
// Lifecycle.
// Close releases the backend's resources (the SQLite backend closes its
// underlying database).
Close() error
}
// sqliteStore is the SQLite-backed implementation of Store (the default,
// single-node backend). It stays the production default while the
// `decentralized` flag is off.
type sqliteStore struct {
db *sql.DB
}
// Open opens (creating if needed) the SQLite database at path and applies all
// embedded migrations idempotently.
func Open(path string) (*Store, error) {
// Open opens (creating if needed) the SQLite database at path, applies all
// embedded migrations idempotently, and returns it as a Store. It remains the
// default control-plane backend; the JetStream KV store is opened separately
// (OpenJetStream) when decentralization is enabled.
func Open(path string) (Store, error) {
return openSQLite(path)
}
// openSQLite is the concrete constructor, returning *sqliteStore so internal
// callers (e.g. the SQLite->KV migration) can use SQLite-specific helpers that
// are not part of the storage-agnostic Store interface.
func openSQLite(path string) (*sqliteStore, error) {
// _pragma busy_timeout avoids spurious "database is locked" under concurrent
// HTTP handlers; foreign_keys kept off — we manage referential integrity in code.
dsn := fmt.Sprintf("file:%s?_pragma=busy_timeout(5000)&_pragma=journal_mode(WAL)", path)
@@ -64,7 +117,7 @@ func Open(path string) (*Store, error) {
db.Close()
return nil, fmt.Errorf("membership: ping db: %w", err)
}
s := &Store{db: db}
s := &sqliteStore{db: db}
if err := s.applyMigrations(); err != nil {
db.Close()
return nil, err
@@ -73,11 +126,11 @@ func Open(path string) (*Store, error) {
}
// Close closes the underlying database.
func (s *Store) Close() error { return s.db.Close() }
func (s *sqliteStore) Close() error { return s.db.Close() }
// applyMigrations runs every embedded migration in lexical order, tolerating
// the "already applied" errors that SQLite's non-idempotent DDL produces.
func (s *Store) applyMigrations() error {
func (s *sqliteStore) applyMigrations() error {
files, err := fs.Glob(migrationsFS, "migrations/*.sql")
if err != nil {
return fmt.Errorf("membership: glob migrations: %w", err)
@@ -103,7 +156,7 @@ func nowRFC3339() string { return time.Now().UTC().Format(time.RFC3339Nano) }
// CreateRoom inserts a room at epoch 1, registers the owner as a member with
// role "owner", and stores the owner's sealed key for epoch 1. Idempotent
// inserts are not used: a duplicate room_id returns an error.
func (s *Store) CreateRoom(info RoomInfo, ownerSignPub, ownerKexPub, ownerSealedKey []byte) error {
func (s *sqliteStore) CreateRoom(info RoomInfo, ownerSignPub, ownerKexPub, ownerSealedKey []byte) error {
tx, err := s.db.Begin()
if err != nil {
return fmt.Errorf("membership: begin: %w", err)
@@ -142,7 +195,7 @@ func (s *Store) CreateRoom(info RoomInfo, ownerSignPub, ownerKexPub, ownerSealed
}
// GetRoom returns room metadata (including current epoch).
func (s *Store) GetRoom(roomID string) (RoomInfo, error) {
func (s *sqliteStore) GetRoom(roomID string) (RoomInfo, error) {
var info RoomInfo
var enc, per, sgn int
err := s.db.QueryRow(
@@ -158,7 +211,7 @@ func (s *Store) GetRoom(roomID string) (RoomInfo, error) {
// AddMember inserts a member at the given role and stores their sealed key for
// the supplied epoch.
func (s *Store) AddMember(roomID string, m Member, epoch int, sealedKey []byte) error {
func (s *sqliteStore) AddMember(roomID string, m Member, epoch int, sealedKey []byte) error {
tx, err := s.db.Begin()
if err != nil {
return fmt.Errorf("membership: begin: %w", err)
@@ -185,7 +238,7 @@ func (s *Store) AddMember(roomID string, m Member, epoch int, sealedKey []byte)
}
// GetMember returns a single member of a room.
func (s *Store) GetMember(roomID, endpoint string) (Member, error) {
func (s *sqliteStore) GetMember(roomID, endpoint string) (Member, error) {
var m Member
err := s.db.QueryRow(
`SELECT endpoint, role, sign_pub, kex_pub FROM members WHERE room_id = ? AND endpoint = ?`,
@@ -198,7 +251,7 @@ func (s *Store) GetMember(roomID, endpoint string) (Member, error) {
}
// ListMembers returns all members of a room ordered by endpoint.
func (s *Store) ListMembers(roomID string) ([]Member, error) {
func (s *sqliteStore) ListMembers(roomID string) ([]Member, error) {
rows, err := s.db.Query(
`SELECT endpoint, role, sign_pub, kex_pub FROM members WHERE room_id = ? ORDER BY endpoint`,
roomID,
@@ -230,7 +283,7 @@ type RoomMembership struct {
// ListRoomsForEndpoint returns every room the given endpoint is a member of,
// with the room's current metadata and the endpoint's role, ordered by room id.
// An endpoint that is in no rooms yields an empty slice (not an error).
func (s *Store) ListRoomsForEndpoint(endpoint string) ([]RoomMembership, error) {
func (s *sqliteStore) ListRoomsForEndpoint(endpoint string) ([]RoomMembership, error) {
rows, err := s.db.Query(
`SELECT r.room_id, r.subject, r.key_epoch, r.encrypt, r.persist, r.sign_msgs, r.owner_endpoint, m.role
FROM members m JOIN rooms r ON r.room_id = m.room_id
@@ -257,7 +310,7 @@ func (s *Store) ListRoomsForEndpoint(endpoint string) ([]RoomMembership, error)
// GetSealedKey returns the sealed room key for an endpoint at a given epoch.
// If epoch <= 0, the latest epoch for that endpoint is returned.
func (s *Store) GetSealedKey(roomID, endpoint string, epoch int) (int, []byte, error) {
func (s *sqliteStore) GetSealedKey(roomID, endpoint string, epoch int) (int, []byte, error) {
var ep int
var sealed []byte
var err error
@@ -275,6 +328,12 @@ func (s *Store) GetSealedKey(roomID, endpoint string, epoch int) (int, []byte, e
).Scan(&ep, &sealed)
}
if err != nil {
// Map "no such row" to the store-agnostic sentinel so the control plane
// can tell "not invited / no key yet" (-> 403 with a helpful message) from
// a genuine backend failure, the same way the KV store will.
if errors.Is(err, sql.ErrNoRows) {
return 0, nil, fmt.Errorf("membership: get sealed key %q/%q@%d: %w", roomID, endpoint, epoch, ErrNotFound)
}
return 0, nil, fmt.Errorf("membership: get sealed key %q/%q@%d: %w", roomID, endpoint, epoch, err)
}
return ep, sealed, nil
@@ -282,7 +341,7 @@ func (s *Store) GetSealedKey(roomID, endpoint string, epoch int) (int, []byte, e
// PutSealedKeys stores a batch of sealed keys for the given epoch (endpoint ->
// sealed bytes), upserting on conflict so a rekey can overwrite stale entries.
func (s *Store) PutSealedKeys(roomID string, epoch int, keys map[string][]byte) error {
func (s *sqliteStore) PutSealedKeys(roomID string, epoch int, keys map[string][]byte) error {
tx, err := s.db.Begin()
if err != nil {
return fmt.Errorf("membership: begin: %w", err)
@@ -301,7 +360,7 @@ func (s *Store) PutSealedKeys(roomID string, epoch int, keys map[string][]byte)
}
// BumpEpoch sets the room's current key_epoch to newEpoch.
func (s *Store) BumpEpoch(roomID string, newEpoch int) error {
func (s *sqliteStore) BumpEpoch(roomID string, newEpoch int) error {
if _, err := s.db.Exec(`UPDATE rooms SET key_epoch = ? WHERE room_id = ?`, newEpoch, roomID); err != nil {
return fmt.Errorf("membership: bump epoch %q->%d: %w", roomID, newEpoch, err)
}
@@ -310,7 +369,7 @@ func (s *Store) BumpEpoch(roomID string, newEpoch int) error {
// RemoveMember deletes a member from a room. Their sealed keys for past epochs
// are left intact (they encrypt only data that member could already read).
func (s *Store) RemoveMember(roomID, endpoint string) error {
func (s *sqliteStore) RemoveMember(roomID, endpoint string) error {
if _, err := s.db.Exec(`DELETE FROM members WHERE room_id = ? AND endpoint = ?`, roomID, endpoint); err != nil {
return fmt.Errorf("membership: remove member %q/%q: %w", roomID, endpoint, err)
}