// Package client is the unibus client library: the single API that every peer // (process worker, human chat UI, LLM agent) uses to talk to the bus. // // It hides the two planes behind one object: // - control plane (HTTP to membershipd): create/join rooms, invite, fetch // sealed keys, rekey on kick. // - data plane (NATS): publish/subscribe/request/reply of frames. // // In encrypted rooms it transparently seals/opens payloads with the room key K, // distributes K to invitees via sealed boxes, signs and verifies messages, and // rotates K to a new epoch on kick (forward secrecy). All crypto comes from the // fn-registry cybersecurity package; this library never reimplements primitives. package client import ( "bytes" "context" "crypto/rand" "crypto/tls" "encoding/base64" "encoding/hex" "encoding/json" "fmt" "io" "net/http" "strconv" "strings" "sync" "time" cs "fn-registry/functions/cybersecurity" "github.com/enmanuel/unibus/pkg/busauth" "github.com/enmanuel/unibus/pkg/frame" "github.com/enmanuel/unibus/pkg/membership" "github.com/enmanuel/unibus/pkg/room" "github.com/nats-io/nats.go" "github.com/nats-io/nats.go/jetstream" ) // Endpoint is the public identity of a peer. type Endpoint struct { ID string SignPub []byte KexPub []byte } // Client is a connected unibus peer. type Client struct { id cs.Identity endpoint string nc *nats.Conn js jetstream.JetStream // durable plane for rooms with Policy.Persist ctrlURLs []string // control-plane HTTP endpoints, tried in order (failover) http *http.Client // natsServers + natsOpts are retained so RefreshSession can rebuild the // data-plane connection (re-triggering the server's subject-ACL evaluation). natsServers []string natsOpts []nats.Option mu sync.RWMutex keyCache map[string]map[int][]byte // roomID -> epoch -> K signCache map[string][]byte // sender endpoint -> sign pub (for verification) } // Options configures how a client connects to the bus. The zero value is the // legacy behavior: a plain NATS connection with no nkey and no TLS — what dev // stacks and a not-yet-secured server expect. Secured deployments set these. type Options struct { // UseNkey authenticates the NATS connection with the peer's Ed25519 identity // reused as a NATS nkey. It MUST match the server: nats.go refuses to connect // with an nkey to a server that does not advertise nkey auth ("nkeys not // supported by the server"), so this is opt-in rather than always-on. UseNkey bool // TLS, when non-nil, secures the NATS (data plane) connection and pins the // server to this config's RootCAs (the bus's self-signed CA). Build it with // busauth.LoadCATLSConfig(caPath). Nil keeps the data plane plaintext. TLS *tls.Config // CtrlTLS, when non-nil, secures the HTTP control-plane connection and pins it // to this config's RootCAs. It is separate from TLS so the two planes can be // secured independently (a test may TLS one and not the other); production // sets both to the same CA via Connect. Nil keeps the control plane plaintext. CtrlTLS *tls.Config // NatsServers are ADDITIONAL NATS seed URLs for cluster failover (issue // 0003e), beyond the primary natsURL passed to the constructor. With more // than one server nats.go reconnects to a surviving node automatically when // the one a client is attached to dies, so a node loss is transparent. NatsServers []string // CtrlURLs are ADDITIONAL control-plane HTTP endpoints (one per node) beyond // the primary ctrlURL. Each request is tried against them in order until one // answers, so the control plane survives a node loss too. With the // decentralized KV store every node serves the same state, so any of them // can answer any request. CtrlURLs []string } // dedupNonEmpty returns the input with empty strings dropped and duplicates // removed, preserving order. Used to build the NATS seed list and control-plane // list from a primary URL plus optional extras without a redundant entry. func dedupNonEmpty(in []string) []string { seen := map[string]bool{} var out []string for _, s := range in { if s == "" || seen[s] { continue } seen[s] = true out = append(out, s) } return out } // New connects to NATS and records the control-plane URL with default Options // (no nkey, no TLS). The identity holds the peer's long-term keypairs. func New(natsURL, ctrlURL string, id cs.Identity) (*Client, error) { return NewWithOptions(natsURL, ctrlURL, id, Options{}) } // Connect is the single migration seam every peer (worker, chat, mobile, // gateway) uses to pick its security posture from one input: the CA path. With // a non-empty caPath it connects securely — TLS pinned to that CA plus nkey // authentication on the data plane — matching a bus running with bus-auth // enforce + bus-tls. With an empty caPath it falls back to the legacy plaintext, // no-nkey connection for local dev against an unsecured bus. The control-plane // HTTP requests are signed in both cases (that signing is unconditional). func Connect(natsURL, ctrlURL string, id cs.Identity, caPath string) (*Client, error) { if caPath == "" { return New(natsURL, ctrlURL, id) } // A CA implies the bus is TLS on BOTH planes. Refuse a plaintext control-plane // URL: signing gives integrity, not confidentiality, so sending metadata over // http:// when the operator provisioned a CA would silently leak it to a MITM // (audit H5). Force https rather than silently downgrade. if !strings.HasPrefix(ctrlURL, "https://") { return nil, fmt.Errorf("client: control-plane URL %q must be https:// when a CA is provided", ctrlURL) } tlsCfg, err := busauth.LoadCATLSConfig(caPath) if err != nil { return nil, fmt.Errorf("client: load CA %q: %w", caPath, err) } // Pin the same CA on both planes: nkey+TLS on NATS, TLS on the HTTP control plane. return NewWithOptions(natsURL, ctrlURL, id, Options{UseNkey: true, TLS: tlsCfg, CtrlTLS: tlsCfg}) } // NewWithOptions is New with explicit connection options (nkey auth, and, from // phase 0001d, TLS). It is the single place the data-plane connection is built, // so every peer (worker, chat, mobile, gateway) gets identical behavior by // passing the same Options. func NewWithOptions(natsURL, ctrlURL string, id cs.Identity, opts Options) (*Client, error) { // Seed list = primary + extras. With more than one seed, nats.go fails over // to a surviving node on disconnect; MaxReconnects(-1) keeps it retrying // indefinitely so a node coming back is rejoined rather than given up on. natsServers := dedupNonEmpty(append([]string{natsURL}, opts.NatsServers...)) natsOpts := []nats.Option{ nats.Name("unibus-client"), nats.MaxReconnects(-1), nats.ReconnectWait(250 * time.Millisecond), } if len(natsServers) > 1 { // Try every seed on the initial connect too, so startup tolerates one // seed being down. natsOpts = append(natsOpts, nats.RetryOnFailedConnect(true)) } if opts.UseNkey { nkeyPub, nkeySign, err := busauth.ClientNkey(id.SignPriv) if err != nil { return nil, fmt.Errorf("client: derive nkey: %w", err) } natsOpts = append(natsOpts, nats.Nkey(nkeyPub, nkeySign)) } if opts.TLS != nil { natsOpts = append(natsOpts, nats.Secure(opts.TLS)) } nc, err := nats.Connect(strings.Join(natsServers, ","), natsOpts...) if err != nil { return nil, fmt.Errorf("client: connect nats %v: %w", natsServers, err) } // JetStream context for the durable plane. Obtaining it does not require any // stream to exist yet and has no effect on cleartext/ephemeral rooms — those // keep using core nc.Publish / nc.Subscribe untouched. js, err := jetstream.New(nc) if err != nil { nc.Close() return nil, fmt.Errorf("client: init jetstream: %w", err) } // The control-plane HTTP client pins the bus CA when CtrlTLS is set, so an // https:// control plane is verified against the bus's own CA rather than the // system roots (audit H5). Without it the client stays plaintext for dev. httpClient := &http.Client{Timeout: 10 * time.Second} if opts.CtrlTLS != nil { httpClient.Transport = &http.Transport{TLSClientConfig: opts.CtrlTLS.Clone()} } return &Client{ id: id, endpoint: frame.EndpointID(id.SignPub), nc: nc, js: js, ctrlURLs: dedupNonEmpty(append([]string{ctrlURL}, opts.CtrlURLs...)), http: httpClient, natsServers: natsServers, natsOpts: natsOpts, keyCache: map[string]map[int][]byte{}, signCache: map[string][]byte{}, }, nil } // RefreshSession rebuilds the data-plane NATS connection so the server's // subject-ACL authenticator re-evaluates this peer's room membership (issue // 0003e, audit H4 residual). Call it after a membership change — a room you // created, were invited to, or joined — when the bus enforces per-subject // permissions, so the new room's subject becomes publishable and subscribable // (NATS freezes permissions at connect time, so the prior connection cannot see // the new room). // // It opens a fresh connection with the same seeds/options and swaps it in. // IMPORTANT: active subscriptions from the previous connection are dropped — // re-subscribe (client.Subscribe) to your rooms after calling this. The key and // signer caches are preserved. On a non-ACL bus this is a no-op-safe reconnect. func (c *Client) RefreshSession() error { nc, err := nats.Connect(strings.Join(c.natsServers, ","), c.natsOpts...) if err != nil { return fmt.Errorf("client: refresh session: reconnect nats: %w", err) } js, err := jetstream.New(nc) if err != nil { nc.Close() return fmt.Errorf("client: refresh session: init jetstream: %w", err) } old := c.nc c.mu.Lock() c.nc = nc c.js = js c.mu.Unlock() old.Close() return nil } // Endpoint returns this client's public identity. func (c *Client) Endpoint() Endpoint { return Endpoint{ID: c.endpoint, SignPub: c.id.SignPub, KexPub: c.id.KexPub} } // Close releases the NATS connection. func (c *Client) Close() error { c.nc.Close() return nil } // ConnectedServer returns the URL of the NATS node this client is currently // attached to (empty when disconnected). It is observability for cluster // failover: after a node dies, this reports the surviving node nats.go // reconnected to. IsConnected reports whether the data-plane link is up. func (c *Client) ConnectedServer() string { return c.nc.ConnectedUrl() } // IsConnected reports whether the NATS data-plane connection is currently up. func (c *Client) IsConnected() bool { return c.nc.IsConnected() } // ---- key cache ------------------------------------------------------------ func (c *Client) cacheKey(roomID string, epoch int, k []byte) { c.mu.Lock() defer c.mu.Unlock() m := c.keyCache[roomID] if m == nil { m = map[int][]byte{} c.keyCache[roomID] = m } m[epoch] = k } func (c *Client) getCachedKey(roomID string, epoch int) ([]byte, bool) { c.mu.RLock() defer c.mu.RUnlock() if m := c.keyCache[roomID]; m != nil { k, ok := m[epoch] return k, ok } return nil, false } // ---- control-plane HTTP helpers ------------------------------------------ func (c *Client) doJSON(method, path string, body, out any) error { var bodyBytes []byte if body != nil { b, err := json.Marshal(body) if err != nil { return fmt.Errorf("client: marshal request: %w", err) } bodyBytes = b } // Try each control-plane endpoint in order. A transport error (a dead node) // falls over to the next; an HTTP response (any status) is authoritative and // returned, since every node serves the same state. Each attempt is freshly // signed (new nonce), so a failed-over retry is never seen as a replay. var lastErr error for _, base := range c.ctrlURLs { req, err := c.newSignedRequestTo(base, method, path, bodyBytes) if err != nil { return err } if body != nil { req.Header.Set("Content-Type", "application/json") } resp, err := c.http.Do(req) if err != nil { lastErr = err continue // dead node: try the next control plane } defer resp.Body.Close() respBody, _ := io.ReadAll(resp.Body) if resp.StatusCode >= 300 { // Surface the server's structured {"error": "..."} message when present, // instead of leaking the raw HTTP envelope (method, path, status, body). var er struct { Error string `json:"error"` } if json.Unmarshal(respBody, &er) == nil && er.Error != "" { return fmt.Errorf("%s (HTTP %d)", er.Error, resp.StatusCode) } return fmt.Errorf("client: %s %s -> %d: %s", method, path, resp.StatusCode, string(respBody)) } if out != nil { if err := json.Unmarshal(respBody, out); err != nil { return fmt.Errorf("client: decode response: %w", err) } } return nil } return fmt.Errorf("client: %s %s: all control planes failed: %w", method, path, lastErr) } // signRequest signs the canonical bytes of req (req must already have its Sig // field cleared) with the client's Ed25519 key. It is symmetric with the // server's verifyOwnerSig. This is the PAYLOAD-level owner signature that // authorizes room operations (invite/rekey) by ownership — distinct from the // transport-level request signature applied by newSignedRequest below, which // authenticates the caller's identity on every request. func (c *Client) signRequest(req any) []byte { b, _ := json.Marshal(req) return cs.SignEd25519(c.id.SignPriv, b) } // newSignedRequestTo builds an *http.Request to the control-plane endpoint // `base` and attaches the transport authentication headers // (X-Unibus-Pub/Ts/Nonce/Sig) signing the canonical request bytes with this // peer's Ed25519 key. path is the request URI (path plus any query); body is the // raw request body (nil for GET). The server (membership.authenticate) verifies // these headers under the bus-auth flag. The signature covers method+path+ts+ // nonce+sha256(body), NOT the host, so the same request can be addressed to any // node — and each failover attempt mints a fresh nonce so it is never a replay. // // Signing happens on every request — including GETs — so that under enforce the // server can authenticate the caller and reject unregistered or revoked // identities uniformly. The canonical construction is the single source of truth // in membership.CanonicalRequest, shared by both sides. func (c *Client) newSignedRequestTo(base, method, path string, body []byte) (*http.Request, error) { var rdr io.Reader if body != nil { rdr = bytes.NewReader(body) } req, err := http.NewRequest(method, base+path, rdr) if err != nil { return nil, fmt.Errorf("client: new request: %w", err) } ts := strconv.FormatInt(time.Now().Unix(), 10) nonceRaw := make([]byte, 16) if _, err := rand.Read(nonceRaw); err != nil { return nil, fmt.Errorf("client: generate nonce: %w", err) } nonce := base64.StdEncoding.EncodeToString(nonceRaw) canonical := membership.CanonicalRequest(method, path, ts, nonce, body) sig := cs.SignEd25519(c.id.SignPriv, canonical) req.Header.Set("X-Unibus-Pub", hex.EncodeToString(c.id.SignPub)) req.Header.Set("X-Unibus-Ts", ts) req.Header.Set("X-Unibus-Nonce", nonce) req.Header.Set("X-Unibus-Sig", base64.StdEncoding.EncodeToString(sig)) return req, nil } // ---- mirror of server wire types (control plane) ------------------------- type policyJSON struct { Encrypt bool `json:"encrypt"` Persist bool `json:"persist"` SignMsgs bool `json:"sign_msgs"` } type endpointJSON struct { Endpoint string `json:"endpoint"` SignPub []byte `json:"sign_pub"` KexPub []byte `json:"kex_pub"` } type createRoomReq struct { Subject string `json:"subject"` Policy policyJSON `json:"policy"` Owner endpointJSON `json:"owner"` SealedKeySelf []byte `json:"sealed_key_self"` } type createRoomResp struct { RoomID string `json:"room_id"` } type inviteReq struct { By string `json:"by"` Sig []byte `json:"sig"` Member endpointJSON `json:"member"` SealedKey []byte `json:"sealed_key"` } type keyResp struct { Epoch int `json:"epoch"` SealedKey []byte `json:"sealed_key"` } type memberJSON struct { Endpoint string `json:"endpoint"` Role string `json:"role"` SignPub []byte `json:"sign_pub"` KexPub []byte `json:"kex_pub"` } type roomResp struct { Subject string `json:"subject"` Epoch int `json:"epoch"` Policy policyJSON `json:"policy"` } type rekeyKey struct { Endpoint string `json:"endpoint"` SealedKey []byte `json:"sealed_key"` } type rekeyReq struct { By string `json:"by"` Sig []byte `json:"sig"` NewEpoch int `json:"new_epoch"` Keys []rekeyKey `json:"keys"` Remove []string `json:"remove"` } type blobResp struct { Hash string `json:"hash"` } type memberRoomJSON struct { RoomID string `json:"room_id"` Subject string `json:"subject"` Epoch int `json:"epoch"` Policy policyJSON `json:"policy"` Role string `json:"role"` } // ---- room operations ------------------------------------------------------ // RoomRef is a room this peer belongs to, returned by ListMyRooms. It is the // unit of room discovery: a peer that was invited to a new room finds it here // and can then Join (fetch the sealed key) and Subscribe. type RoomRef struct { RoomID string Subject string Epoch int Policy room.Policy Role string } // ListMyRooms returns every room this peer is currently a member of. A peer // polls this to discover rooms it has been invited to (the control plane is // pull-based: there is no server push of invitations). func (c *Client) ListMyRooms() ([]RoomRef, error) { var resp []memberRoomJSON if err := c.doJSON("GET", "/members/"+c.endpoint+"/rooms", nil, &resp); err != nil { return nil, err } out := make([]RoomRef, 0, len(resp)) for _, r := range resp { out = append(out, RoomRef{ RoomID: r.RoomID, Subject: r.Subject, Epoch: r.Epoch, Policy: room.Policy{Encrypt: r.Policy.Encrypt, Persist: r.Policy.Persist, SignMsgs: r.Policy.SignMsgs}, Role: r.Role, }) } return out, nil } // newRoomKey returns 32 random bytes for a symmetric room key. func newRoomKey() ([]byte, error) { k := make([]byte, 32) if _, err := rand.Read(k); err != nil { return nil, fmt.Errorf("client: generate room key: %w", err) } return k, nil } // CreateRoom creates a room with the given subject and policy. For encrypted // rooms it generates K, seals K to itself, and caches K at epoch 1. func (c *Client) CreateRoom(subject string, p room.Policy) (string, error) { req := createRoomReq{ Subject: subject, Policy: policyJSON{Encrypt: p.Encrypt, Persist: p.Persist, SignMsgs: p.SignMsgs}, Owner: endpointJSON{Endpoint: c.endpoint, SignPub: c.id.SignPub, KexPub: c.id.KexPub}, } var k []byte if p.Encrypt { var err error if k, err = newRoomKey(); err != nil { return "", err } sealed, err := cs.SealKeyBox(c.id.KexPub, k) if err != nil { return "", fmt.Errorf("client: seal own key: %w", err) } req.SealedKeySelf = sealed } var resp createRoomResp if err := c.doJSON("POST", "/rooms", req, &resp); err != nil { return "", err } if p.Encrypt { c.cacheKey(resp.RoomID, 1, k) } // For persisted rooms, provision the durable JetStream stream up front so the // first publish (even before any subscriber exists) is captured for history. if p.Persist { ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second) defer cancel() if err := c.ensureStream(ctx, resp.RoomID, subject); err != nil { return "", err } } return resp.RoomID, nil } // Invite adds a member to a room. It seals the current-epoch room key to the // invitee's X25519 public key and signs the request as the owner. func (c *Client) Invite(roomID string, m Endpoint) error { info, err := c.fetchRoom(roomID) if err != nil { return err } var sealed []byte if info.Policy.Encrypt { k, ok := c.getCachedKey(roomID, info.Epoch) if !ok { return fmt.Errorf("client: invite: no cached key for room %s epoch %d", roomID, info.Epoch) } if sealed, err = cs.SealKeyBox(m.KexPub, k); err != nil { return fmt.Errorf("client: seal key for invitee: %w", err) } } req := inviteReq{ By: c.endpoint, Member: endpointJSON{Endpoint: m.ID, SignPub: m.SignPub, KexPub: m.KexPub}, SealedKey: sealed, } req.Sig = c.signRequest(req) // Sig is zero-valued at sign time return c.doJSON("POST", "/rooms/"+roomID+"/invite", req, nil) } // roomView is the resolved room metadata. type roomView struct { Subject string Epoch int Policy room.Policy } func (c *Client) fetchRoom(roomID string) (roomView, error) { var resp roomResp if err := c.doJSON("GET", "/rooms/"+roomID, nil, &resp); err != nil { return roomView{}, err } return roomView{ Subject: resp.Subject, Epoch: resp.Epoch, Policy: room.Policy{Encrypt: resp.Policy.Encrypt, Persist: resp.Policy.Persist, SignMsgs: resp.Policy.SignMsgs}, }, nil } // fetchKey retrieves and caches the room key K for the given epoch (epoch <= 0 // means latest). It opens the sealed box with the client's own X25519 keypair. func (c *Client) fetchKey(roomID string, epoch int) ([]byte, int, error) { if epoch > 0 { if k, ok := c.getCachedKey(roomID, epoch); ok { return k, epoch, nil } } path := fmt.Sprintf("/rooms/%s/key?endpoint=%s", roomID, c.endpoint) if epoch > 0 { path += fmt.Sprintf("&epoch=%d", epoch) } var resp keyResp if err := c.doJSON("GET", path, nil, &resp); err != nil { return nil, 0, err } k, err := cs.OpenKeyBox(c.id.KexPub, c.id.KexPriv, resp.SealedKey) if err != nil { return nil, 0, fmt.Errorf("client: open room key: %w", err) } c.cacheKey(roomID, resp.Epoch, k) return k, resp.Epoch, nil } // Join resolves room metadata and, for encrypted rooms, fetches and caches the // current room key. It does not subscribe to NATS (use Subscribe for that). func (c *Client) Join(roomID string) error { info, err := c.fetchRoom(roomID) if err != nil { return err } if info.Policy.Encrypt { if _, _, err := c.fetchKey(roomID, info.Epoch); err != nil { return err } } return nil } // signerPub returns the sign public key of a sender endpoint, fetching the // member list once and caching it. func (c *Client) signerPub(roomID, sender string) ([]byte, error) { c.mu.RLock() pub, ok := c.signCache[sender] c.mu.RUnlock() if ok { return pub, nil } var members []memberJSON if err := c.doJSON("GET", "/rooms/"+roomID+"/members", nil, &members); err != nil { return nil, err } c.mu.Lock() for _, m := range members { c.signCache[m.Endpoint] = m.SignPub } pub, ok = c.signCache[sender] c.mu.Unlock() if !ok { return nil, fmt.Errorf("client: no sign key for sender %q", sender) } return pub, nil } // ---- data plane: publish/subscribe --------------------------------------- // threadMeta carries the optional threading/reaction routing of a published // frame. The zero value yields a plain top-level message whose wire bytes are // identical to a pre-threading frame (the fields are omitempty). type threadMeta struct { threadID string // thread root message id replyTo string // message id being replied to / reacted to } // publishFrame is the single publish path shared by Publish, PublishReply and // React. It builds the envelope, seals+signs per the room policy, and routes // through JetStream (persisted rooms) or core NATS (ephemeral rooms). The only // thing the callers vary is the frame type and the threading metadata. func (c *Client) publishFrame(roomID string, ftype frame.FrameType, plaintext []byte, tm threadMeta) error { info, err := c.fetchRoom(roomID) if err != nil { return err } f := frame.Frame{ Type: ftype, Subject: info.Subject, Sender: c.endpoint, MsgID: newULID(), Epoch: info.Epoch, ThreadID: tm.threadID, ReplyTo: tm.replyTo, } if info.Policy.Encrypt { k, ep, err := c.fetchKey(roomID, info.Epoch) if err != nil { return err } nonce, ct, err := cs.SealAEAD(k, plaintext, []byte(info.Subject)) if err != nil { return fmt.Errorf("client: seal payload: %w", err) } f.Epoch, f.Nonce, f.Payload = ep, nonce, ct } else { f.Payload = plaintext } if info.Policy.SignMsgs { f.Sig = cs.SignEd25519(c.id.SignPriv, f.SigningBytes()) } b, err := f.Marshal() if err != nil { return fmt.Errorf("client: marshal frame: %w", err) } // Persisted rooms go through JetStream (durable, acked); ephemeral rooms keep // the exact core-NATS publish they had before. if info.Policy.Persist { return c.publishPersistent(roomID, info.Subject, b) } return c.nc.Publish(info.Subject, b) } // Publish sends plaintext to a room. For encrypted rooms it seals the payload // with the current K using the subject as AEAD additional-authenticated-data; // for signed rooms it attaches an Ed25519 signature. func (c *Client) Publish(roomID string, plaintext []byte) error { return c.publishFrame(roomID, frame.PUB, plaintext, threadMeta{}) } // PublishReply sends plaintext as a reply inside a thread. replyTo is the id of // the message being replied to; threadID is the thread root — pass replyTo when // you are starting a new thread off a top-level message, or the existing // ThreadID to keep replying within one. Encryption and signing are identical to // Publish; the threading metadata rides the cleartext envelope. Receivers read // Frame.ReplyTo / Frame.ThreadID to render the conversation tree. func (c *Client) PublishReply(roomID string, plaintext []byte, replyTo, threadID string) error { return c.publishFrame(roomID, frame.PUB, plaintext, threadMeta{threadID: threadID, replyTo: replyTo}) } // React publishes a reaction (emoji/shortcode) to a target message. The reaction // content travels in the payload, so it is sealed exactly like a normal message // and stays confidential in E2E rooms. Receivers dispatch on Frame.Type == // frame.REACT and read Frame.ReplyTo for the message being reacted to. func (c *Client) React(roomID, targetMsgID, emoji string) error { return c.publishFrame(roomID, frame.REACT, []byte(emoji), threadMeta{replyTo: targetMsgID}) } // Sub is a transport-agnostic handle to an active room subscription. It wraps // either a core NATS subscription (ephemeral rooms) or a JetStream durable // consumer (persisted rooms) behind a single Unsubscribe() method, so callers // (and tests) treat both planes uniformly. For a persisted room, Unsubscribe // stops local delivery but leaves the durable consumer's ack position on the // server, so a later Subscribe with the same peer resumes (offline replay). type Sub struct { nsub *nats.Subscription cc jetstream.ConsumeContext } // Unsubscribe stops delivery for this subscription. The error return keeps the // signature compatible with *nats.Subscription's Unsubscribe. func (s *Sub) Unsubscribe() error { if s == nil { return nil } if s.nsub != nil { return s.nsub.Unsubscribe() } if s.cc != nil { s.cc.Stop() } return nil } // Subscribe subscribes to a room and invokes handler for each message with the // decoded frame and (for encrypted rooms) the decrypted plaintext. Signature // verification and epoch-driven key refresh happen transparently. Messages that // fail verification or decryption are dropped (handler not called). // // For ephemeral rooms (Policy.Persist == false) this is a plain core-NATS // subscription, identical to before. For persisted rooms it binds a per-peer // durable JetStream consumer with DeliverAll, so a late joiner receives the // full history (scrollback) and a reconnecting peer resumes from its last ack // (offline replay). The frame-decode / verify / decrypt logic is shared between // both planes via processFrame. func (c *Client) Subscribe(roomID string, handler func(f frame.Frame, plaintext []byte)) (*Sub, error) { info, err := c.fetchRoom(roomID) if err != nil { return nil, err } deliver := func(data []byte) { c.processFrame(roomID, info, data, handler) } if info.Policy.Persist { cc, err := c.subscribePersistent(roomID, info.Subject, deliver) if err != nil { return nil, err } return &Sub{cc: cc}, nil } nsub, err := c.nc.Subscribe(info.Subject, func(msg *nats.Msg) { deliver(msg.Data) }) if err != nil { return nil, err } return &Sub{nsub: nsub}, nil } // processFrame decodes one wire message, verifies its signature and (for // encrypted rooms) decrypts its inline payload, then invokes handler. Messages // that fail verification or decryption are dropped (handler not called). This // is the single code path shared by the ephemeral and persisted subscribe // planes so their decode/verify/decrypt semantics never drift apart. func (c *Client) processFrame(roomID string, info roomView, data []byte, handler func(f frame.Frame, plaintext []byte)) { f, err := frame.Unmarshal(data) if err != nil { return } // A room with SignMsgs REQUIRES a signature, so an unsigned frame is // unauthenticated and must be dropped — not silently accepted. The previous // `&& f.Sig != nil` guard verified the signature only when one was present, so // an attacker with data-plane access could publish a frame with Sig==nil and a // forged Sender and have the receiver accept it as authentic in a room that // demands signatures (audit N3, report 0006). Requiring the signature first // closes that spoof. if info.Policy.SignMsgs { if f.Sig == nil { return // signature required by room policy but absent: drop } pub, err := c.signerPub(roomID, f.Sender) if err != nil || !cs.VerifyEd25519(pub, f.SigningBytes(), f.Sig) { return // unauthenticated frame: drop } } plaintext := f.Payload // Decrypt only inline payloads. Media frames carry their bytes in the // blob store (referenced by f.Blob) with the nonce in BlobRef.Nonce; // the handler decrypts those on demand via FetchMedia. A frame with an // inline ciphertext always has a non-empty Nonce. if info.Policy.Encrypt && len(f.Nonce) > 0 && len(f.Payload) > 0 { k, ok := c.getCachedKey(roomID, f.Epoch) if !ok { // Sender used a newer (or unknown) epoch: refresh K from the control plane. k, _, err = c.fetchKey(roomID, f.Epoch) if err != nil { // Cannot obtain the key for this epoch. For persisted history this is // expected and NOT fatal: a member invited at a later epoch reading // older history (or a kicked peer) simply cannot read those frames. // Skip this message and keep processing the rest (megolm semantics: // new members do not read prior history). return } } pt, err := cs.OpenAEAD(k, f.Nonce, f.Payload, []byte(info.Subject)) if err != nil { return // cannot decrypt (wrong epoch/kicked): skip this frame, continue } plaintext = pt } handler(f, plaintext) } // ---- request/reply (cleartext v1) ---------------------------------------- // Request performs a NATS request/reply on subject (cleartext in v1, intended // for rpc.* subjects). func (c *Client) Request(subject string, plaintext []byte, timeout time.Duration) ([]byte, error) { msg, err := c.nc.Request(subject, plaintext, timeout) if err != nil { return nil, fmt.Errorf("client: request %q: %w", subject, err) } return msg.Data, nil } // Reply registers a responder for subject; handler receives the request bytes // and returns the reply bytes (cleartext in v1). func (c *Client) Reply(subject string, handler func([]byte) []byte) (*nats.Subscription, error) { return c.nc.Subscribe(subject, func(msg *nats.Msg) { if msg.Reply == "" { return } _ = c.nc.Publish(msg.Reply, handler(msg.Data)) }) } // ---- kick / forward secrecy ---------------------------------------------- // Kick removes a member and rotates the room key to a new epoch, re-sealing it // only for the remaining members. The kicked member can no longer decrypt // messages published after the rotation (forward secrecy). func (c *Client) Kick(roomID string, endpoint string) error { info, err := c.fetchRoom(roomID) if err != nil { return err } newEpoch := info.Epoch + 1 if !info.Policy.Encrypt { // Unencrypted room: just remove the member, no key rotation needed. req := rekeyReq{By: c.endpoint, NewEpoch: newEpoch, Remove: []string{endpoint}} req.Sig = c.signRequest(req) return c.doJSON("POST", "/rooms/"+roomID+"/rekey", req, nil) } kPrime, err := newRoomKey() if err != nil { return err } var members []memberJSON if err := c.doJSON("GET", "/rooms/"+roomID+"/members", nil, &members); err != nil { return err } var keys []rekeyKey for _, m := range members { if m.Endpoint == endpoint { continue // exclude the kicked member } sealed, err := cs.SealKeyBox(m.KexPub, kPrime) if err != nil { return fmt.Errorf("client: seal new key for %q: %w", m.Endpoint, err) } keys = append(keys, rekeyKey{Endpoint: m.Endpoint, SealedKey: sealed}) } req := rekeyReq{By: c.endpoint, NewEpoch: newEpoch, Keys: keys, Remove: []string{endpoint}} req.Sig = c.signRequest(req) if err := c.doJSON("POST", "/rooms/"+roomID+"/rekey", req, nil); err != nil { return err } c.cacheKey(roomID, newEpoch, kPrime) return nil } // ---- media (object store) ------------------------------------------------- // PublishMedia encrypts data with the room key, uploads the ciphertext to the // blob store, and publishes a frame carrying only a BlobRef. Receivers whose // handler sees f.Blob != nil should GET /blobs/{hash} and OpenAEAD it. func (c *Client) PublishMedia(roomID string, data []byte) error { info, err := c.fetchRoom(roomID) if err != nil { return err } f := frame.Frame{ Type: frame.PUB, Subject: info.Subject, Sender: c.endpoint, MsgID: newULID(), Epoch: info.Epoch, } var ciphertext []byte var nonce []byte if info.Policy.Encrypt { k, ep, err := c.fetchKey(roomID, info.Epoch) if err != nil { return err } nonce, ciphertext, err = cs.SealAEAD(k, data, []byte(info.Subject)) if err != nil { return fmt.Errorf("client: seal media: %w", err) } f.Epoch = ep } else { ciphertext = data } hash, err := c.putBlob(ciphertext) if err != nil { return err } f.Blob = &frame.BlobRef{Hash: hash, Nonce: nonce, Size: int64(len(ciphertext))} if info.Policy.SignMsgs { f.Sig = cs.SignEd25519(c.id.SignPriv, f.SigningBytes()) } b, err := f.Marshal() if err != nil { return fmt.Errorf("client: marshal media frame: %w", err) } // Persisted rooms route the media frame (the BlobRef envelope, not the blob // bytes) through JetStream so it appears in history/replay like any other // frame; ephemeral rooms keep the original core-NATS publish. if info.Policy.Persist { return c.publishPersistent(roomID, info.Subject, b) } return c.nc.Publish(info.Subject, b) } // FetchMedia downloads and (for encrypted rooms) decrypts a blob referenced by // a received frame. It is a convenience for handlers that see f.Blob != nil. func (c *Client) FetchMedia(roomID string, f frame.Frame) ([]byte, error) { if f.Blob == nil { return nil, fmt.Errorf("client: frame has no blob ref") } ct, err := c.getBlob(f.Blob.Hash) if err != nil { return nil, err } info, err := c.fetchRoom(roomID) if err != nil { return nil, err } if !info.Policy.Encrypt { return ct, nil } k, ok := c.getCachedKey(roomID, f.Epoch) if !ok { if k, _, err = c.fetchKey(roomID, f.Epoch); err != nil { return nil, err } } return cs.OpenAEAD(k, f.Blob.Nonce, ct, []byte(info.Subject)) } func (c *Client) putBlob(ciphertext []byte) (string, error) { var lastErr error for _, base := range c.ctrlURLs { req, err := c.newSignedRequestTo(base, "POST", "/blobs", ciphertext) if err != nil { return "", err } req.Header.Set("Content-Type", "application/octet-stream") resp, err := c.http.Do(req) if err != nil { lastErr = err continue // dead node: try the next control plane } defer resp.Body.Close() body, _ := io.ReadAll(resp.Body) if resp.StatusCode >= 300 { return "", fmt.Errorf("client: put blob -> %d: %s", resp.StatusCode, string(body)) } var r blobResp if err := json.Unmarshal(body, &r); err != nil { return "", fmt.Errorf("client: decode blob resp: %w", err) } return r.Hash, nil } return "", fmt.Errorf("client: put blob: all control planes failed: %w", lastErr) } func (c *Client) getBlob(hash string) ([]byte, error) { var lastErr error for _, base := range c.ctrlURLs { req, err := c.newSignedRequestTo(base, "GET", "/blobs/"+hash, nil) if err != nil { return nil, err } resp, err := c.http.Do(req) if err != nil { lastErr = err continue // dead node: try the next control plane } defer resp.Body.Close() if resp.StatusCode >= 300 { body, _ := io.ReadAll(resp.Body) return nil, fmt.Errorf("client: get blob -> %d: %s", resp.StatusCode, string(body)) } return io.ReadAll(resp.Body) } return nil, fmt.Errorf("client: get blob: all control planes failed: %w", lastErr) }