Compare commits
93 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
| 5af945778b | |||
| f92973f5fe | |||
| 380d795ffb | |||
| caf005f04b | |||
| 9787c218ac | |||
| 926b8e96af | |||
| ae39e35fb4 | |||
| 48a3d6be33 | |||
| 24ff45ca7e | |||
| b8201a82cd | |||
| 3a33656cac | |||
| 2f5b372a80 | |||
| 32bec75665 | |||
| 9b96537aa6 | |||
| 18ee7c469b | |||
| e9ad719424 | |||
| d1e1a478f8 | |||
| cacf608fde | |||
| a9c245d468 | |||
| 8b6a01d280 | |||
| 5df99fa4c4 | |||
| df3b62a601 | |||
| 6976537842 | |||
| a4bbe8209b | |||
| 87ef52cc80 | |||
| a2ec78c81d | |||
| d01da9d396 | |||
| db8618ddc3 | |||
| e7d59fd01d | |||
| 0f79708338 | |||
| ef3af6dfd1 | |||
| 88b47912bd | |||
| a3ac58fb70 | |||
| fb0291ad8a | |||
| d821bc1794 | |||
| da420513b6 | |||
| 96abb75a2e | |||
| 37c778ca9a | |||
| c6ad63059f | |||
| 649dc9e244 | |||
| d6e668b984 | |||
| 94e7ced1ef | |||
| 9013ea5e33 | |||
| b8c9b2b652 | |||
| 6b3ace1d39 | |||
| 3230b31ade | |||
| c90f145a05 | |||
| 618f6b61da | |||
| d483c90356 | |||
| 1bcca987a4 | |||
| 0aa2caae43 | |||
| 957b728160 | |||
| 07f4af817e | |||
| 0d56c3c81d | |||
| fb6c796059 | |||
| e502b16675 | |||
| 47ff74d837 | |||
| b81e5f26f1 | |||
| d742f91881 | |||
| 30577145ce | |||
| 01e2ee1aa0 | |||
| e7bdcc978c | |||
| 60d6a86655 | |||
| bcd02716d5 | |||
| 484a07d6fd | |||
| 04e27518af | |||
| 6b0916f1fa | |||
| 87dbc421cd | |||
| b647779521 | |||
| 74c8d4f941 | |||
| 2ccd11b68c | |||
| 75939a192c | |||
| 1b56f14c20 | |||
| 2786ae2dde | |||
| 6d3d6d2562 | |||
| 217daae472 | |||
| 00058ea0af | |||
| 1630f6f163 | |||
| b09bafe242 | |||
| 413dd61041 | |||
| 89e0d0e64a | |||
| 2130eaa44d | |||
| 567e604fc7 | |||
| 0f8a38d62b | |||
| e0ef3a27cc | |||
| 3e39e23fe0 | |||
| e9711bf74b | |||
| 822982b71b | |||
| ddc6cabc24 | |||
| 0d7ab22d4a | |||
| c5387028e0 | |||
| 7de05c8591 | |||
| 9a915839c8 |
@@ -14,3 +14,7 @@ worker.id
|
|||||||
/chat
|
/chat
|
||||||
*.exe
|
*.exe
|
||||||
registry.db
|
registry.db
|
||||||
|
|
||||||
|
# local workspace (no committear: replace absoluto al registry)
|
||||||
|
go.work
|
||||||
|
go.work.sum
|
||||||
|
|||||||
+13
-10
@@ -1,12 +1,15 @@
|
|||||||
.gradle/
|
# Android / Gradle build artifacts
|
||||||
build/
|
|
||||||
local.properties
|
|
||||||
*.iml
|
*.iml
|
||||||
.idea/
|
.gradle/
|
||||||
captures/
|
/local.properties
|
||||||
.cxx/
|
/.idea
|
||||||
|
.DS_Store
|
||||||
|
/build
|
||||||
|
/app/build
|
||||||
|
/captures
|
||||||
|
.externalNativeBuild
|
||||||
|
.cxx
|
||||||
|
|
||||||
# The gomobile binding is a build artifact (~24 MB). Regenerate it from ../mobile
|
# binding gomobile regenerable (38MB): ver mobile/gen_aar.sh
|
||||||
# with `gomobile bind` (see README.md); it is not versioned.
|
/app/libs/*.aar
|
||||||
app/libs/*.aar
|
/app/libs/*-sources.jar
|
||||||
app/libs/*.jar
|
|
||||||
|
|||||||
@@ -1,83 +0,0 @@
|
|||||||
# unibus · app Android
|
|
||||||
|
|
||||||
Cliente móvil nativo de unibus. La app no habla con un gateway: embebe un **peer
|
|
||||||
real** del bus a través del binding gomobile `mobile/unibus.go`, de modo que el
|
|
||||||
cifrado extremo a extremo corre **en el dispositivo**. Cada teléfono es un peer
|
|
||||||
de primera clase del bus, igual que cualquier peer Go.
|
|
||||||
|
|
||||||
## Arquitectura
|
|
||||||
|
|
||||||
```
|
|
||||||
Kotlin/Compose UI ──> BusViewModel ──> com.unibus.core.mobile.Session (.aar)
|
|
||||||
│ (NATS data plane + E2E crypto, en Go)
|
|
||||||
▼
|
|
||||||
membershipd (control plane HTTP :8470)
|
|
||||||
NATS (data plane :4250)
|
|
||||||
```
|
|
||||||
|
|
||||||
- `BusViewModel` traduce intents de UI en llamadas al binding. Las llamadas de red
|
|
||||||
(`newSession`, `createRoom`, `join`, `publish`) corren en `Dispatchers.IO`.
|
|
||||||
- Los frames entrantes llegan por `FrameListener.onFrame` en una goroutine NATS
|
|
||||||
(hilo JNI); se publican en un `StateFlow` (thread-safe) que Compose recolecta en
|
|
||||||
el hilo principal.
|
|
||||||
|
|
||||||
## Requisitos
|
|
||||||
|
|
||||||
- Android SDK (compileSdk 34), NDK (para regenerar el `.aar`), JDK 17.
|
|
||||||
- El binding `app/libs/unibus.aar` (no versionado: es un artefacto de ~24 MB).
|
|
||||||
|
|
||||||
## 1. Generar el binding (.aar)
|
|
||||||
|
|
||||||
Desde la raíz del repo de la app (`projects/message_bus/apps/unibus`):
|
|
||||||
|
|
||||||
```bash
|
|
||||||
export ANDROID_HOME=$HOME/android-sdk
|
|
||||||
export ANDROID_NDK_HOME=$HOME/android-sdk/ndk/26.3.11579264
|
|
||||||
mkdir -p android/app/libs
|
|
||||||
gomobile bind -target=android -androidapi 21 -javapkg com.unibus.core \
|
|
||||||
-o android/app/libs/unibus.aar ./mobile
|
|
||||||
```
|
|
||||||
|
|
||||||
Esto produce `unibus.aar` con la clase estática `com.unibus.core.mobile.Mobile`
|
|
||||||
(`generateIdentity`, `newSession`) y los tipos `Session` y `FrameListener`.
|
|
||||||
|
|
||||||
## 2. Compilar el APK
|
|
||||||
|
|
||||||
```bash
|
|
||||||
cd android
|
|
||||||
export JAVA_HOME=$HOME/android-sdk/jdk-17/jdk-17.0.19+10
|
|
||||||
export ANDROID_HOME=$HOME/android-sdk
|
|
||||||
./gradlew assembleDebug
|
|
||||||
# APK: app/build/outputs/apk/debug/app-debug.apk
|
|
||||||
```
|
|
||||||
|
|
||||||
`local.properties` apunta a `sdk.dir`; ajústalo si tu SDK está en otra ruta.
|
|
||||||
|
|
||||||
## 3. Arrancar el bus y probar en el emulador
|
|
||||||
|
|
||||||
```bash
|
|
||||||
# 1. En el PC: control plane + NATS embebido (HTTP :8470, NATS :4250)
|
|
||||||
cd projects/message_bus/apps/unibus && go run ./cmd/membershipd
|
|
||||||
|
|
||||||
# 2. Emulador Pixel_API34
|
|
||||||
$ANDROID_HOME/emulator/emulator -avd Pixel_API34 &
|
|
||||||
|
|
||||||
# 3. Instalar + lanzar
|
|
||||||
adb install -r app/build/outputs/apk/debug/app-debug.apk
|
|
||||||
adb shell am start -n com.unibus.app/.MainActivity
|
|
||||||
```
|
|
||||||
|
|
||||||
En la pantalla de conexión, desde el emulador el host del PC es `10.0.2.2`:
|
|
||||||
|
|
||||||
- **Host (control plane):** `http://10.0.2.2:8470`
|
|
||||||
- **NATS (data plane):** `nats://10.0.2.2:4250`
|
|
||||||
|
|
||||||
Para un teléfono físico en la misma LAN, usa la IP LAN del PC en lugar de
|
|
||||||
`10.0.2.2`.
|
|
||||||
|
|
||||||
## Notas
|
|
||||||
|
|
||||||
- La identidad del peer se guarda en `filesDir/peer.id` (claves privadas
|
|
||||||
Ed25519 + X25519). No se sincroniza ni se respalda.
|
|
||||||
- Una room creada en modo "cifrar (E2E)" usa la política Matrix (cifrada,
|
|
||||||
persistida, firmada); en modo normal usa NATS cleartext.
|
|
||||||
@@ -1,7 +1,7 @@
|
|||||||
plugins {
|
plugins {
|
||||||
id("com.android.application")
|
id("com.android.application")
|
||||||
id("org.jetbrains.kotlin.android")
|
id("org.jetbrains.kotlin.android")
|
||||||
id("org.jetbrains.kotlin.plugin.compose")
|
id("org.jetbrains.kotlin.plugin.serialization")
|
||||||
}
|
}
|
||||||
|
|
||||||
android {
|
android {
|
||||||
@@ -14,10 +14,21 @@ android {
|
|||||||
targetSdk = 34
|
targetSdk = 34
|
||||||
versionCode = 1
|
versionCode = 1
|
||||||
versionName = "0.1.0"
|
versionName = "0.1.0"
|
||||||
|
// The unibus.aar ships native libgojni.so for these ABIs. Limit the APK
|
||||||
|
// to the desktop/emulator + phone ABIs we actually target.
|
||||||
|
ndk {
|
||||||
|
abiFilters += listOf("arm64-v8a", "armeabi-v7a", "x86", "x86_64")
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
buildFeatures {
|
buildTypes {
|
||||||
compose = true
|
release {
|
||||||
|
isMinifyEnabled = false
|
||||||
|
proguardFiles(
|
||||||
|
getDefaultProguardFile("proguard-android-optimize.txt"),
|
||||||
|
"proguard-rules.pro",
|
||||||
|
)
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
compileOptions {
|
compileOptions {
|
||||||
@@ -27,17 +38,13 @@ android {
|
|||||||
kotlinOptions {
|
kotlinOptions {
|
||||||
jvmTarget = "17"
|
jvmTarget = "17"
|
||||||
}
|
}
|
||||||
|
buildFeatures {
|
||||||
buildTypes {
|
compose = true
|
||||||
getByName("release") {
|
}
|
||||||
isMinifyEnabled = false
|
composeOptions {
|
||||||
proguardFiles(
|
// Compose compiler matching Kotlin 1.9.24.
|
||||||
getDefaultProguardFile("proguard-android-optimize.txt"),
|
kotlinCompilerExtensionVersion = "1.5.14"
|
||||||
"proguard-rules.pro",
|
|
||||||
)
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
packaging {
|
packaging {
|
||||||
resources {
|
resources {
|
||||||
excludes += "/META-INF/{AL2.0,LGPL2.1}"
|
excludes += "/META-INF/{AL2.0,LGPL2.1}"
|
||||||
@@ -46,21 +53,23 @@ android {
|
|||||||
}
|
}
|
||||||
|
|
||||||
dependencies {
|
dependencies {
|
||||||
// The unibus gomobile binding: a real bus peer that does NATS + E2E crypto
|
// gomobile binding over pkg/client (real end-to-end crypto on device).
|
||||||
// on the device. All protocol logic lives here, shared with every other peer.
|
|
||||||
implementation(files("libs/unibus.aar"))
|
implementation(files("libs/unibus.aar"))
|
||||||
|
|
||||||
val composeBom = platform("androidx.compose:compose-bom:2024.09.03")
|
implementation("androidx.core:core-ktx:1.13.1")
|
||||||
|
implementation("androidx.activity:activity-compose:1.9.0")
|
||||||
|
implementation("androidx.lifecycle:lifecycle-runtime-ktx:2.8.2")
|
||||||
|
implementation("androidx.lifecycle:lifecycle-viewmodel-compose:2.8.2")
|
||||||
|
|
||||||
|
val composeBom = platform("androidx.compose:compose-bom:2024.06.00")
|
||||||
implementation(composeBom)
|
implementation(composeBom)
|
||||||
implementation("androidx.compose.ui:ui")
|
implementation("androidx.compose.ui:ui")
|
||||||
implementation("androidx.compose.ui:ui-graphics")
|
implementation("androidx.compose.ui:ui-graphics")
|
||||||
implementation("androidx.compose.ui:ui-tooling-preview")
|
|
||||||
implementation("androidx.compose.material3:material3")
|
implementation("androidx.compose.material3:material3")
|
||||||
implementation("androidx.compose.material:material-icons-extended")
|
implementation("androidx.compose.material:material-icons-extended")
|
||||||
implementation("androidx.activity:activity-compose:1.9.2")
|
implementation("androidx.compose.ui:ui-tooling-preview")
|
||||||
implementation("androidx.lifecycle:lifecycle-viewmodel-compose:2.8.6")
|
|
||||||
implementation("androidx.lifecycle:lifecycle-runtime-ktx:2.8.6")
|
|
||||||
implementation("org.jetbrains.kotlinx:kotlinx-coroutines-android:1.8.1")
|
|
||||||
|
|
||||||
debugImplementation("androidx.compose.ui:ui-tooling")
|
debugImplementation("androidx.compose.ui:ui-tooling")
|
||||||
|
|
||||||
|
implementation("org.jetbrains.kotlinx:kotlinx-serialization-json:1.6.3")
|
||||||
|
implementation("org.jetbrains.kotlinx:kotlinx-coroutines-android:1.8.1")
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -0,0 +1,12 @@
|
|||||||
|
# libs/
|
||||||
|
|
||||||
|
`unibus.aar` (binding gomobile sobre `pkg/client`, ~38 MB con `libgojni.so` para
|
||||||
|
4 ABIs) vive aquí pero **no se versiona** — es un artefacto de build reproducible.
|
||||||
|
|
||||||
|
Regenéralo con:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
../../mobile/gen_aar.sh
|
||||||
|
```
|
||||||
|
|
||||||
|
(desde la raíz del repo: `./mobile/gen_aar.sh`). Requiere Go + gomobile + Android NDK.
|
||||||
Vendored
+3
-3
@@ -1,4 +1,4 @@
|
|||||||
# gomobile generates JNI-bound classes under com.unibus.core.mobile and go.*.
|
# gomobile binding: keep the generated Go<->Java bridge classes intact so the
|
||||||
# They are reached from native code, so keep them intact even when minifying.
|
# JNI layer can find them by name at runtime.
|
||||||
-keep class com.unibus.core.mobile.** { *; }
|
|
||||||
-keep class go.** { *; }
|
-keep class go.** { *; }
|
||||||
|
-keep class com.unibus.core.mobile.** { *; }
|
||||||
|
|||||||
@@ -1,25 +1,25 @@
|
|||||||
<?xml version="1.0" encoding="utf-8"?>
|
<?xml version="1.0" encoding="utf-8"?>
|
||||||
<manifest xmlns:android="http://schemas.android.com/apk/res/android">
|
<manifest xmlns:android="http://schemas.android.com/apk/res/android">
|
||||||
|
|
||||||
|
<!-- The bus is reached over the network (NATS data plane + control plane). -->
|
||||||
<uses-permission android:name="android.permission.INTERNET" />
|
<uses-permission android:name="android.permission.INTERNET" />
|
||||||
<uses-permission android:name="android.permission.ACCESS_NETWORK_STATE" />
|
<uses-permission android:name="android.permission.ACCESS_NETWORK_STATE" />
|
||||||
|
|
||||||
<application
|
<application
|
||||||
android:allowBackup="true"
|
android:allowBackup="true"
|
||||||
android:label="@string/app_name"
|
android:label="unibus"
|
||||||
android:supportsRtl="true"
|
android:icon="@mipmap/ic_launcher"
|
||||||
android:usesCleartextTraffic="true"
|
android:theme="@style/Theme.Unibus"
|
||||||
android:theme="@style/Theme.Unibus">
|
android:supportsRtl="true">
|
||||||
|
|
||||||
<activity
|
<activity
|
||||||
android:name=".MainActivity"
|
android:name=".MainActivity"
|
||||||
android:exported="true"
|
android:exported="true"
|
||||||
android:label="@string/app_name"
|
android:theme="@style/Theme.Unibus">
|
||||||
android:windowSoftInputMode="adjustResize">
|
|
||||||
<intent-filter>
|
<intent-filter>
|
||||||
<action android:name="android.intent.action.MAIN" />
|
<action android:name="android.intent.action.MAIN" />
|
||||||
<category android:name="android.intent.category.LAUNCHER" />
|
<category android:name="android.intent.category.LAUNCHER" />
|
||||||
</intent-filter>
|
</intent-filter>
|
||||||
</activity>
|
</activity>
|
||||||
</application>
|
</application>
|
||||||
|
|
||||||
</manifest>
|
</manifest>
|
||||||
|
|||||||
@@ -0,0 +1,88 @@
|
|||||||
|
package com.unibus.app
|
||||||
|
|
||||||
|
import androidx.compose.runtime.getValue
|
||||||
|
import androidx.compose.runtime.mutableStateOf
|
||||||
|
import androidx.compose.runtime.setValue
|
||||||
|
import androidx.lifecycle.ViewModel
|
||||||
|
import androidx.lifecycle.viewModelScope
|
||||||
|
import com.unibus.app.data.Message
|
||||||
|
import com.unibus.app.data.MockUnibusRepository
|
||||||
|
import com.unibus.app.data.Room
|
||||||
|
import com.unibus.app.data.UnibusRepository
|
||||||
|
import com.unibus.app.data.User
|
||||||
|
import kotlinx.coroutines.launch
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Estado de la app. Orquesta el [UnibusRepository] (mock por defecto) y expone
|
||||||
|
* estado observable a Compose. Cambiar el repo por [com.unibus.app.data.BindingUnibusRepository]
|
||||||
|
* conecta la UI al bus real sin tocar las pantallas.
|
||||||
|
*/
|
||||||
|
class AppViewModel(
|
||||||
|
private val repo: UnibusRepository,
|
||||||
|
) : ViewModel() {
|
||||||
|
|
||||||
|
// Constructor no-arg para que androidx `viewModel()` lo instancie por
|
||||||
|
// reflexión. Por defecto usa el repositorio mock (iteración de diseño).
|
||||||
|
constructor() : this(MockUnibusRepository())
|
||||||
|
|
||||||
|
var user by mutableStateOf<User?>(null)
|
||||||
|
private set
|
||||||
|
var rooms by mutableStateOf<List<Room>>(emptyList())
|
||||||
|
private set
|
||||||
|
var activeRoomId by mutableStateOf<String?>(null)
|
||||||
|
private set
|
||||||
|
var messages by mutableStateOf<List<Message>>(emptyList())
|
||||||
|
private set
|
||||||
|
var connecting by mutableStateOf(false)
|
||||||
|
private set
|
||||||
|
var error by mutableStateOf<String?>(null)
|
||||||
|
private set
|
||||||
|
|
||||||
|
val activeRoom: Room?
|
||||||
|
get() = rooms.firstOrNull { it.id == activeRoomId }
|
||||||
|
|
||||||
|
fun connect(handle: String, password: String) {
|
||||||
|
if (connecting) return
|
||||||
|
connecting = true
|
||||||
|
error = null
|
||||||
|
viewModelScope.launch {
|
||||||
|
repo.connect(handle, password)
|
||||||
|
.onSuccess {
|
||||||
|
user = it
|
||||||
|
rooms = repo.listRooms()
|
||||||
|
}
|
||||||
|
.onFailure { error = it.message ?: "No se pudo conectar" }
|
||||||
|
connecting = false
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fun openRoom(id: String) {
|
||||||
|
activeRoomId = id
|
||||||
|
messages = repo.messagesOf(id)
|
||||||
|
repo.subscribe(id) { incoming ->
|
||||||
|
if (activeRoomId == id) messages = messages + incoming
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fun closeRoom() {
|
||||||
|
activeRoomId = null
|
||||||
|
messages = emptyList()
|
||||||
|
}
|
||||||
|
|
||||||
|
fun send(text: String) {
|
||||||
|
val rid = activeRoomId ?: return
|
||||||
|
val body = text.trim()
|
||||||
|
if (body.isEmpty()) return
|
||||||
|
viewModelScope.launch {
|
||||||
|
repo.send(rid, body).onSuccess { messages = messages + it }
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fun logout() {
|
||||||
|
repo.close()
|
||||||
|
user = null
|
||||||
|
rooms = emptyList()
|
||||||
|
activeRoomId = null
|
||||||
|
messages = emptyList()
|
||||||
|
}
|
||||||
|
}
|
||||||
@@ -1,162 +0,0 @@
|
|||||||
package com.unibus.app
|
|
||||||
|
|
||||||
import android.app.Application
|
|
||||||
import androidx.lifecycle.AndroidViewModel
|
|
||||||
import androidx.lifecycle.viewModelScope
|
|
||||||
import com.unibus.core.mobile.FrameListener
|
|
||||||
import com.unibus.core.mobile.Mobile
|
|
||||||
import com.unibus.core.mobile.Session
|
|
||||||
import kotlinx.coroutines.Dispatchers
|
|
||||||
import kotlinx.coroutines.flow.MutableStateFlow
|
|
||||||
import kotlinx.coroutines.flow.StateFlow
|
|
||||||
import kotlinx.coroutines.flow.asStateFlow
|
|
||||||
import kotlinx.coroutines.flow.update
|
|
||||||
import kotlinx.coroutines.launch
|
|
||||||
import java.io.File
|
|
||||||
|
|
||||||
/** One chat message shown in the UI. */
|
|
||||||
data class ChatMessage(
|
|
||||||
val sender: String,
|
|
||||||
val text: String,
|
|
||||||
val mine: Boolean,
|
|
||||||
val ts: Long,
|
|
||||||
)
|
|
||||||
|
|
||||||
/** The whole observable UI state of the app. */
|
|
||||||
data class BusState(
|
|
||||||
val connecting: Boolean = false,
|
|
||||||
val connected: Boolean = false,
|
|
||||||
val endpointId: String = "",
|
|
||||||
val roomId: String = "",
|
|
||||||
val roomSubject: String = "",
|
|
||||||
val status: String = "",
|
|
||||||
val error: String? = null,
|
|
||||||
val messages: List<ChatMessage> = emptyList(),
|
|
||||||
)
|
|
||||||
|
|
||||||
/**
|
|
||||||
* BusViewModel drives a real unibus peer on the device through the gomobile
|
|
||||||
* binding. The binding performs NATS transport and end-to-end crypto natively;
|
|
||||||
* this class only translates UI intents into binding calls and exposes the
|
|
||||||
* incoming frames as observable state.
|
|
||||||
*
|
|
||||||
* Threading: every binding call that touches the network (newSession, createRoom,
|
|
||||||
* join, publish) runs off the main thread on Dispatchers.IO to avoid
|
|
||||||
* NetworkOnMainThreadException. Incoming frames arrive on a JNI-attached NATS
|
|
||||||
* goroutine via [onFrame]; we only append to a thread-safe StateFlow there, and
|
|
||||||
* Compose collects that flow on the main thread.
|
|
||||||
*/
|
|
||||||
class BusViewModel(app: Application) : AndroidViewModel(app), FrameListener {
|
|
||||||
private val _state = MutableStateFlow(BusState())
|
|
||||||
val state: StateFlow<BusState> = _state.asStateFlow()
|
|
||||||
|
|
||||||
private var session: Session? = null
|
|
||||||
private var myEndpoint: String = ""
|
|
||||||
|
|
||||||
private val idPath: String
|
|
||||||
get() = File(getApplication<Application>().filesDir, "peer.id").absolutePath
|
|
||||||
|
|
||||||
override fun onFrame(roomID: String, sender: String, msgID: String, text: String) {
|
|
||||||
_state.update {
|
|
||||||
it.copy(
|
|
||||||
messages = it.messages + ChatMessage(
|
|
||||||
sender = sender,
|
|
||||||
text = text,
|
|
||||||
mine = sender == myEndpoint,
|
|
||||||
ts = System.currentTimeMillis(),
|
|
||||||
),
|
|
||||||
)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
fun connect(host: String, nats: String, peerName: String) {
|
|
||||||
if (_state.value.connecting) return
|
|
||||||
_state.update { it.copy(connecting = true, error = null, status = "Conectando…") }
|
|
||||||
viewModelScope.launch(Dispatchers.IO) {
|
|
||||||
try {
|
|
||||||
val s = Mobile.newSession(idPath, nats.trim(), host.trim())
|
|
||||||
session = s
|
|
||||||
myEndpoint = s.endpointID()
|
|
||||||
_state.update {
|
|
||||||
it.copy(
|
|
||||||
connecting = false,
|
|
||||||
connected = true,
|
|
||||||
endpointId = myEndpoint,
|
|
||||||
status = "Conectado como $peerName",
|
|
||||||
)
|
|
||||||
}
|
|
||||||
} catch (e: Exception) {
|
|
||||||
_state.update {
|
|
||||||
it.copy(connecting = false, connected = false, error = e.message ?: "error desconocido")
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
fun createRoom(subject: String, encrypted: Boolean) {
|
|
||||||
val s = session ?: return
|
|
||||||
viewModelScope.launch(Dispatchers.IO) {
|
|
||||||
try {
|
|
||||||
val mode = if (encrypted) "matrix" else "nats"
|
|
||||||
val roomId = s.createRoom(subject.trim(), mode)
|
|
||||||
s.subscribe(roomId, this@BusViewModel)
|
|
||||||
_state.update {
|
|
||||||
it.copy(
|
|
||||||
roomId = roomId,
|
|
||||||
roomSubject = subject.trim(),
|
|
||||||
messages = emptyList(),
|
|
||||||
status = "Room creada",
|
|
||||||
)
|
|
||||||
}
|
|
||||||
} catch (e: Exception) {
|
|
||||||
_state.update { it.copy(error = e.message ?: "error al crear room") }
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
fun joinRoom(roomId: String) {
|
|
||||||
val s = session ?: return
|
|
||||||
viewModelScope.launch(Dispatchers.IO) {
|
|
||||||
try {
|
|
||||||
val rid = roomId.trim()
|
|
||||||
s.join(rid)
|
|
||||||
s.subscribe(rid, this@BusViewModel)
|
|
||||||
_state.update {
|
|
||||||
it.copy(roomId = rid, roomSubject = "(unida)", messages = emptyList(), status = "Unido a la room")
|
|
||||||
}
|
|
||||||
} catch (e: Exception) {
|
|
||||||
_state.update { it.copy(error = e.message ?: "error al unirse") }
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
fun publish(text: String) {
|
|
||||||
val s = session ?: return
|
|
||||||
val room = _state.value.roomId
|
|
||||||
if (room.isEmpty() || text.isBlank()) return
|
|
||||||
viewModelScope.launch(Dispatchers.IO) {
|
|
||||||
try {
|
|
||||||
s.publish(room, text)
|
|
||||||
} catch (e: Exception) {
|
|
||||||
_state.update { it.copy(error = e.message ?: "error al publicar") }
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/** card returns this peer's shareable public identity (no secret). */
|
|
||||||
fun card(): String = try {
|
|
||||||
session?.card() ?: ""
|
|
||||||
} catch (_: Exception) {
|
|
||||||
""
|
|
||||||
}
|
|
||||||
|
|
||||||
fun clearError() = _state.update { it.copy(error = null) }
|
|
||||||
|
|
||||||
override fun onCleared() {
|
|
||||||
try {
|
|
||||||
session?.close()
|
|
||||||
} catch (_: Exception) {
|
|
||||||
}
|
|
||||||
session = null
|
|
||||||
}
|
|
||||||
}
|
|
||||||
@@ -2,306 +2,62 @@ package com.unibus.app
|
|||||||
|
|
||||||
import android.os.Bundle
|
import android.os.Bundle
|
||||||
import androidx.activity.ComponentActivity
|
import androidx.activity.ComponentActivity
|
||||||
|
import androidx.activity.compose.BackHandler
|
||||||
import androidx.activity.compose.setContent
|
import androidx.activity.compose.setContent
|
||||||
import androidx.activity.viewModels
|
|
||||||
import androidx.compose.foundation.layout.Arrangement
|
|
||||||
import androidx.compose.foundation.layout.Box
|
|
||||||
import androidx.compose.foundation.layout.Column
|
|
||||||
import androidx.compose.foundation.layout.Row
|
|
||||||
import androidx.compose.foundation.layout.Spacer
|
|
||||||
import androidx.compose.foundation.layout.fillMaxSize
|
|
||||||
import androidx.compose.foundation.layout.fillMaxWidth
|
|
||||||
import androidx.compose.foundation.layout.height
|
|
||||||
import androidx.compose.foundation.layout.padding
|
|
||||||
import androidx.compose.foundation.layout.width
|
|
||||||
import androidx.compose.foundation.lazy.LazyColumn
|
|
||||||
import androidx.compose.foundation.lazy.itemsIndexed
|
|
||||||
import androidx.compose.foundation.lazy.rememberLazyListState
|
|
||||||
import androidx.compose.material.icons.Icons
|
|
||||||
import androidx.compose.material.icons.automirrored.filled.Send
|
|
||||||
import androidx.compose.material.icons.filled.Add
|
|
||||||
import androidx.compose.material.icons.filled.Lock
|
|
||||||
import androidx.compose.material3.Button
|
|
||||||
import androidx.compose.material3.Card
|
|
||||||
import androidx.compose.material3.CircularProgressIndicator
|
|
||||||
import androidx.compose.material3.ExperimentalMaterial3Api
|
|
||||||
import androidx.compose.material3.Icon
|
|
||||||
import androidx.compose.material3.IconButton
|
|
||||||
import androidx.compose.material3.MaterialTheme
|
|
||||||
import androidx.compose.material3.OutlinedButton
|
|
||||||
import androidx.compose.material3.OutlinedTextField
|
|
||||||
import androidx.compose.material3.Scaffold
|
|
||||||
import androidx.compose.material3.Surface
|
|
||||||
import androidx.compose.material3.Switch
|
|
||||||
import androidx.compose.material3.Text
|
|
||||||
import androidx.compose.material3.TopAppBar
|
|
||||||
import androidx.compose.material3.darkColorScheme
|
|
||||||
import androidx.compose.runtime.Composable
|
import androidx.compose.runtime.Composable
|
||||||
import androidx.compose.runtime.LaunchedEffect
|
import androidx.compose.runtime.CompositionLocalProvider
|
||||||
import androidx.compose.runtime.collectAsState
|
import androidx.lifecycle.viewmodel.compose.viewModel
|
||||||
import androidx.compose.runtime.getValue
|
import com.unibus.app.ui.ChatScreen
|
||||||
import androidx.compose.runtime.mutableStateOf
|
import com.unibus.app.ui.LoginScreen
|
||||||
import androidx.compose.runtime.remember
|
import com.unibus.app.ui.RoomListScreen
|
||||||
import androidx.compose.runtime.saveable.rememberSaveable
|
import com.unibus.app.ui.theme.LocalUnibusColors
|
||||||
import androidx.compose.runtime.setValue
|
import com.unibus.app.ui.theme.UnibusColors
|
||||||
import androidx.compose.ui.Alignment
|
import com.unibus.app.ui.theme.UnibusTheme
|
||||||
import androidx.compose.ui.Modifier
|
|
||||||
import androidx.compose.ui.text.style.TextOverflow
|
|
||||||
import androidx.compose.ui.unit.dp
|
|
||||||
import java.text.SimpleDateFormat
|
|
||||||
import java.util.Date
|
|
||||||
import java.util.Locale
|
|
||||||
|
|
||||||
class MainActivity : ComponentActivity() {
|
class MainActivity : ComponentActivity() {
|
||||||
private val vm: BusViewModel by viewModels()
|
|
||||||
|
|
||||||
override fun onCreate(savedInstanceState: Bundle?) {
|
override fun onCreate(savedInstanceState: Bundle?) {
|
||||||
super.onCreate(savedInstanceState)
|
super.onCreate(savedInstanceState)
|
||||||
setContent {
|
setContent {
|
||||||
MaterialTheme(colorScheme = darkColorScheme()) {
|
UnibusTheme {
|
||||||
Surface(modifier = Modifier.fillMaxSize()) {
|
CompositionLocalProvider(LocalUnibusColors provides UnibusColors()) {
|
||||||
UnibusApp(vm)
|
UnibusApp()
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Navegación por estado (sin librería de routing — KISS): el usuario fluye
|
||||||
|
* Login → lista de rooms → chat, igual que la web pero en una sola columna.
|
||||||
|
*/
|
||||||
@Composable
|
@Composable
|
||||||
fun UnibusApp(vm: BusViewModel) {
|
private fun UnibusApp(vm: AppViewModel = viewModel()) {
|
||||||
val state by vm.state.collectAsState()
|
val user = vm.user
|
||||||
if (!state.connected) {
|
val activeRoom = vm.activeRoom
|
||||||
ConnectScreen(
|
|
||||||
connecting = state.connecting,
|
|
||||||
error = state.error,
|
|
||||||
onConnect = { host, nats, name -> vm.connect(host, nats, name) },
|
|
||||||
)
|
|
||||||
} else {
|
|
||||||
ChatScreen(state = state, vm = vm)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
@Composable
|
when {
|
||||||
fun ConnectScreen(
|
user == null -> LoginScreen(
|
||||||
connecting: Boolean,
|
connecting = vm.connecting,
|
||||||
error: String?,
|
error = vm.error,
|
||||||
onConnect: (String, String, String) -> Unit,
|
onLogin = { handle, password -> vm.connect(handle, password) },
|
||||||
) {
|
|
||||||
var host by rememberSaveable { mutableStateOf("http://10.0.2.2:8470") }
|
|
||||||
var nats by rememberSaveable { mutableStateOf("nats://10.0.2.2:4250") }
|
|
||||||
var name by rememberSaveable { mutableStateOf("android") }
|
|
||||||
|
|
||||||
Column(
|
|
||||||
modifier = Modifier
|
|
||||||
.fillMaxSize()
|
|
||||||
.padding(24.dp),
|
|
||||||
verticalArrangement = Arrangement.Center,
|
|
||||||
) {
|
|
||||||
Text("unibus", style = MaterialTheme.typography.headlineMedium)
|
|
||||||
Text(
|
|
||||||
"chat cifrado extremo a extremo sobre NATS",
|
|
||||||
style = MaterialTheme.typography.bodyMedium,
|
|
||||||
color = MaterialTheme.colorScheme.onSurfaceVariant,
|
|
||||||
)
|
)
|
||||||
Spacer(Modifier.height(24.dp))
|
|
||||||
OutlinedTextField(
|
|
||||||
value = host,
|
|
||||||
onValueChange = { host = it },
|
|
||||||
label = { Text("Host (control plane)") },
|
|
||||||
singleLine = true,
|
|
||||||
modifier = Modifier.fillMaxWidth(),
|
|
||||||
)
|
|
||||||
Spacer(Modifier.height(12.dp))
|
|
||||||
OutlinedTextField(
|
|
||||||
value = nats,
|
|
||||||
onValueChange = { nats = it },
|
|
||||||
label = { Text("NATS (data plane)") },
|
|
||||||
singleLine = true,
|
|
||||||
modifier = Modifier.fillMaxWidth(),
|
|
||||||
)
|
|
||||||
Spacer(Modifier.height(12.dp))
|
|
||||||
OutlinedTextField(
|
|
||||||
value = name,
|
|
||||||
onValueChange = { name = it },
|
|
||||||
label = { Text("Identidad") },
|
|
||||||
singleLine = true,
|
|
||||||
modifier = Modifier.fillMaxWidth(),
|
|
||||||
)
|
|
||||||
if (error != null) {
|
|
||||||
Spacer(Modifier.height(12.dp))
|
|
||||||
Text(error, color = MaterialTheme.colorScheme.error)
|
|
||||||
}
|
|
||||||
Spacer(Modifier.height(24.dp))
|
|
||||||
Button(
|
|
||||||
onClick = { onConnect(host, nats, name) },
|
|
||||||
enabled = !connecting,
|
|
||||||
modifier = Modifier.fillMaxWidth(),
|
|
||||||
) {
|
|
||||||
if (connecting) {
|
|
||||||
CircularProgressIndicator(modifier = Modifier.height(18.dp).width(18.dp), strokeWidth = 2.dp)
|
|
||||||
Spacer(Modifier.width(8.dp))
|
|
||||||
}
|
|
||||||
Text(if (connecting) "Conectando…" else "Conectar")
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
@OptIn(ExperimentalMaterial3Api::class)
|
activeRoom == null -> RoomListScreen(
|
||||||
@Composable
|
user = user,
|
||||||
fun ChatScreen(state: BusState, vm: BusViewModel) {
|
rooms = vm.rooms,
|
||||||
var subject by rememberSaveable { mutableStateOf("room.general") }
|
onSelect = { vm.openRoom(it) },
|
||||||
var encrypt by rememberSaveable { mutableStateOf(false) }
|
onLogout = { vm.logout() },
|
||||||
var joinId by rememberSaveable { mutableStateOf("") }
|
)
|
||||||
var draft by rememberSaveable { mutableStateOf("") }
|
|
||||||
val listState = rememberLazyListState()
|
|
||||||
|
|
||||||
LaunchedEffect(state.messages.size) {
|
else -> {
|
||||||
if (state.messages.isNotEmpty()) listState.animateScrollToItem(state.messages.size - 1)
|
BackHandler { vm.closeRoom() }
|
||||||
}
|
ChatScreen(
|
||||||
|
room = activeRoom,
|
||||||
Scaffold(
|
messages = vm.messages,
|
||||||
topBar = {
|
onSend = { vm.send(it) },
|
||||||
TopAppBar(
|
onBack = { vm.closeRoom() },
|
||||||
title = {
|
|
||||||
Column {
|
|
||||||
Text("unibus", style = MaterialTheme.typography.titleMedium)
|
|
||||||
Text(
|
|
||||||
state.status.ifEmpty { state.endpointId.take(12) + "…" },
|
|
||||||
style = MaterialTheme.typography.bodySmall,
|
|
||||||
maxLines = 1,
|
|
||||||
overflow = TextOverflow.Ellipsis,
|
|
||||||
)
|
|
||||||
}
|
|
||||||
},
|
|
||||||
)
|
)
|
||||||
},
|
|
||||||
) { inner ->
|
|
||||||
Column(
|
|
||||||
modifier = Modifier
|
|
||||||
.fillMaxSize()
|
|
||||||
.padding(inner)
|
|
||||||
.padding(horizontal = 12.dp),
|
|
||||||
) {
|
|
||||||
// Room controls.
|
|
||||||
Card(modifier = Modifier.fillMaxWidth().padding(vertical = 8.dp)) {
|
|
||||||
Column(Modifier.padding(12.dp)) {
|
|
||||||
Row(verticalAlignment = Alignment.CenterVertically) {
|
|
||||||
OutlinedTextField(
|
|
||||||
value = subject,
|
|
||||||
onValueChange = { subject = it },
|
|
||||||
label = { Text("subject") },
|
|
||||||
singleLine = true,
|
|
||||||
modifier = Modifier.weight(1f),
|
|
||||||
)
|
|
||||||
Spacer(Modifier.width(8.dp))
|
|
||||||
Button(onClick = { vm.createRoom(subject, encrypt) }) {
|
|
||||||
Icon(Icons.Filled.Add, contentDescription = "crear")
|
|
||||||
}
|
|
||||||
}
|
|
||||||
Row(verticalAlignment = Alignment.CenterVertically) {
|
|
||||||
Switch(checked = encrypt, onCheckedChange = { encrypt = it })
|
|
||||||
Spacer(Modifier.width(8.dp))
|
|
||||||
Icon(Icons.Filled.Lock, contentDescription = null, modifier = Modifier.height(16.dp))
|
|
||||||
Text("cifrar (E2E)", style = MaterialTheme.typography.bodySmall)
|
|
||||||
}
|
|
||||||
Spacer(Modifier.height(4.dp))
|
|
||||||
Row(verticalAlignment = Alignment.CenterVertically) {
|
|
||||||
OutlinedTextField(
|
|
||||||
value = joinId,
|
|
||||||
onValueChange = { joinId = it },
|
|
||||||
label = { Text("unirse por room id") },
|
|
||||||
singleLine = true,
|
|
||||||
modifier = Modifier.weight(1f),
|
|
||||||
)
|
|
||||||
Spacer(Modifier.width(8.dp))
|
|
||||||
OutlinedButton(onClick = { if (joinId.isNotBlank()) vm.joinRoom(joinId) }) {
|
|
||||||
Text("Unir")
|
|
||||||
}
|
|
||||||
}
|
|
||||||
if (state.roomId.isNotEmpty()) {
|
|
||||||
Spacer(Modifier.height(4.dp))
|
|
||||||
Text(
|
|
||||||
"room: ${state.roomSubject} · ${state.roomId}",
|
|
||||||
style = MaterialTheme.typography.bodySmall,
|
|
||||||
color = MaterialTheme.colorScheme.onSurfaceVariant,
|
|
||||||
maxLines = 1,
|
|
||||||
overflow = TextOverflow.Ellipsis,
|
|
||||||
)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
if (state.error != null) {
|
|
||||||
Text(
|
|
||||||
state.error,
|
|
||||||
color = MaterialTheme.colorScheme.error,
|
|
||||||
modifier = Modifier.fillMaxWidth().padding(vertical = 4.dp),
|
|
||||||
)
|
|
||||||
}
|
|
||||||
|
|
||||||
// Messages.
|
|
||||||
LazyColumn(
|
|
||||||
state = listState,
|
|
||||||
modifier = Modifier.weight(1f).fillMaxWidth(),
|
|
||||||
verticalArrangement = Arrangement.spacedBy(6.dp),
|
|
||||||
) {
|
|
||||||
itemsIndexed(state.messages, key = { i, m -> "${m.ts}-$i" }) { _, m ->
|
|
||||||
MessageBubble(m)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// Composer.
|
|
||||||
Row(
|
|
||||||
modifier = Modifier.fillMaxWidth().padding(vertical = 8.dp),
|
|
||||||
verticalAlignment = Alignment.CenterVertically,
|
|
||||||
) {
|
|
||||||
OutlinedTextField(
|
|
||||||
value = draft,
|
|
||||||
onValueChange = { draft = it },
|
|
||||||
placeholder = { Text("Mensaje…") },
|
|
||||||
singleLine = true,
|
|
||||||
enabled = state.roomId.isNotEmpty(),
|
|
||||||
modifier = Modifier.weight(1f),
|
|
||||||
)
|
|
||||||
Spacer(Modifier.width(8.dp))
|
|
||||||
IconButton(
|
|
||||||
onClick = {
|
|
||||||
vm.publish(draft)
|
|
||||||
draft = ""
|
|
||||||
},
|
|
||||||
enabled = state.roomId.isNotEmpty() && draft.isNotBlank(),
|
|
||||||
) {
|
|
||||||
Icon(Icons.AutoMirrored.Filled.Send, contentDescription = "enviar")
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
private val timeFmt = SimpleDateFormat("HH:mm:ss", Locale.getDefault())
|
|
||||||
|
|
||||||
@Composable
|
|
||||||
fun MessageBubble(m: ChatMessage) {
|
|
||||||
val align = if (m.mine) Alignment.End else Alignment.Start
|
|
||||||
Column(modifier = Modifier.fillMaxWidth(), horizontalAlignment = align) {
|
|
||||||
Card(
|
|
||||||
modifier = Modifier.fillMaxWidth(0.8f),
|
|
||||||
) {
|
|
||||||
Column(Modifier.padding(8.dp)) {
|
|
||||||
if (!m.mine) {
|
|
||||||
Text(
|
|
||||||
m.sender.take(12) + "…",
|
|
||||||
style = MaterialTheme.typography.labelSmall,
|
|
||||||
color = MaterialTheme.colorScheme.primary,
|
|
||||||
)
|
|
||||||
}
|
|
||||||
Text(m.text, style = MaterialTheme.typography.bodyMedium)
|
|
||||||
Text(
|
|
||||||
timeFmt.format(Date(m.ts)),
|
|
||||||
style = MaterialTheme.typography.labelSmall,
|
|
||||||
color = MaterialTheme.colorScheme.onSurfaceVariant,
|
|
||||||
)
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -0,0 +1,157 @@
|
|||||||
|
package com.unibus.app.data
|
||||||
|
|
||||||
|
import android.content.Context
|
||||||
|
import android.os.Handler
|
||||||
|
import android.os.Looper
|
||||||
|
import com.unibus.core.mobile.FrameListener
|
||||||
|
import com.unibus.core.mobile.Mobile
|
||||||
|
import com.unibus.core.mobile.Session
|
||||||
|
import kotlinx.coroutines.Dispatchers
|
||||||
|
import kotlinx.coroutines.withContext
|
||||||
|
import kotlinx.serialization.Serializable
|
||||||
|
import kotlinx.serialization.json.Json
|
||||||
|
import java.io.File
|
||||||
|
|
||||||
|
/**
 * Real implementation on top of the gomobile binding (pkg/client): end-to-end
 * encryption happens ON the device, like any other peer on the bus. It shares
 * its interface with [MockUnibusRepository], so the UI does not change when it
 * is plugged in.
 *
 * Status: fully wired and compilable against unibus.aar. Iteration 1 of the
 * app runs on the mock to iterate on the design; to enable the real bus it is
 * enough to instantiate this repository in [com.unibus.app.MainActivity],
 * passing the bus URLs and (if the bus requires TLS+auth) shipping ca.crt in
 * assets.
 *
 * Membership contract (issue 0006e): after CreateRoom / Join / Invite you must
 * call [refresh] BEFORE subscribe/publish on that room, or a secured bus
 * denies the subject. refresh() also drops subscriptions: re-subscribe after.
 *
 * @param context any context; only its application context is retained.
 * @param natsURL URL handed to the binding as the NATS endpoint.
 * @param ctrlURL URL handed to the binding as the control endpoint.
 */
class BindingUnibusRepository(
    context: Context,
    private val natsURL: String,
    private val ctrlURL: String,
) : UnibusRepository {

    private val appContext = context.applicationContext
    private val mainHandler = Handler(Looper.getMainLooper())
    private val json = Json { ignoreUnknownKeys = true }

    // Written inside withContext(Dispatchers.IO) and read from subscribe()/close(),
    // which run on the caller's thread, so the writes must be visible across threads.
    @Volatile
    private var session: Session? = null

    @Volatile
    private var user: User? = null

    /** Shape of one entry of the JSON array returned by `listRoomsJSON()`. */
    @Serializable
    private data class RoomDTO(
        val room_id: String,
        val subject: String,
        val epoch: Int = 0,
        val encrypted: Boolean = false,
        val role: String = "",
    )

    /** Sandbox path of the long-lived identity (private keys). */
    private fun identityPath(): String =
        File(appContext.filesDir, "identity.key").absolutePath

    /**
     * Copies ca.crt from assets to a local file and returns its path; returns ""
     * only when the asset does not exist (plain-text development bus). The
     * binding pins TLS to this CA when the path is not empty.
     *
     * Any other failure (for example an I/O error while copying) is propagated
     * instead of being mapped to "": silently returning "" for an app that does
     * ship a CA would disable pinning without anyone noticing.
     *
     * @throws java.io.IOException if the asset exists but cannot be copied.
     */
    private fun caPathOrEmpty(): String {
        val source = try {
            appContext.assets.open("ca.crt")
        } catch (_: java.io.FileNotFoundException) {
            // No CA bundled with the app: development bus without TLS pinning.
            return ""
        }
        val target = File(appContext.filesDir, "ca.crt")
        source.use { input ->
            target.outputStream().use { output -> input.copyTo(output) }
        }
        return target.absolutePath
    }

    /**
     * Creates/loads the identity, opens a session against the bus and returns
     * the logged-in user. Runs on [Dispatchers.IO]; every failure is returned
     * as [Result.failure] rather than thrown.
     *
     * @param handle locally visible label for the user.
     * @param password currently unused; reserved for unlocking the identity.
     */
    override suspend fun connect(handle: String, password: String): Result<User> =
        withContext(Dispatchers.IO) {
            try {
                // Release any session left over from a previous connect() so it
                // is not leaked when the reference below is overwritten.
                close()

                // The identity is persisted encrypted in the sandbox; password
                // will unlock it in a future iteration (today
                // LoadOrCreateIdentity creates/reads it directly). handle is
                // the locally visible label.
                // NOTE(review): presumably generateIdentity is load-or-create and
                // does not overwrite an existing key — confirm against the binding.
                Mobile.generateIdentity(identityPath())
                val opened = Mobile.newSession(identityPath(), natsURL, ctrlURL, caPathOrEmpty())
                session = opened
                val loggedIn = User(id = opened.endpointID(), handle = handle)
                user = loggedIn
                Result.success(loggedIn)
            } catch (e: Exception) {
                Result.failure(e)
            }
        }

    /**
     * Lists the rooms reported by the session. Best effort: without a session,
     * or when the call or the JSON decoding fails, an empty list is returned.
     */
    override suspend fun listRooms(): List<Room> = withContext(Dispatchers.IO) {
        val s = session ?: return@withContext emptyList()
        val raw = runCatching { s.listRoomsJSON() }.getOrDefault("[]")
        val dtos = runCatching { json.decodeFromString<List<RoomDTO>>(raw) }.getOrDefault(emptyList())
        dtos.map {
            Room(
                id = it.room_id,
                name = it.subject,
                encrypted = it.encrypted,
                lastMessage = "",
                lastTs = System.currentTimeMillis(),
                unread = 0,
                messages = emptyList(),
            )
        }
    }

    /** This implementation keeps no history, so there are never known messages. */
    override fun messagesOf(roomId: String): List<Message> = emptyList()

    /**
     * Subscribes to [roomId]; [onMessage] is posted to the main thread for each
     * incoming frame. Does nothing without a session; a failing subscribe call
     * is ignored (the interface offers no error channel here).
     */
    override fun subscribe(roomId: String, onMessage: (Message) -> Unit) {
        val s = session ?: return
        val myId = user?.id
        // FrameListener.onFrame arrives on a NATS goroutine: hop to the main
        // thread before touching Compose state.
        val listener = object : FrameListener {
            override fun onFrame(rid: String, sender: String, msgID: String, text: String) {
                val msg = Message(
                    id = msgID,
                    sender = sender,
                    body = text,
                    ts = System.currentTimeMillis(),
                    mine = sender == myId,
                )
                mainHandler.post { onMessage(msg) }
            }
        }
        runCatching { s.subscribe(roomId, listener) }
    }

    /**
     * Publishes [text] to [roomId] and returns a locally built echo of the
     * message. Fails with [IllegalStateException] when there is no session.
     */
    override suspend fun send(roomId: String, text: String): Result<Message> =
        withContext(Dispatchers.IO) {
            val s = session ?: return@withContext Result.failure(IllegalStateException("sin sesión"))
            try {
                s.publish(roomId, text)
                Result.success(
                    Message(
                        // A timestamp alone collides when two messages are sent
                        // within the same millisecond, and the chat list uses
                        // the id as its item key, so use a random UUID instead.
                        id = "local-${java.util.UUID.randomUUID()}",
                        sender = user?.id ?: "yo",
                        body = text,
                        ts = System.currentTimeMillis(),
                        mine = true,
                    ),
                )
            } catch (e: Exception) {
                Result.failure(e)
            }
        }

    /** Re-applies permissions after a membership change. Re-subscribe afterwards. */
    suspend fun refresh(): Result<Unit> = withContext(Dispatchers.IO) {
        runCatching { session?.refreshSession(); Unit }
    }

    /** Closes the session (ignoring close errors) and forgets the user. */
    override fun close() {
        runCatching { session?.close() }
        session = null
        user = null
    }
}
|
||||||
@@ -0,0 +1,59 @@
|
|||||||
|
package com.unibus.app.data
|
||||||
|
|
||||||
|
// Sample data for iterating on the design without a connected bus (mirror of mock.ts).

/** Fixed reference instant, in epoch milliseconds, that all mock timestamps derive from. */
private const val NOW = 1749300000000L

/** Returns the epoch-millisecond timestamp that lies [n] minutes before [NOW]. */
private fun m(n: Int): Long {
    val offsetMillis = n * 60_000L
    return NOW - offsetMillis
}
|
||||||
|
|
||||||
|
/** Rooms (with their message history) that the in-memory repository starts with. */
val MOCK_ROOMS: List<Room> = listOf(
    Room(
        id = "general",
        name = "general",
        encrypted = true,
        lastMessage = "¿Lo desplegamos hoy?",
        lastTs = m(2),
        unread = 3,
        messages = listOf(
            Message(id = "1", sender = "ana", body = "Buenas, ¿cómo va el cluster?", ts = m(40)),
            Message(id = "2", sender = "lucas", body = "Los 3 nodos en R3, quorum verde", ts = m(38), mine = true),
            Message(id = "3", sender = "ana", body = "Brutal. ¿Y el frontend?", ts = m(30)),
            Message(id = "4", sender = "leo", body = "Primera iteración lista, estilo Element", ts = m(6)),
            Message(id = "5", sender = "ana", body = "¿Lo desplegamos hoy?", ts = m(2)),
        ),
    ),
    Room(
        id = "board",
        name = "board · privado",
        encrypted = true,
        lastMessage = "Os paso el acta cifrada",
        lastTs = m(95),
        unread = 0,
        messages = listOf(
            Message(id = "1", sender = "ceo", body = "Reunión a las 18:00", ts = m(120)),
            Message(id = "2", sender = "lucas", body = "Anotado", ts = m(96), mine = true),
            Message(id = "3", sender = "ceo", body = "Os paso el acta cifrada", ts = m(95)),
        ),
    ),
    Room(
        id = "bots",
        name = "bots",
        encrypted = false,
        lastMessage = "echo: ping",
        lastTs = m(210),
        unread = 0,
        messages = listOf(
            Message(id = "1", sender = "lucas", body = "!ping", ts = m(212), mine = true),
            Message(id = "2", sender = "echobot", body = "echo: ping", ts = m(210)),
        ),
    ),
    Room(
        id = "infra",
        name = "infra",
        encrypted = true,
        lastMessage = "magnus + homer + datardos OK",
        lastTs = m(330),
        unread = 1,
        messages = listOf(
            Message(id = "1", sender = "leo", body = "magnus + homer + datardos OK", ts = m(330)),
        ),
    ),
)
|
||||||
@@ -0,0 +1,30 @@
|
|||||||
|
package com.unibus.app.data
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Modelos de dominio de la UI. En la iteración 1 se llenan con datos mock; más
|
||||||
|
* adelante vendrán del binding gomobile (pkg/client) a través de
|
||||||
|
* [UnibusRepository]. Reflejan los tipos de la app web (types.ts).
|
||||||
|
*/
|
||||||
|
|
||||||
|
/**
 * The logged-in peer as shown by the UI.
 *
 * @property id identifier of the user.
 * @property handle visible label chosen at login.
 */
data class User(val id: String, val handle: String)
|
||||||
|
|
||||||
|
/**
 * One chat message.
 *
 * @property id identifier of the message.
 * @property sender who sent it (a handle in the mock data).
 * @property body message text.
 * @property ts timestamp in epoch milliseconds.
 * @property mine whether the local user sent it; defaults to false.
 */
data class Message(
    val id: String,
    val sender: String,
    val body: String,
    val ts: Long,
    val mine: Boolean = false,
)
|
||||||
|
|
||||||
|
/**
 * A room as listed and opened by the UI.
 *
 * @property id identifier of the room.
 * @property name display name.
 * @property encrypted whether the room is end-to-end encrypted.
 * @property lastMessage preview text of the most recent message.
 * @property lastTs timestamp associated with [lastMessage].
 * @property unread number of unread messages.
 * @property messages messages known for this room.
 */
data class Room(
    val id: String,
    val name: String,
    val encrypted: Boolean,
    val lastMessage: String,
    val lastTs: Long,
    val unread: Int,
    val messages: List<Message>,
)
|
||||||
@@ -0,0 +1,74 @@
|
|||||||
|
package com.unibus.app.data
|
||||||
|
|
||||||
|
/**
 * Repository layer that isolates the UI from the data source. Iteration 1 uses
 * [MockUnibusRepository] (in memory) to iterate on the design. Once the real
 * bus is plugged in, [BindingUnibusRepository] (in BindingRepository.kt)
 * implements this same interface on top of the gomobile binding (pkg/client),
 * without touching the UI.
 */
interface UnibusRepository {

    /**
     * Unlocks or creates the identity and connects to the bus.
     *
     * @return the logged-in user, or a failure describing why connecting failed.
     */
    suspend fun connect(handle: String, password: String): Result<User>

    /** Returns the rooms the peer belongs to. */
    suspend fun listRooms(): List<Room>

    /** Returns the known historical messages of a room (mock: those of the Room itself). */
    fun messagesOf(roomId: String): List<Message>

    /**
     * Subscribes to a room; [onMessage] is invoked for every incoming message.
     *
     * Bus-backed implementations MUST deliver [onMessage] on the main thread
     * (the binding receives frames on a NATS goroutine).
     */
    fun subscribe(roomId: String, onMessage: (Message) -> Unit)

    /** Publishes [text] to the room and returns the resulting message. */
    suspend fun send(roomId: String, text: String): Result<Message>

    /** Closes the session. */
    fun close()
}
|
||||||
|
|
||||||
|
/**
 * In-memory implementation: starts from [MOCK_ROOMS] and accumulates the
 * messages the user sends. It touches neither the network nor the binding —
 * it exists to build and review the UI.
 */
class MockUnibusRepository : UnibusRepository {
    private var user: User? = null

    /** Messages sent during this process, grouped by room id. */
    private val sent = mutableMapOf<String, MutableList<Message>>()

    /** Monotonic counter that keeps locally generated message ids unique. */
    private val localSeq = java.util.concurrent.atomic.AtomicLong()

    /** Always succeeds; the mock uses [handle] as the user id too and ignores [password]. */
    override suspend fun connect(handle: String, password: String): Result<User> {
        val loggedIn = User(id = handle, handle = handle)
        user = loggedIn
        return Result.success(loggedIn)
    }

    /** Returns the fixed mock rooms. */
    override suspend fun listRooms(): List<Room> = MOCK_ROOMS

    /** Returns the mock history of [roomId] followed by the messages sent to it so far. */
    override fun messagesOf(roomId: String): List<Message> {
        val base = MOCK_ROOMS.firstOrNull { it.id == roomId }?.messages.orEmpty()
        return base + (sent[roomId].orEmpty())
    }

    override fun subscribe(roomId: String, onMessage: (Message) -> Unit) {
        // The mock receives no incoming traffic; the echo is handled by the UI on send.
    }

    /**
     * Records [text] as a message sent by the current user (or "yo" when nobody
     * is logged in) and returns it.
     *
     * The id combines the send time with a per-repository sequence number: a
     * timestamp alone repeats when two messages are sent within the same
     * millisecond, and the chat list uses the id as its item key, which must be
     * unique.
     */
    override suspend fun send(roomId: String, text: String): Result<Message> {
        val handle = user?.handle ?: "yo"
        val now = System.currentTimeMillis()
        val msg = Message(
            id = "local-$now-${localSeq.getAndIncrement()}",
            sender = handle,
            body = text,
            ts = now,
            mine = true,
        )
        sent.getOrPut(roomId) { mutableListOf() }.add(msg)
        return Result.success(msg)
    }

    /** Forgets the logged-in user; already sent messages are kept. */
    override fun close() {
        user = null
    }
}
|
||||||
@@ -0,0 +1,203 @@
|
|||||||
|
package com.unibus.app.ui
|
||||||
|
|
||||||
|
import androidx.compose.foundation.background
|
||||||
|
import androidx.compose.foundation.layout.Arrangement
|
||||||
|
import androidx.compose.foundation.layout.Box
|
||||||
|
import androidx.compose.foundation.layout.Column
|
||||||
|
import androidx.compose.foundation.layout.Row
|
||||||
|
import androidx.compose.foundation.layout.fillMaxSize
|
||||||
|
import androidx.compose.foundation.layout.fillMaxWidth
|
||||||
|
import androidx.compose.foundation.layout.padding
|
||||||
|
import androidx.compose.foundation.layout.size
|
||||||
|
import androidx.compose.foundation.lazy.LazyColumn
|
||||||
|
import androidx.compose.foundation.lazy.items
|
||||||
|
import androidx.compose.foundation.lazy.rememberLazyListState
|
||||||
|
import androidx.compose.foundation.shape.CircleShape
|
||||||
|
import androidx.compose.foundation.text.KeyboardActions
|
||||||
|
import androidx.compose.foundation.text.KeyboardOptions
|
||||||
|
import androidx.compose.material.icons.Icons
|
||||||
|
import androidx.compose.material.icons.automirrored.filled.ArrowBack
|
||||||
|
import androidx.compose.material.icons.automirrored.filled.Send
|
||||||
|
import androidx.compose.material.icons.filled.AttachFile
|
||||||
|
import androidx.compose.material.icons.filled.Lock
|
||||||
|
import androidx.compose.material.icons.filled.MoreVert
|
||||||
|
import androidx.compose.material.icons.filled.Tag
|
||||||
|
import androidx.compose.material3.HorizontalDivider
|
||||||
|
import androidx.compose.material3.Icon
|
||||||
|
import androidx.compose.material3.IconButton
|
||||||
|
import androidx.compose.material3.OutlinedTextField
|
||||||
|
import androidx.compose.material3.Text
|
||||||
|
import androidx.compose.material3.TextFieldDefaults
|
||||||
|
import androidx.compose.runtime.Composable
|
||||||
|
import androidx.compose.runtime.LaunchedEffect
|
||||||
|
import androidx.compose.runtime.getValue
|
||||||
|
import androidx.compose.runtime.mutableStateOf
|
||||||
|
import androidx.compose.runtime.remember
|
||||||
|
import androidx.compose.runtime.setValue
|
||||||
|
import androidx.compose.ui.Alignment
|
||||||
|
import androidx.compose.ui.Modifier
|
||||||
|
import androidx.compose.ui.draw.clip
|
||||||
|
import androidx.compose.ui.graphics.Color
|
||||||
|
import androidx.compose.ui.text.font.FontWeight
|
||||||
|
import androidx.compose.ui.text.style.TextOverflow
|
||||||
|
import androidx.compose.ui.unit.dp
|
||||||
|
import androidx.compose.ui.unit.sp
|
||||||
|
import com.unibus.app.data.Message
|
||||||
|
import com.unibus.app.data.Room
|
||||||
|
import com.unibus.app.ui.theme.Brand3
|
||||||
|
import com.unibus.app.ui.theme.LocalUnibusColors
|
||||||
|
|
||||||
|
/**
 * Chat view for one room: a header, the scrolling message list and a composer.
 *
 * @param room room being shown (name and encryption flag drive the header).
 * @param messages messages to render, in display order.
 * @param onSend invoked with the draft text when the user sends a non-blank message.
 * @param onBack invoked when the back arrow is pressed.
 */
@Composable
fun ChatScreen(
    room: Room,
    messages: List<Message>,
    onSend: (String) -> Unit,
    onBack: () -> Unit,
) {
    val palette = LocalUnibusColors.current
    var draft by remember { mutableStateOf("") }
    val scrollState = rememberLazyListState()
    val hasText = draft.trim().isNotEmpty()

    // Sends the current draft (untrimmed, as typed) and clears the field;
    // blank drafts are ignored.
    fun submit() {
        if (draft.trim().isEmpty()) return
        onSend(draft)
        draft = ""
    }

    // Keep the newest message in view whenever the list grows or the room changes.
    LaunchedEffect(messages.size, room.id) {
        val lastIndex = messages.size - 1
        if (lastIndex >= 0) scrollState.animateScrollToItem(lastIndex)
    }

    Column(
        modifier = Modifier
            .fillMaxSize()
            .background(palette.chatBg),
    ) {
        // Header
        Row(
            modifier = Modifier
                .fillMaxWidth()
                .padding(horizontal = 6.dp, vertical = 8.dp),
            verticalAlignment = Alignment.CenterVertically,
        ) {
            IconButton(onClick = onBack) {
                Icon(
                    Icons.AutoMirrored.Filled.ArrowBack,
                    contentDescription = "Atrás",
                    tint = Color.White,
                )
            }
            InitialsAvatar(room.name, size = 38.dp, rounded = true, accent = true)
            Column(
                modifier = Modifier
                    .weight(1f)
                    .padding(start = 10.dp),
            ) {
                Row(verticalAlignment = Alignment.CenterVertically) {
                    Text(
                        room.name,
                        fontWeight = FontWeight(650),
                        fontSize = 16.sp,
                        color = Color.White,
                        maxLines = 1,
                        overflow = TextOverflow.Ellipsis,
                    )
                    val badge = if (room.encrypted) Icons.Filled.Lock else Icons.Filled.Tag
                    Icon(
                        badge,
                        contentDescription = null,
                        tint = palette.dimmed,
                        modifier = Modifier
                            .padding(start = 6.dp)
                            .size(14.dp),
                    )
                }
                val subtitle = if (room.encrypted) "cifrada · E2E" else "abierta · cleartext"
                Text(
                    subtitle,
                    color = palette.dimmed,
                    fontSize = 11.sp,
                )
            }
            IconButton(onClick = { /* room options (future) */ }) {
                Icon(Icons.Filled.MoreVert, contentDescription = "Opciones", tint = palette.dimmed)
            }
        }
        HorizontalDivider(color = palette.divider)

        // Messages
        LazyColumn(
            state = scrollState,
            modifier = Modifier
                .weight(1f)
                .fillMaxWidth(),
            contentPadding = androidx.compose.foundation.layout.PaddingValues(14.dp),
            verticalArrangement = Arrangement.spacedBy(16.dp),
        ) {
            items(items = messages, key = { message -> message.id }) { message ->
                MessageRow(message)
            }
        }

        HorizontalDivider(color = palette.divider)

        // Composer
        Row(
            modifier = Modifier
                .fillMaxWidth()
                .padding(8.dp),
            verticalAlignment = Alignment.CenterVertically,
        ) {
            IconButton(onClick = { /* attach (future) */ }) {
                Icon(Icons.Filled.AttachFile, contentDescription = "Adjuntar", tint = palette.dimmed)
            }
            OutlinedTextField(
                value = draft,
                onValueChange = { typed -> draft = typed },
                placeholder = { Text("Mensaje a ${room.name}") },
                singleLine = true,
                shape = CircleShape,
                colors = TextFieldDefaults.colors(
                    focusedContainerColor = palette.field,
                    unfocusedContainerColor = palette.field,
                ),
                modifier = Modifier.weight(1f),
                keyboardOptions = KeyboardOptions(imeAction = androidx.compose.ui.text.input.ImeAction.Send),
                keyboardActions = KeyboardActions(onSend = { submit() }),
            )
            // Round send button: brand-coloured only while there is something to send.
            val sendBackground = if (hasText) palette.brand else palette.field
            Box(
                modifier = Modifier
                    .padding(start = 6.dp)
                    .size(46.dp)
                    .clip(CircleShape)
                    .background(sendBackground),
                contentAlignment = Alignment.Center,
            ) {
                IconButton(
                    onClick = { submit() },
                    enabled = hasText,
                ) {
                    Icon(Icons.AutoMirrored.Filled.Send, contentDescription = "Enviar", tint = Color.White)
                }
            }
        }
    }
}
|
||||||
|
|
||||||
|
/**
 * One message in the list: the sender's avatar, then the sender name with the
 * time next to it, and the body underneath. Own messages get the accent
 * avatar and a brand-coloured name.
 */
@Composable
private fun MessageRow(msg: Message) {
    val palette = LocalUnibusColors.current
    val senderColor = if (msg.mine) Brand3 else Color.White

    Row(verticalAlignment = Alignment.Top) {
        InitialsAvatar(text = msg.sender, size = 36.dp, rounded = false, accent = msg.mine)
        Column(modifier = Modifier.padding(start = 10.dp)) {
            Row(verticalAlignment = Alignment.Bottom) {
                Text(
                    text = msg.sender,
                    fontWeight = FontWeight.SemiBold,
                    fontSize = 14.sp,
                    color = senderColor,
                )
                Text(
                    text = timeShort(msg.ts),
                    color = palette.dimmed,
                    fontSize = 11.sp,
                    modifier = Modifier.padding(start = 8.dp),
                )
            }
            Text(
                text = msg.body,
                fontSize = 14.sp,
                color = com.unibus.app.ui.theme.OnSurface,
                modifier = Modifier.padding(top = 1.dp),
            )
        }
    }
}
|
||||||
@@ -0,0 +1,48 @@
|
|||||||
|
package com.unibus.app.ui
|
||||||
|
|
||||||
|
import androidx.compose.foundation.background
|
||||||
|
import androidx.compose.foundation.layout.Box
|
||||||
|
import androidx.compose.foundation.layout.size
|
||||||
|
import androidx.compose.foundation.shape.CircleShape
|
||||||
|
import androidx.compose.foundation.shape.RoundedCornerShape
|
||||||
|
import androidx.compose.material3.Text
|
||||||
|
import androidx.compose.runtime.Composable
|
||||||
|
import androidx.compose.ui.Alignment
|
||||||
|
import androidx.compose.ui.Modifier
|
||||||
|
import androidx.compose.ui.draw.clip
|
||||||
|
import androidx.compose.ui.graphics.Color
|
||||||
|
import androidx.compose.ui.text.font.FontWeight
|
||||||
|
import androidx.compose.ui.unit.Dp
|
||||||
|
import androidx.compose.ui.unit.dp
|
||||||
|
import androidx.compose.ui.unit.sp
|
||||||
|
import com.unibus.app.ui.theme.Brand5
|
||||||
|
|
||||||
|
/**
 * Avatar showing initials, the counterpart of the web app's <Avatar>.
 *
 * @param text source string whose initials are displayed.
 * @param size width and height of the avatar; corner radius and font size scale with it.
 * @param rounded true for rounded corners (rooms / chat header), false for a circle (users).
 * @param accent true to use the brand colour as background instead of neutral gray.
 * @param modifier modifier applied to the avatar container.
 */
@Composable
fun InitialsAvatar(
    text: String,
    size: Dp = 42.dp,
    rounded: Boolean = true,
    accent: Boolean = false,
    modifier: Modifier = Modifier,
) {
    val outline = when {
        rounded -> RoundedCornerShape((size.value * 0.28f).dp)
        else -> CircleShape
    }
    // Neutral gray, like the web Avatar with color="gray", unless accented.
    val fill = when {
        accent -> Brand5
        else -> Color(0xFF3A3D44)
    }
    val label = initials(text)

    Box(
        modifier = modifier
            .size(size)
            .clip(outline)
            .background(fill),
        contentAlignment = Alignment.Center,
    ) {
        Text(
            text = label,
            color = Color.White,
            fontWeight = FontWeight.SemiBold,
            fontSize = (size.value * 0.36f).sp,
        )
    }
}
|
||||||
@@ -0,0 +1,154 @@
|
|||||||
|
package com.unibus.app.ui
|
||||||
|
|
||||||
|
import androidx.compose.foundation.background
|
||||||
|
import androidx.compose.foundation.layout.Arrangement
|
||||||
|
import androidx.compose.foundation.layout.Box
|
||||||
|
import androidx.compose.foundation.layout.Column
|
||||||
|
import androidx.compose.foundation.layout.fillMaxSize
|
||||||
|
import androidx.compose.foundation.layout.fillMaxWidth
|
||||||
|
import androidx.compose.foundation.layout.padding
|
||||||
|
import androidx.compose.foundation.layout.size
|
||||||
|
import androidx.compose.foundation.shape.CircleShape
|
||||||
|
import androidx.compose.foundation.text.KeyboardActions
|
||||||
|
import androidx.compose.foundation.text.KeyboardOptions
|
||||||
|
import androidx.compose.material.icons.Icons
|
||||||
|
import androidx.compose.material.icons.filled.Lock
|
||||||
|
import androidx.compose.material.icons.filled.VpnKey
|
||||||
|
import androidx.compose.material3.Button
|
||||||
|
import androidx.compose.material3.Card
|
||||||
|
import androidx.compose.material3.CardDefaults
|
||||||
|
import androidx.compose.material3.CircularProgressIndicator
|
||||||
|
import androidx.compose.material3.Icon
|
||||||
|
import androidx.compose.material3.OutlinedTextField
|
||||||
|
import androidx.compose.material3.Text
|
||||||
|
import androidx.compose.runtime.Composable
|
||||||
|
import androidx.compose.runtime.getValue
|
||||||
|
import androidx.compose.runtime.mutableStateOf
|
||||||
|
import androidx.compose.runtime.remember
|
||||||
|
import androidx.compose.runtime.setValue
|
||||||
|
import androidx.compose.ui.Alignment
|
||||||
|
import androidx.compose.ui.Modifier
|
||||||
|
import androidx.compose.ui.draw.clip
|
||||||
|
import androidx.compose.ui.text.input.ImeAction
|
||||||
|
import androidx.compose.ui.text.input.PasswordVisualTransformation
|
||||||
|
import androidx.compose.ui.text.style.TextAlign
|
||||||
|
import androidx.compose.ui.unit.dp
|
||||||
|
import androidx.compose.ui.unit.sp
|
||||||
|
import com.unibus.app.ui.theme.Brand4
|
||||||
|
import com.unibus.app.ui.theme.Dark7
|
||||||
|
import com.unibus.app.ui.theme.Dark9
|
||||||
|
import com.unibus.app.ui.theme.LocalUnibusColors
|
||||||
|
|
||||||
|
/**
 * Login form: asks for the identity handle and the password, then reports both
 * through [onLogin].
 *
 * @param connecting true while a login attempt is running; submission is
 *   disabled and the button shows a spinner instead of its label.
 * @param error message rendered above the button, or null to show nothing.
 * @param onLogin called with the trimmed handle and the password exactly as typed.
 */
@Composable
fun LoginScreen(
    connecting: Boolean,
    error: String?,
    onLogin: (handle: String, password: String) -> Unit,
) {
    val colors = LocalUnibusColors.current
    var handle by remember { mutableStateOf("") }
    var password by remember { mutableStateOf("") }
    // Submitting needs a non-blank handle, a password, and no attempt in flight.
    val ready = handle.trim().isNotEmpty() && password.isNotEmpty() && !connecting

    // Shared by the button and the keyboard "Go" action so both honour `ready`.
    fun submit() {
        if (ready) onLogin(handle.trim(), password)
    }

    Box(
        modifier = Modifier
            .fillMaxSize()
            .background(Dark9),
        contentAlignment = Alignment.Center,
    ) {
        Card(
            modifier = Modifier
                .padding(24.dp)
                .fillMaxWidth(),
            colors = CardDefaults.cardColors(containerColor = Dark7),
            shape = androidx.compose.foundation.shape.RoundedCornerShape(16.dp),
        ) {
            Column(
                modifier = Modifier
                    .fillMaxWidth()
                    .padding(28.dp),
                horizontalAlignment = Alignment.CenterHorizontally,
                verticalArrangement = Arrangement.spacedBy(18.dp),
            ) {
                // ThemeIcon "light brand" — translucent circle with a padlock.
                Box(
                    modifier = Modifier
                        .size(60.dp)
                        .clip(CircleShape)
                        .background(Brand4.copy(alpha = 0.18f)),
                    contentAlignment = Alignment.Center,
                ) {
                    Icon(
                        Icons.Filled.Lock,
                        contentDescription = null,
                        tint = Brand4,
                        modifier = Modifier.size(30.dp),
                    )
                }

                Column(horizontalAlignment = Alignment.CenterHorizontally) {
                    Text("unibus", fontSize = 26.sp, color = Brand4)
                    Text(
                        "Mensajería cifrada de extremo a extremo",
                        color = colors.dimmed,
                        fontSize = 13.sp,
                        textAlign = TextAlign.Center,
                    )
                }

                OutlinedTextField(
                    value = handle,
                    onValueChange = { handle = it },
                    label = { Text("Identidad") },
                    placeholder = { Text("tu-handle") },
                    singleLine = true,
                    modifier = Modifier.fillMaxWidth(),
                    keyboardOptions = KeyboardOptions(imeAction = ImeAction.Next),
                )

                OutlinedTextField(
                    value = password,
                    onValueChange = { password = it },
                    label = { Text("Contraseña") },
                    placeholder = { Text("••••••••") },
                    singleLine = true,
                    visualTransformation = PasswordVisualTransformation(),
                    leadingIcon = { Icon(Icons.Filled.VpnKey, contentDescription = null) },
                    modifier = Modifier.fillMaxWidth(),
                    keyboardOptions = KeyboardOptions(
                        // Ask the IME for a password keyboard: the visual mask alone
                        // does not stop it from suggesting or learning the secret.
                        keyboardType = androidx.compose.ui.text.input.KeyboardType.Password,
                        imeAction = ImeAction.Go,
                    ),
                    keyboardActions = KeyboardActions(onGo = { submit() }),
                )
                Text(
                    "Desbloquea tu identidad cifrada en este dispositivo",
                    color = colors.dimmed,
                    fontSize = 12.sp,
                    modifier = Modifier.fillMaxWidth(),
                )

                if (error != null) {
                    Text(error, color = androidx.compose.ui.graphics.Color(0xFFFF6B6B), fontSize = 13.sp)
                }

                Button(
                    onClick = { submit() },
                    enabled = ready,
                    modifier = Modifier.fillMaxWidth(),
                ) {
                    if (connecting) {
                        CircularProgressIndicator(
                            modifier = Modifier.size(18.dp),
                            strokeWidth = 2.dp,
                            color = androidx.compose.ui.graphics.Color.White,
                        )
                    } else {
                        Text("Conectar")
                    }
                }
            }
        }
    }
}
|
||||||
@@ -0,0 +1,199 @@
|
|||||||
|
package com.unibus.app.ui
|
||||||
|
|
||||||
|
import androidx.compose.foundation.background
|
||||||
|
import androidx.compose.foundation.clickable
|
||||||
|
import androidx.compose.foundation.layout.Arrangement
|
||||||
|
import androidx.compose.foundation.layout.Box
|
||||||
|
import androidx.compose.foundation.layout.Column
|
||||||
|
import androidx.compose.foundation.layout.Row
|
||||||
|
import androidx.compose.foundation.layout.fillMaxSize
|
||||||
|
import androidx.compose.foundation.layout.fillMaxWidth
|
||||||
|
import androidx.compose.foundation.layout.padding
|
||||||
|
import androidx.compose.foundation.layout.size
|
||||||
|
import androidx.compose.foundation.lazy.LazyColumn
|
||||||
|
import androidx.compose.foundation.lazy.items
|
||||||
|
import androidx.compose.foundation.shape.CircleShape
|
||||||
|
import androidx.compose.foundation.shape.RoundedCornerShape
|
||||||
|
import androidx.compose.material.icons.Icons
|
||||||
|
import androidx.compose.material.icons.automirrored.filled.Logout
|
||||||
|
import androidx.compose.material.icons.filled.Lock
|
||||||
|
import androidx.compose.material.icons.filled.MoreVert
|
||||||
|
import androidx.compose.material.icons.filled.Search
|
||||||
|
import androidx.compose.material.icons.filled.Tag
|
||||||
|
import androidx.compose.material3.Badge
|
||||||
|
import androidx.compose.material3.DropdownMenu
|
||||||
|
import androidx.compose.material3.DropdownMenuItem
|
||||||
|
import androidx.compose.material3.HorizontalDivider
|
||||||
|
import androidx.compose.material3.Icon
|
||||||
|
import androidx.compose.material3.IconButton
|
||||||
|
import androidx.compose.material3.OutlinedTextField
|
||||||
|
import androidx.compose.material3.Text
|
||||||
|
import androidx.compose.runtime.Composable
|
||||||
|
import androidx.compose.runtime.getValue
|
||||||
|
import androidx.compose.runtime.mutableStateOf
|
||||||
|
import androidx.compose.runtime.remember
|
||||||
|
import androidx.compose.runtime.setValue
|
||||||
|
import androidx.compose.ui.Alignment
|
||||||
|
import androidx.compose.ui.Modifier
|
||||||
|
import androidx.compose.ui.draw.clip
|
||||||
|
import androidx.compose.ui.graphics.Color
|
||||||
|
import androidx.compose.ui.text.font.FontWeight
|
||||||
|
import androidx.compose.ui.text.style.TextOverflow
|
||||||
|
import androidx.compose.ui.unit.dp
|
||||||
|
import androidx.compose.ui.unit.sp
|
||||||
|
import com.unibus.app.data.Room
|
||||||
|
import com.unibus.app.data.User
|
||||||
|
import com.unibus.app.ui.theme.LocalUnibusColors
|
||||||
|
|
||||||
|
/**
 * Room list screen: a header (avatar, handle, overflow menu), a search box and
 * the rooms matching the current query.
 *
 * A room matches when the trimmed, lower-cased query is contained in its name
 * or in the body of any of its messages; a blank query shows every room.
 *
 * @param user identity whose handle is shown in the header.
 * @param rooms rooms to list and filter.
 * @param onSelect called with the id of the tapped room.
 * @param onLogout called when "Desconectar" is chosen from the menu.
 */
@Composable
fun RoomListScreen(
    user: User,
    rooms: List<Room>,
    onSelect: (String) -> Unit,
    onLogout: () -> Unit,
) {
    val colors = LocalUnibusColors.current
    var query by remember { mutableStateOf("") }
    val needle = query.trim().lowercase()
    val visibleRooms = when {
        needle.isEmpty() -> rooms
        else -> rooms.filter { candidate ->
            candidate.name.lowercase().contains(needle) ||
                candidate.messages.any { msg -> msg.body.lowercase().contains(needle) }
        }
    }

    Column(
        modifier = Modifier
            .fillMaxSize()
            .background(colors.sidebarBg),
    ) {
        // Header: avatar + handle + menu
        Row(
            modifier = Modifier
                .fillMaxWidth()
                .padding(horizontal = 12.dp, vertical = 10.dp),
            verticalAlignment = Alignment.CenterVertically,
        ) {
            InitialsAvatar(user.handle, size = 36.dp, rounded = false, accent = true)
            Text(
                user.handle,
                fontWeight = FontWeight.SemiBold,
                fontSize = 15.sp,
                color = Color.White,
                maxLines = 1,
                overflow = TextOverflow.Ellipsis,
                modifier = Modifier
                    .weight(1f)
                    .padding(start = 10.dp),
            )
            var menuOpen by remember { mutableStateOf(false) }
            Box {
                IconButton(onClick = { menuOpen = true }) {
                    Icon(Icons.Filled.MoreVert, contentDescription = "Menú", tint = colors.dimmed)
                }
                DropdownMenu(expanded = menuOpen, onDismissRequest = { menuOpen = false }) {
                    DropdownMenuItem(
                        text = { Text("Desconectar") },
                        onClick = {
                            // Close the menu first, then hand control to the caller.
                            menuOpen = false
                            onLogout()
                        },
                        leadingIcon = {
                            Icon(
                                Icons.AutoMirrored.Filled.Logout,
                                contentDescription = null,
                                modifier = Modifier.size(18.dp),
                            )
                        },
                    )
                }
            }
        }

        // Search box
        OutlinedTextField(
            value = query,
            onValueChange = { query = it },
            placeholder = { Text("Buscar rooms, usuarios, mensajes…") },
            leadingIcon = {
                Icon(Icons.Filled.Search, contentDescription = null, modifier = Modifier.size(18.dp))
            },
            singleLine = true,
            modifier = Modifier
                .fillMaxWidth()
                .padding(horizontal = 12.dp, vertical = 4.dp),
        )

        HorizontalDivider(color = colors.divider)

        if (visibleRooms.isNotEmpty()) {
            LazyColumn(
                modifier = Modifier.fillMaxSize(),
                contentPadding = androidx.compose.foundation.layout.PaddingValues(6.dp),
                verticalArrangement = Arrangement.spacedBy(2.dp),
            ) {
                items(visibleRooms, key = { it.id }) { entry ->
                    RoomItem(room = entry, onClick = { onSelect(entry.id) })
                }
            }
        } else {
            // Nothing matched (or there are no rooms at all).
            Text(
                "Sin resultados",
                color = colors.dimmed,
                fontSize = 14.sp,
                modifier = Modifier
                    .fillMaxWidth()
                    .padding(top = 24.dp),
                textAlign = androidx.compose.ui.text.style.TextAlign.Center,
            )
        }
    }
}
|
||||||
|
|
||||||
|
/**
 * One clickable row of the room list: avatar, a title line (encryption icon,
 * room name, time of the last activity) and a preview line (last message plus
 * an unread badge when the count is positive).
 */
@Composable
private fun RoomItem(room: Room, onClick: () -> Unit) {
    val palette = LocalUnibusColors.current
    val rowModifier = Modifier
        .fillMaxWidth()
        .clip(RoundedCornerShape(10.dp))
        .clickable(onClick = onClick)
        .padding(8.dp)

    Row(modifier = rowModifier, verticalAlignment = Alignment.CenterVertically) {
        InitialsAvatar(room.name, size = 46.dp, rounded = true)
        Column(
            modifier = Modifier
                .weight(1f)
                .padding(start = 10.dp),
        ) {
            // Title line: lock/tag icon, name, short timestamp.
            Row(verticalAlignment = Alignment.CenterVertically) {
                val encrypted = room.encrypted
                Icon(
                    if (encrypted) Icons.Filled.Lock else Icons.Filled.Tag,
                    contentDescription = if (encrypted) "cifrada" else "abierta",
                    tint = palette.dimmed,
                    modifier = Modifier.size(13.dp),
                )
                Text(
                    room.name,
                    fontWeight = FontWeight.SemiBold,
                    fontSize = 14.sp,
                    color = Color.White,
                    maxLines = 1,
                    overflow = TextOverflow.Ellipsis,
                    modifier = Modifier
                        .weight(1f)
                        .padding(start = 4.dp),
                )
                Text(timeShort(room.lastTs), color = palette.dimmed, fontSize = 11.sp)
            }
            // Preview line: last message and, if any, the unread counter.
            Row(
                verticalAlignment = Alignment.CenterVertically,
                modifier = Modifier.padding(top = 2.dp),
            ) {
                Text(
                    room.lastMessage,
                    color = palette.dimmed,
                    fontSize = 12.sp,
                    maxLines = 1,
                    overflow = TextOverflow.Ellipsis,
                    modifier = Modifier.weight(1f),
                )
                if (room.unread > 0) {
                    Badge(
                        containerColor = palette.brand,
                        contentColor = Color.White,
                    ) {
                        Text(room.unread.toString())
                    }
                }
            }
        }
    }
}
|
||||||
@@ -0,0 +1,17 @@
|
|||||||
|
package com.unibus.app.ui
|
||||||
|
|
||||||
|
import java.util.Calendar
|
||||||
|
|
||||||
|
/**
 * Avatar initials: the first two letter-or-digit characters of [s], upper-cased.
 * Returns "?" when [s] contains no letter or digit.
 */
fun initials(s: String): String {
    val picked = StringBuilder(2)
    for (ch in s) {
        if (!ch.isLetterOrDigit()) continue
        picked.append(ch)
        if (picked.length == 2) break
    }
    return if (picked.isEmpty()) "?" else picked.toString().uppercase()
}
|
||||||
|
|
||||||
|
/**
 * Short HH:mm clock time for [ts] (epoch milliseconds), using the calendar
 * returned by [Calendar.getInstance].
 */
fun timeShort(ts: Long): String {
    val cal = Calendar.getInstance()
    cal.timeInMillis = ts

    // Zero-pads a calendar field to two digits.
    fun twoDigits(field: Int): String = cal.get(field).toString().padStart(2, '0')

    return twoDigits(Calendar.HOUR_OF_DAY) + ":" + twoDigits(Calendar.MINUTE)
}
|
||||||
@@ -0,0 +1,80 @@
|
|||||||
|
package com.unibus.app.ui.theme
|
||||||
|
|
||||||
|
import androidx.compose.foundation.isSystemInDarkTheme
|
||||||
|
import androidx.compose.material3.MaterialTheme
|
||||||
|
import androidx.compose.material3.Typography
|
||||||
|
import androidx.compose.material3.darkColorScheme
|
||||||
|
import androidx.compose.runtime.Composable
|
||||||
|
import androidx.compose.runtime.staticCompositionLocalOf
|
||||||
|
import androidx.compose.ui.graphics.Color
|
||||||
|
import androidx.compose.ui.text.font.FontWeight
|
||||||
|
import androidx.compose.ui.unit.sp
|
||||||
|
|
||||||
|
// --- Brand: unibus indigo/violet (same shades as the web's Mantine theme) ---
val Brand2: Color = Color(0xFFB5A3F5) // brand.2
val Brand3: Color = Color(0xFF8D70ED) // brand.3 — readable on a dark background
val Brand4: Color = Color(0xFF6C47E6) // brand.4 — primary accent
val Brand5: Color = Color(0xFF5A2FE2) // brand.5 — filled

// --- Dark greys equivalent to Mantine's dark.* scale ---
val Dark9: Color = Color(0xFF101113) // app background (login)
val Dark8: Color = Color(0xFF141517) // sidebar / room list
val Dark7: Color = Color(0xFF1A1B1E) // chat panel / surface
val Dark6: Color = Color(0xFF25262B) // active / elevated item
val Dark5: Color = Color(0xFF2C2E33) // input fields
val Dark4: Color = Color(0xFF373A40) // borders / dividers
val Dimmed: Color = Color(0xFF909296) // secondary text
val OnSurface: Color = Color(0xFFE3E3E6) // primary text
|
||||||
|
|
||||||
|
/**
 * Colour tokens that Material 3 does not express directly and that the UI
 * mirrors from the web (dark.6/7/8/9 shades, the "dimmed" colour, borders).
 * They are exposed through a CompositionLocal so any composable can read them
 * without prop-drilling.
 */
data class UnibusColors(
    /** App background; defaults to [Dark9]. */
    val appBg: Color = Dark9,
    /** Sidebar background; defaults to [Dark8]. */
    val sidebarBg: Color = Dark8,
    /** Chat panel background; defaults to [Dark7]. */
    val chatBg: Color = Dark7,
    /** Active item background; defaults to [Dark6]. */
    val activeItem: Color = Dark6,
    /** Input field colour; defaults to [Dark5]. */
    val field: Color = Dark5,
    /** Divider colour; defaults to [Dark4]. */
    val divider: Color = Dark4,
    /** Dimmed (secondary) colour; defaults to [Dimmed]. */
    val dimmed: Color = Dimmed,
    /** Brand accent; defaults to [Brand4]. */
    val brand: Color = Brand4,
)
|
||||||
|
|
||||||
|
/** CompositionLocal carrying [UnibusColors]; falls back to the default palette when nothing is provided. */
val LocalUnibusColors = staticCompositionLocalOf<UnibusColors> { UnibusColors() }
|
||||||
|
|
||||||
|
/** Material 3 dark colour scheme built from the brand and dark-grey constants above. */
private val UnibusDarkScheme = darkColorScheme(
    // Accent roles
    primary = Brand4,
    onPrimary = Color.White,
    primaryContainer = Brand5,
    onPrimaryContainer = Color.White,
    secondary = Brand3,

    // Backgrounds and surfaces
    background = Dark9,
    onBackground = OnSurface,
    surface = Dark7,
    onSurface = OnSurface,
    surfaceVariant = Dark6,
    onSurfaceVariant = Dimmed,

    // Outlines and errors
    outline = Dark4,
    error = Color(0xFFFF6B6B),
)
|
||||||
|
|
||||||
|
/**
 * Material 3 default typography with heavier titles (weight 650), a 14sp
 * medium body and a semi-bold large label.
 */
private val UnibusTypography = Typography().let { base ->
    Typography(
        titleLarge = base.titleLarge.copy(fontWeight = FontWeight(650)),
        titleMedium = base.titleMedium.copy(fontWeight = FontWeight(650)),
        bodyMedium = base.bodyMedium.copy(fontSize = 14.sp),
        labelLarge = base.labelLarge.copy(fontWeight = FontWeight.SemiBold),
    )
}
|
||||||
|
|
||||||
|
/**
 * Applies the unibus Material 3 theme (dark colour scheme + custom typography)
 * to [content].
 *
 * unibus is dark-first: the system light/dark setting is deliberately not
 * consulted, so the same dark scheme is always used.
 */
@Composable
fun UnibusTheme(content: @Composable () -> Unit) {
    // The system dark-mode flag is intentionally never read: its value would be
    // discarded, and reading it would only tie this composable to system
    // configuration changes for no benefit.
    MaterialTheme(
        colorScheme = UnibusDarkScheme,
        typography = UnibusTypography,
        content = content,
    )
}
|
||||||
@@ -0,0 +1,18 @@
|
|||||||
|
<?xml version="1.0" encoding="utf-8"?>
|
||||||
|
<vector xmlns:android="http://schemas.android.com/apk/res/android"
|
||||||
|
android:width="108dp"
|
||||||
|
android:height="108dp"
|
||||||
|
android:viewportWidth="108"
|
||||||
|
android:viewportHeight="108">
|
||||||
|
<!-- Material "lock" glyph, white, centered in the adaptive-icon safe zone.
|
||||||
|
24dp source scaled x3 (=72dp) and translated by 18 to center it. -->
|
||||||
|
<group
|
||||||
|
android:scaleX="3"
|
||||||
|
android:scaleY="3"
|
||||||
|
android:translateX="18"
|
||||||
|
android:translateY="18">
|
||||||
|
<path
|
||||||
|
android:fillColor="#FFFFFF"
|
||||||
|
android:pathData="M12,17c1.1,0 2,-0.9 2,-2s-0.9,-2 -2,-2 -2,0.9 -2,2 0.9,2 2,2zM18,8h-1V6c0,-2.76 -2.24,-5 -5,-5S7,3.24 7,6v2H6c-1.1,0 -2,0.9 -2,2v10c0,1.1 0.9,2 2,2h12c1.1,0 2,-0.9 2,-2V10c0,-1.1 -0.9,-2 -2,-2zM9,6c0,-1.66 1.34,-3 3,-3s3,1.34 3,3v2H9V6z" />
|
||||||
|
</group>
|
||||||
|
</vector>
|
||||||
@@ -0,0 +1,5 @@
|
|||||||
|
<?xml version="1.0" encoding="utf-8"?>
|
||||||
|
<adaptive-icon xmlns:android="http://schemas.android.com/apk/res/android">
|
||||||
|
<background android:drawable="@color/unibus_brand" />
|
||||||
|
<foreground android:drawable="@drawable/ic_launcher_foreground" />
|
||||||
|
</adaptive-icon>
|
||||||
@@ -0,0 +1,5 @@
|
|||||||
|
<?xml version="1.0" encoding="utf-8"?>
|
||||||
|
<adaptive-icon xmlns:android="http://schemas.android.com/apk/res/android">
|
||||||
|
<background android:drawable="@color/unibus_brand" />
|
||||||
|
<foreground android:drawable="@drawable/ic_launcher_foreground" />
|
||||||
|
</adaptive-icon>
|
||||||
@@ -0,0 +1,7 @@
|
|||||||
|
<?xml version="1.0" encoding="utf-8"?>
|
||||||
|
<resources>
|
||||||
|
<!-- dark.9 — app background -->
|
||||||
|
<color name="unibus_bg">#101113</color>
|
||||||
|
<!-- brand.5 — índigo/violeta accent, used as launcher icon background -->
|
||||||
|
<color name="unibus_brand">#5A2FE2</color>
|
||||||
|
</resources>
|
||||||
@@ -1,6 +1,11 @@
|
|||||||
<?xml version="1.0" encoding="utf-8"?>
|
<?xml version="1.0" encoding="utf-8"?>
|
||||||
<resources>
|
<resources>
|
||||||
<!-- A minimal Material3 base theme; the real UI styling is driven by Compose
|
<!-- Compose-only host theme: no action bar, dark window background matching
|
||||||
Material3 (MaterialTheme) at runtime. -->
|
the app's dark.9 surface so there is no white flash before Compose draws. -->
|
||||||
<style name="Theme.Unibus" parent="android:Theme.Material.NoActionBar" />
|
<style name="Theme.Unibus" parent="android:Theme.Material.NoActionBar">
|
||||||
|
<item name="android:windowBackground">@color/unibus_bg</item>
|
||||||
|
<item name="android:statusBarColor">@color/unibus_bg</item>
|
||||||
|
<item name="android:navigationBarColor">@color/unibus_bg</item>
|
||||||
|
<item name="android:windowLightStatusBar">false</item>
|
||||||
|
</style>
|
||||||
</resources>
|
</resources>
|
||||||
|
|||||||
@@ -1,8 +1,5 @@
|
|||||||
// Top-level build file. Plugin versions are declared here and applied in the
|
|
||||||
// module build scripts. AGP 8.5 + Kotlin 2.0 (with the dedicated Compose
|
|
||||||
// compiler plugin) target the locally installed SDK (compileSdk 34).
|
|
||||||
plugins {
|
plugins {
|
||||||
id("com.android.application") version "8.5.2" apply false
|
id("com.android.application") version "8.5.2" apply false
|
||||||
id("org.jetbrains.kotlin.android") version "2.0.21" apply false
|
id("org.jetbrains.kotlin.android") version "1.9.24" apply false
|
||||||
id("org.jetbrains.kotlin.plugin.compose") version "2.0.21" apply false
|
id("org.jetbrains.kotlin.plugin.serialization") version "1.9.24" apply false
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -1,5 +1,5 @@
|
|||||||
org.gradle.jvmargs=-Xmx2048m -Dfile.encoding=UTF-8
|
org.gradle.jvmargs=-Xmx2048m -Dfile.encoding=UTF-8
|
||||||
org.gradle.caching=true
|
|
||||||
android.useAndroidX=true
|
android.useAndroidX=true
|
||||||
android.nonTransitiveRClass=true
|
android.nonTransitiveRClass=true
|
||||||
kotlin.code.style=official
|
kotlin.code.style=official
|
||||||
|
org.gradle.caching=true
|
||||||
|
|||||||
BIN
Binary file not shown.
+1
-1
@@ -1,6 +1,6 @@
|
|||||||
distributionBase=GRADLE_USER_HOME
|
distributionBase=GRADLE_USER_HOME
|
||||||
distributionPath=wrapper/dists
|
distributionPath=wrapper/dists
|
||||||
distributionUrl=https\://services.gradle.org/distributions/gradle-8.9-bin.zip
|
distributionUrl=https\://services.gradle.org/distributions/gradle-8.7-bin.zip
|
||||||
networkTimeout=10000
|
networkTimeout=10000
|
||||||
validateDistributionUrl=true
|
validateDistributionUrl=true
|
||||||
zipStoreBase=GRADLE_USER_HOME
|
zipStoreBase=GRADLE_USER_HOME
|
||||||
|
|||||||
Vendored
+2
-5
@@ -15,8 +15,6 @@
|
|||||||
# See the License for the specific language governing permissions and
|
# See the License for the specific language governing permissions and
|
||||||
# limitations under the License.
|
# limitations under the License.
|
||||||
#
|
#
|
||||||
# SPDX-License-Identifier: Apache-2.0
|
|
||||||
#
|
|
||||||
|
|
||||||
##############################################################################
|
##############################################################################
|
||||||
#
|
#
|
||||||
@@ -57,7 +55,7 @@
|
|||||||
# Darwin, MinGW, and NonStop.
|
# Darwin, MinGW, and NonStop.
|
||||||
#
|
#
|
||||||
# (3) This script is generated from the Groovy template
|
# (3) This script is generated from the Groovy template
|
||||||
# https://github.com/gradle/gradle/blob/HEAD/platforms/jvm/plugins-application/src/main/resources/org/gradle/api/internal/plugins/unixStartScript.txt
|
# https://github.com/gradle/gradle/blob/HEAD/subprojects/plugins/src/main/resources/org/gradle/api/internal/plugins/unixStartScript.txt
|
||||||
# within the Gradle project.
|
# within the Gradle project.
|
||||||
#
|
#
|
||||||
# You can find Gradle at https://github.com/gradle/gradle/.
|
# You can find Gradle at https://github.com/gradle/gradle/.
|
||||||
@@ -86,8 +84,7 @@ done
|
|||||||
# shellcheck disable=SC2034
|
# shellcheck disable=SC2034
|
||||||
APP_BASE_NAME=${0##*/}
|
APP_BASE_NAME=${0##*/}
|
||||||
# Discard cd standard output in case $CDPATH is set (https://github.com/gradle/gradle/issues/25036)
|
# Discard cd standard output in case $CDPATH is set (https://github.com/gradle/gradle/issues/25036)
|
||||||
APP_HOME=$( cd -P "${APP_HOME:-./}" > /dev/null && printf '%s
|
APP_HOME=$( cd "${APP_HOME:-./}" > /dev/null && pwd -P ) || exit
|
||||||
' "$PWD" ) || exit
|
|
||||||
|
|
||||||
# Use the maximum available, or set MAX_FD != -1 to use that value.
|
# Use the maximum available, or set MAX_FD != -1 to use that value.
|
||||||
MAX_FD=maximum
|
MAX_FD=maximum
|
||||||
|
|||||||
Vendored
-2
@@ -13,8 +13,6 @@
|
|||||||
@rem See the License for the specific language governing permissions and
|
@rem See the License for the specific language governing permissions and
|
||||||
@rem limitations under the License.
|
@rem limitations under the License.
|
||||||
@rem
|
@rem
|
||||||
@rem SPDX-License-Identifier: Apache-2.0
|
|
||||||
@rem
|
|
||||||
|
|
||||||
@if "%DEBUG%"=="" @echo off
|
@if "%DEBUG%"=="" @echo off
|
||||||
@rem ##########################################################################
|
@rem ##########################################################################
|
||||||
|
|||||||
@@ -11,6 +11,7 @@ pluginManagement {
|
|||||||
gradlePluginPortal()
|
gradlePluginPortal()
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
dependencyResolutionManagement {
|
dependencyResolutionManagement {
|
||||||
repositoriesMode.set(RepositoriesMode.FAIL_ON_PROJECT_REPOS)
|
repositoriesMode.set(RepositoriesMode.FAIL_ON_PROJECT_REPOS)
|
||||||
repositories {
|
repositories {
|
||||||
|
|||||||
@@ -2,7 +2,7 @@
|
|||||||
name: unibus
|
name: unibus
|
||||||
lang: go
|
lang: go
|
||||||
domain: infra
|
domain: infra
|
||||||
version: 0.4.0
|
version: 0.8.0
|
||||||
description: "Bus de mensajería unificado sobre NATS+JetStream con cifrado E2E por room (megolm/olm reducido): service de membresía/claves, librería cliente y peers demo."
|
description: "Bus de mensajería unificado sobre NATS+JetStream con cifrado E2E por room (megolm/olm reducido): service de membresía/claves, librería cliente y peers demo."
|
||||||
tags: [service, messaging, nats, e2e]
|
tags: [service, messaging, nats, e2e]
|
||||||
uses_functions:
|
uses_functions:
|
||||||
@@ -154,6 +154,84 @@ agent.<nombre>.{in,out} inbox/outbox de agente LLM (agent.scout.in)
|
|||||||
|
|
||||||
## Capability growth log
|
## Capability growth log
|
||||||
|
|
||||||
|
- v0.8.0 (2026-06-07) — completar y endurecer el cluster (issue 0006, fases
|
||||||
|
0006a–0006g) que cierra los bloqueantes de la auditoría dedicada del cluster
|
||||||
|
(report 0008) y cablea el control plane descentralizado que 0003 dejó a medias.
|
||||||
|
(0006a) Se cablea el nonce replicado en el binario: un nodo con `--cluster-name`
|
||||||
|
usa el bucket JetStream KV compartido obligatoriamente (fail-fast si no se crea),
|
||||||
|
cerrando el replay cross-node (N3); el "ciclo bootstrap" se resuelve con una
|
||||||
|
identidad interna efímera que el authenticator reconoce (full perms) y una
|
||||||
|
conexión in-process privilegiada. (0006b) Se cierra la fuga del control plane
|
||||||
|
por `$JS.API.>` (N2): la ACL pasa a un allow-set cerrado por-room (JS API solo de
|
||||||
|
los streams `UNIBUS_<room>` del peer), dejando `KV_UNIBUS_*`/`OBJ_*` fuera del
|
||||||
|
set y, por tanto, denegados. (0006c) Se cablea el store KV descentralizado
|
||||||
|
(`--store kv|sqlite`, default sqlite = baseline idéntico) con un `storeHolder`
|
||||||
|
fail-closed que rompe el ciclo bootstrap del authenticator. (0006d) Posture
|
||||||
|
homogénea: un nodo rechaza unirse al cluster sin `enforce`, y `/healthz` publica
|
||||||
|
la posture (N1). (0006e) Todos los clientes llaman `RefreshSession` tras cambios
|
||||||
|
de membresía (N4), de modo que la ACL es usable bajo enforce sin desactivarla.
|
||||||
|
(0006f) Bajos: secreto de cluster fuera de argv (`--cluster-pass-file`/env +
|
||||||
|
inyección en routes), `migrate-to-kv` rechaza target remoto sin `--ca`, y docs
|
||||||
|
de CA separada para routes + R1 SPOF vs R3 HA. (0006g) Material de deploy del
|
||||||
|
cluster de 3 nodos (magnus+homer+datardos) en `deploy/cluster/` (certs, unit,
|
||||||
|
script de despliegue dry-run, runbook) — sin tocar ningún VPS. Toda la
|
||||||
|
regresión de auditorías previas + los ataques 0008 siguen verdes; govulncheck 0
|
||||||
|
alcanzables. Branch-by-abstraction: con `--store sqlite` el single-node sigue
|
||||||
|
idéntico y desplegable en todo momento.
|
||||||
|
- v0.7.0 (2026-06-07) — hardening de seguridad 2 (issue 0005, fases 0005a–0005e)
|
||||||
|
que cierra los hallazgos nuevos de la re-auditoría red-team (report 0006) y
|
||||||
|
lleva el veredicto de exposición pública a "sí-con-condiciones". (0005a) Bump de
|
||||||
|
`github.com/nats-io/nats-server/v2` v2.10.22→v2.11.15 y de la toolchain a
|
||||||
|
go1.26.4: `govulncheck ./...` pasa de 16 vulnerabilidades alcanzables (14 del
|
||||||
|
servidor NATS embebido + 2 de la stdlib) a 0. (0005b) `client.processFrame`
|
||||||
|
ahora descarta cualquier frame sin firma en una room `SignMsgs` (antes verificaba
|
||||||
|
solo si la firma venía presente, lo que permitía suplantar `Sender` con
|
||||||
|
`Sig==nil`). (0005c) Nuevo limiter global de bytes en vuelo
|
||||||
|
(`pkg/membership.inflightLimiter`) que acota la memoria agregada que el control
|
||||||
|
plane bufferiza bajo concurrencia (el límite por-request y el rate-limit por-IP
|
||||||
|
no acotaban el total): un flood concurrente multi-IP se descarta con 503 en vez
|
||||||
|
de crecer sin techo (el RSS deja de escalar con N). (0005d) El guard de arranque
|
||||||
|
`validateBootConfig` ahora exige `--tls-cert/--tls-key` en bind no-loopback (un
|
||||||
|
control plane público sin TLS servía metadata en claro). (0005e) Se cablea por
|
||||||
|
fin en `membershipd` la ACL por subject que ya existía huérfana desde 0003e
|
||||||
|
(`busauth.NewNkeyAuthenticatorACL` + nuevo adaptador `busauth.PermissionsFromSubjects`
|
||||||
|
sobre `membership.SubjectACLFor`): un registrado no-miembro ya no puede
|
||||||
|
`Subscribe(">")` y captar los subjects/advisories de rooms ajenas. Residuales
|
||||||
|
documentados: `$JS.API.>` sigue compartido (cierre completo = NATS accounts por
|
||||||
|
identidad, diferido) y los clientes deben `RefreshSession` tras cambios de
|
||||||
|
membresía (chat/worker aún no lo hacen). El comportamiento de un solo nodo no
|
||||||
|
cambia y master sigue verde.
|
||||||
|
- v0.6.0 (2026-06-07) — descentralización / alta disponibilidad (issue 0003,
|
||||||
|
fases 0003a–0003e), report 0006. El servidor NATS embebido gana soporte de
|
||||||
|
cluster con routes autenticadas (secreto de cluster) y TLS mutuo de nodo
|
||||||
|
(`pkg/embeddednats.ClusterConfig` + `busauth.RouteTLSConfig`, reusando la CA
|
||||||
|
del 0001). El control plane (`pkg/membership.Store`) pasa a interfaz por
|
||||||
|
branch-by-abstraction: `sqliteStore` (default) + `jetstreamStore` nuevo sobre
|
||||||
|
JetStream KV replicado (réplicas configurables R1→R3), con `IsAuthorized`
|
||||||
|
fail-closed ante pérdida de quorum. `membershipd migrate-to-kv` mueve el
|
||||||
|
estado SQLite→KV de forma idempotente con backup previo. Los blobs
|
||||||
|
(`pkg/blobstore.Store`, ahora interfaz) ganan un backend NATS Object Store
|
||||||
|
replicado además del disco. El cliente acepta listas de seeds NATS y de
|
||||||
|
control planes con failover/reconnect nativo, el anti-replay pasa a un store
|
||||||
|
de nonces compartido en KV con TTL (cierra el agujero de replay multi-nodo), y
|
||||||
|
se implementa la ACL por subject derivada de pertenencia (audit H4 residual:
|
||||||
|
`busauth.NewNkeyAuthenticatorACL` + `membership.SubjectACLFor` +
|
||||||
|
`client.RefreshSession`). Todo viaja detrás del flag `decentralized` (off):
|
||||||
|
el comportamiento de un solo nodo (SQLite + disco) no cambia y master sigue
|
||||||
|
verde. El despliegue multi-nodo real (0003f) lo ejecuta el humano.
|
||||||
|
- v0.5.0 (2026-06-07) — hardening de seguridad (issue 0004) que cierra los
|
||||||
|
hallazgos de la auditoría red-team (report 0004) y lleva el veredicto de
|
||||||
|
exposición pública de "NO" a "sí-con-condiciones". Anti-DoS pre-auth
|
||||||
|
(`http.MaxBytesReader` por ruta + rechazo por `Content-Length` + rate-limit
|
||||||
|
por IP + `MaxHeaderBytes`); guard de fail-open que prohíbe arrancar con bind
|
||||||
|
público o TLS sin `--bus-auth enforce`; autorización por pertenencia en los GET
|
||||||
|
de room (metadata y clave sellada solo para miembros / el propio endpoint);
|
||||||
|
rooms cleartext deshabilitadas en bind público (contenido siempre E2E, mínimo
|
||||||
|
defensivo del data plane mientras la ACL por subject llega con 0003); TLS en el
|
||||||
|
control plane HTTP con la CA propia y cliente que exige `https` cuando hay CA;
|
||||||
|
y los medios H6/H7/H12 (owner ligado al firmante, `IsAuthorized` antes del
|
||||||
|
nonce-cache con poda O(expired) + cap, errores genéricos al cliente). Cada
|
||||||
|
hallazgo lleva su test adversarial `TestAudit_*` portado como regresión.
|
||||||
- v0.4.0 (2026-06-07) — descubrimiento de rooms: `GET /members/{endpoint}/rooms`
|
- v0.4.0 (2026-06-07) — descubrimiento de rooms: `GET /members/{endpoint}/rooms`
|
||||||
lista las rooms de un endpoint con su metadata y rol, y `client.ListMyRooms()`
|
lista las rooms de un endpoint con su metadata y rol, y `client.ListMyRooms()`
|
||||||
lo consume. El control plane es pull (no hay push de invitaciones), así que un
|
lo consume. El control plane es pull (no hay push de invitaciones), así que un
|
||||||
|
|||||||
+28
-12
@@ -27,11 +27,12 @@ import (
|
|||||||
|
|
||||||
func main() {
|
func main() {
|
||||||
var (
|
var (
|
||||||
natsURL = flag.String("nats-url", "nats://127.0.0.1:4250", "NATS url")
|
natsURL = flag.String("nats-url", "nats://127.0.0.1:4250", "NATS url")
|
||||||
ctrlURL = flag.String("ctrl-url", "http://127.0.0.1:8470", "membershipd control-plane url")
|
ctrlURL = flag.String("ctrl-url", "http://127.0.0.1:8470", "membershipd control-plane url")
|
||||||
roomSub = flag.String("room", "proc.test.ticks", "room subject to subscribe to")
|
roomSub = flag.String("room", "proc.test.ticks", "room subject to subscribe to")
|
||||||
idFile = flag.String("id-file", "./local_files/chat.id", "identity file path")
|
idFile = flag.String("id-file", "./local_files/chat.id", "identity file path")
|
||||||
demoEnc = flag.Bool("demo-encrypted", false, "run the encrypted forward-secrecy demo")
|
demoEnc = flag.Bool("demo-encrypted", false, "run the encrypted forward-secrecy demo")
|
||||||
|
caFile = flag.String("ca", "", "path to the bus CA cert (ca.crt); set to connect with TLS + nkey to a secured bus")
|
||||||
)
|
)
|
||||||
flag.Parse()
|
flag.Parse()
|
||||||
|
|
||||||
@@ -39,19 +40,19 @@ func main() {
|
|||||||
log.SetPrefix("[chat] ")
|
log.SetPrefix("[chat] ")
|
||||||
|
|
||||||
if *demoEnc {
|
if *demoEnc {
|
||||||
runEncryptedDemo(*natsURL, *ctrlURL)
|
runEncryptedDemo(*natsURL, *ctrlURL, *caFile)
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
runSimple(*natsURL, *ctrlURL, *roomSub, *idFile)
|
runSimple(*natsURL, *ctrlURL, *roomSub, *idFile, *caFile)
|
||||||
}
|
}
|
||||||
|
|
||||||
// runSimple subscribes to a cleartext subject and prints messages live.
|
// runSimple subscribes to a cleartext subject and prints messages live.
|
||||||
func runSimple(natsURL, ctrlURL, roomSub, idFile string) {
|
func runSimple(natsURL, ctrlURL, roomSub, idFile, caFile string) {
|
||||||
id, err := client.LoadOrCreateIdentity(idFile)
|
id, err := client.LoadOrCreateIdentity(idFile)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
log.Fatalf("identity: %v", err)
|
log.Fatalf("identity: %v", err)
|
||||||
}
|
}
|
||||||
c, err := client.New(natsURL, ctrlURL, id)
|
c, err := client.Connect(natsURL, ctrlURL, id, caFile)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
log.Fatalf("connect: %v", err)
|
log.Fatalf("connect: %v", err)
|
||||||
}
|
}
|
||||||
@@ -68,6 +69,12 @@ func runSimple(natsURL, ctrlURL, roomSub, idFile string) {
|
|||||||
if err := c.Join(roomID); err != nil {
|
if err := c.Join(roomID); err != nil {
|
||||||
log.Fatalf("join: %v", err)
|
log.Fatalf("join: %v", err)
|
||||||
}
|
}
|
||||||
|
// Membership-change contract (issue 0006e): refresh so the just-created room's
|
||||||
|
// subject is subscribable under enforce+ACL (permissions are frozen at connect
|
||||||
|
// time). Must run BEFORE Subscribe — RefreshSession drops active subscriptions.
|
||||||
|
if err := c.RefreshSession(); err != nil {
|
||||||
|
log.Fatalf("refresh session after create room: %v", err)
|
||||||
|
}
|
||||||
sub, err := c.Subscribe(roomID, func(f frame.Frame, plaintext []byte) {
|
sub, err := c.Subscribe(roomID, func(f frame.Frame, plaintext []byte) {
|
||||||
fmt.Printf("[%s] %s: %s\n", f.Subject, shortID(f.Sender), string(plaintext))
|
fmt.Printf("[%s] %s: %s\n", f.Subject, shortID(f.Sender), string(plaintext))
|
||||||
})
|
})
|
||||||
@@ -91,7 +98,7 @@ func shortID(id string) string {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// runEncryptedDemo proves E2E encryption + forward secrecy end-to-end.
|
// runEncryptedDemo proves E2E encryption + forward secrecy end-to-end.
|
||||||
func runEncryptedDemo(natsURL, ctrlURL string) {
|
func runEncryptedDemo(natsURL, ctrlURL, caFile string) {
|
||||||
log.Printf("=== encrypted forward-secrecy demo ===")
|
log.Printf("=== encrypted forward-secrecy demo ===")
|
||||||
pass := true
|
pass := true
|
||||||
check := func(name string, ok bool) {
|
check := func(name string, ok bool) {
|
||||||
@@ -109,10 +116,10 @@ func runEncryptedDemo(natsURL, ctrlURL string) {
|
|||||||
idB, err := newEphemeralIdentity()
|
idB, err := newEphemeralIdentity()
|
||||||
must(err, "generate B identity")
|
must(err, "generate B identity")
|
||||||
|
|
||||||
a, err := client.New(natsURL, ctrlURL, idA)
|
a, err := client.Connect(natsURL, ctrlURL, idA, caFile)
|
||||||
must(err, "connect A")
|
must(err, "connect A")
|
||||||
defer a.Close()
|
defer a.Close()
|
||||||
b, err := client.New(natsURL, ctrlURL, idB)
|
b, err := client.Connect(natsURL, ctrlURL, idB, caFile)
|
||||||
must(err, "connect B")
|
must(err, "connect B")
|
||||||
defer b.Close()
|
defer b.Close()
|
||||||
|
|
||||||
@@ -121,12 +128,21 @@ func runEncryptedDemo(natsURL, ctrlURL string) {
|
|||||||
must(err, "A create room")
|
must(err, "A create room")
|
||||||
fmt.Printf(" room.test -> %s (E2E, persisted, signed)\n", roomID)
|
fmt.Printf(" room.test -> %s (E2E, persisted, signed)\n", roomID)
|
||||||
|
|
||||||
|
// Membership-change contract (issue 0006e): A only became a member of this room
|
||||||
|
// after connecting, so refresh to gain its subject + per-room JetStream API
|
||||||
|
// under enforce+ACL before publishing.
|
||||||
|
must(a.RefreshSession(), "A refresh after create room")
|
||||||
|
|
||||||
// A invites B (seals K to B's X25519 key).
|
// A invites B (seals K to B's X25519 key).
|
||||||
must(a.Invite(roomID, b.Endpoint()), "A invite B")
|
must(a.Invite(roomID, b.Endpoint()), "A invite B")
|
||||||
|
|
||||||
// B joins (fetches + decrypts K).
|
// B joins (fetches + decrypts K).
|
||||||
must(b.Join(roomID), "B join")
|
must(b.Join(roomID), "B join")
|
||||||
|
|
||||||
|
// B became a member via the invite above; refresh so B can subscribe to the
|
||||||
|
// room's subject under enforce+ACL (before subscribing — refresh drops subs).
|
||||||
|
must(b.RefreshSession(), "B refresh after join")
|
||||||
|
|
||||||
// B subscribes; capture received plaintexts.
|
// B subscribes; capture received plaintexts.
|
||||||
recv := make(chan string, 4)
|
recv := make(chan string, 4)
|
||||||
subB, err := b.Subscribe(roomID, func(f frame.Frame, plaintext []byte) {
|
subB, err := b.Subscribe(roomID, func(f frame.Frame, plaintext []byte) {
|
||||||
|
|||||||
@@ -0,0 +1,221 @@
|
|||||||
|
package main
|
||||||
|
|
||||||
|
// Regression for audit report 0008, vector N3: the binary must wire the
|
||||||
|
// replicated nonce store on a clustered node so a signed request accepted on one
|
||||||
|
// node cannot be replayed to another. The auditor's ephemeral attack showed the
|
||||||
|
// OLD binary never called UseReplicatedNonces (each node kept a per-process
|
||||||
|
// cache), so a captured request replayed to a second node with 200+200. These
|
||||||
|
// tests drive the SAME helper the binary uses (wireReplicatedNonces) so they
|
||||||
|
// prove the WIRING, not just the underlying API.
|
||||||
|
|
||||||
|
import (
|
||||||
|
"bytes"
|
||||||
|
"crypto/rand"
|
||||||
|
"encoding/base64"
|
||||||
|
"encoding/hex"
|
||||||
|
"io"
|
||||||
|
"net"
|
||||||
|
"net/http"
|
||||||
|
"net/http/httptest"
|
||||||
|
"path/filepath"
|
||||||
|
"strconv"
|
||||||
|
"testing"
|
||||||
|
"time"
|
||||||
|
|
||||||
|
cs "fn-registry/functions/cybersecurity"
|
||||||
|
|
||||||
|
"github.com/enmanuel/unibus/pkg/blobstore"
|
||||||
|
"github.com/enmanuel/unibus/pkg/embeddednats"
|
||||||
|
"github.com/enmanuel/unibus/pkg/frame"
|
||||||
|
"github.com/enmanuel/unibus/pkg/membership"
|
||||||
|
"github.com/nats-io/nats.go"
|
||||||
|
"github.com/nats-io/nats.go/jetstream"
|
||||||
|
)
|
||||||
|
|
||||||
|
// freePort returns a TCP port that was free on 127.0.0.1 at the time of the
// call. It listens on port 0 so the OS picks the port, reads the assigned port
// back from the listener address, and closes the listener before returning.
// The port is not reserved after the listener closes, so callers should bind it
// promptly.
func freePort(t *testing.T) int {
	t.Helper()
	ln, err := net.Listen("tcp", "127.0.0.1:0")
	if err != nil {
		t.Fatalf("free port: %v", err)
	}
	port := ln.Addr().(*net.TCPAddr).Port
	ln.Close()
	return port
}
|
||||||
|
|
||||||
|
// signed008 builds a transport-signed control-plane request with a caller-chosen
|
||||||
|
// ts+nonce, so a test can reuse the exact same signed bytes against two nodes to
|
||||||
|
// exercise replay.
|
||||||
|
func signed008(t *testing.T, baseURL, method, path string, body []byte, id cs.Identity, ts int64, nonce string) *http.Request {
|
||||||
|
t.Helper()
|
||||||
|
canonical := membership.CanonicalRequest(method, path, strconv.FormatInt(ts, 10), nonce, body)
|
||||||
|
sig := cs.SignEd25519(id.SignPriv, canonical)
|
||||||
|
var rdr io.Reader
|
||||||
|
if body != nil {
|
||||||
|
rdr = bytes.NewReader(body)
|
||||||
|
}
|
||||||
|
req, err := http.NewRequest(method, baseURL+path, rdr)
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("new request: %v", err)
|
||||||
|
}
|
||||||
|
req.Header.Set("X-Unibus-Pub", hex.EncodeToString(id.SignPub))
|
||||||
|
req.Header.Set("X-Unibus-Ts", strconv.FormatInt(ts, 10))
|
||||||
|
req.Header.Set("X-Unibus-Nonce", nonce)
|
||||||
|
req.Header.Set("X-Unibus-Sig", base64.StdEncoding.EncodeToString(sig))
|
||||||
|
return req
|
||||||
|
}
|
||||||
|
|
||||||
|
// randNonce returns 16 bytes from crypto/rand encoded as standard (padded)
// base64. A failure to read randomness fails the test immediately.
func randNonce(t *testing.T) string {
	t.Helper()
	var raw [16]byte
	if _, err := rand.Read(raw[:]); err != nil {
		t.Fatalf("nonce: %v", err)
	}
	return base64.StdEncoding.EncodeToString(raw[:])
}
|
||||||
|
|
||||||
|
// TestAttack0008_N3 is the blocker regression: two clustered membershipd nodes
|
||||||
|
// wired through wireReplicatedNonces share a JetStream KV nonce bucket, so a
|
||||||
|
// request accepted on node A is rejected (401) when replayed to node B. Before
|
||||||
|
// the fix the binary never wired this and the replay returned 200.
|
||||||
|
func TestAttack0008_N3(t *testing.T) {
|
||||||
|
// One NATS+JetStream backing the shared nonce bucket (no client auth needed:
|
||||||
|
// the test drives the membership.Server's nonce store directly via HTTP).
|
||||||
|
ns, err := embeddednats.StartServer(embeddednats.ServerConfig{
|
||||||
|
StoreDir: t.TempDir(), Host: "127.0.0.1", Port: freePort(t),
|
||||||
|
})
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("nats: %v", err)
|
||||||
|
}
|
||||||
|
t.Cleanup(func() { ns.Shutdown(); ns.WaitForShutdown() })
|
||||||
|
nc, err := nats.Connect(ns.ClientURL())
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("connect: %v", err)
|
||||||
|
}
|
||||||
|
t.Cleanup(nc.Close)
|
||||||
|
js, err := jetstream.New(nc)
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("jetstream: %v", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Shared control-plane state (stand-in for the replicated store) + two nodes.
|
||||||
|
dir := t.TempDir()
|
||||||
|
store, err := membership.Open(filepath.Join(dir, "unibus.db"))
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("store: %v", err)
|
||||||
|
}
|
||||||
|
t.Cleanup(func() { store.Close() })
|
||||||
|
alice, err := cs.GenerateIdentity()
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("identity: %v", err)
|
||||||
|
}
|
||||||
|
if err := store.AddUser(hex.EncodeToString(alice.SignPub), "alice", membership.RoleAdmin); err != nil {
|
||||||
|
t.Fatalf("add alice: %v", err)
|
||||||
|
}
|
||||||
|
blobs, _ := blobstore.New(filepath.Join(dir, "blobs"))
|
||||||
|
|
||||||
|
// Each node is wired EXACTLY as the binary wires a clustered node.
|
||||||
|
mkNode := func() *httptest.Server {
|
||||||
|
srv := membership.NewServer(store, blobs, membership.AuthEnforce)
|
||||||
|
if err := wireReplicatedNonces(srv, js, true /*clustered*/, 1); err != nil {
|
||||||
|
t.Fatalf("wireReplicatedNonces: %v", err)
|
||||||
|
}
|
||||||
|
return httptest.NewServer(srv)
|
||||||
|
}
|
||||||
|
nodeA := mkNode()
|
||||||
|
t.Cleanup(nodeA.Close)
|
||||||
|
nodeB := mkNode()
|
||||||
|
t.Cleanup(nodeB.Close)
|
||||||
|
|
||||||
|
ts := time.Now().Unix()
|
||||||
|
nonce := randNonce(t)
|
||||||
|
path := "/members/" + frame.EndpointID(alice.SignPub) + "/rooms"
|
||||||
|
|
||||||
|
// Golden: alice's signed request is accepted on node A.
|
||||||
|
respA, err := http.DefaultClient.Do(signed008(t, nodeA.URL, "GET", path, nil, alice, ts, nonce))
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("do A: %v", err)
|
||||||
|
}
|
||||||
|
respA.Body.Close()
|
||||||
|
if respA.StatusCode != http.StatusOK {
|
||||||
|
t.Fatalf("node A first use: status %d, want 200", respA.StatusCode)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Error path (the attack): replay the SAME signed bytes to node B → 401.
|
||||||
|
respB, err := http.DefaultClient.Do(signed008(t, nodeB.URL, "GET", path, nil, alice, ts, nonce))
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("do B: %v", err)
|
||||||
|
}
|
||||||
|
respB.Body.Close()
|
||||||
|
if respB.StatusCode != http.StatusUnauthorized {
|
||||||
|
t.Fatalf("cross-node replay to node B: status %d, want 401 (replayed nonce must be rejected)", respB.StatusCode)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// TestAttack0008_N3_StandaloneKeepsLocalCache is the edge: a NON-clustered node
|
||||||
|
// must NOT require JetStream — wireReplicatedNonces is a no-op and the node keeps
|
||||||
|
// its in-memory cache, which still rejects a same-node replay (the single-node
|
||||||
|
// guarantee is unchanged). This proves the fix does not add a JetStream
|
||||||
|
// dependency to standalone deployments.
|
||||||
|
func TestAttack0008_N3_StandaloneKeepsLocalCache(t *testing.T) {
|
||||||
|
dir := t.TempDir()
|
||||||
|
store, err := membership.Open(filepath.Join(dir, "unibus.db"))
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("store: %v", err)
|
||||||
|
}
|
||||||
|
t.Cleanup(func() { store.Close() })
|
||||||
|
alice, err := cs.GenerateIdentity()
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("identity: %v", err)
|
||||||
|
}
|
||||||
|
if err := store.AddUser(hex.EncodeToString(alice.SignPub), "alice", membership.RoleAdmin); err != nil {
|
||||||
|
t.Fatalf("add alice: %v", err)
|
||||||
|
}
|
||||||
|
blobs, _ := blobstore.New(filepath.Join(dir, "blobs"))
|
||||||
|
|
||||||
|
srv := membership.NewServer(store, blobs, membership.AuthEnforce)
|
||||||
|
// Standalone: clustered=false, js=nil. Must succeed (no JetStream needed).
|
||||||
|
if err := wireReplicatedNonces(srv, nil, false /*clustered*/, 1); err != nil {
|
||||||
|
t.Fatalf("standalone wireReplicatedNonces must be a no-op, got: %v", err)
|
||||||
|
}
|
||||||
|
node := httptest.NewServer(srv)
|
||||||
|
t.Cleanup(node.Close)
|
||||||
|
|
||||||
|
ts := time.Now().Unix()
|
||||||
|
nonce := randNonce(t)
|
||||||
|
path := "/members/" + frame.EndpointID(alice.SignPub) + "/rooms"
|
||||||
|
|
||||||
|
resp1, err := http.DefaultClient.Do(signed008(t, node.URL, "GET", path, nil, alice, ts, nonce))
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("do 1: %v", err)
|
||||||
|
}
|
||||||
|
resp1.Body.Close()
|
||||||
|
if resp1.StatusCode != http.StatusOK {
|
||||||
|
t.Fatalf("first use: status %d, want 200", resp1.StatusCode)
|
||||||
|
}
|
||||||
|
// Same-node replay is still rejected by the in-memory cache.
|
||||||
|
resp2, err := http.DefaultClient.Do(signed008(t, node.URL, "GET", path, nil, alice, ts, nonce))
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("do 2: %v", err)
|
||||||
|
}
|
||||||
|
resp2.Body.Close()
|
||||||
|
if resp2.StatusCode != http.StatusUnauthorized {
|
||||||
|
t.Fatalf("same-node replay: status %d, want 401", resp2.StatusCode)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// TestAttack0008_N3_ClusteredRequiresJetStream proves the hard rule: a clustered
|
||||||
|
// node with NO JetStream available refuses (error), so the binary fails fast
|
||||||
|
// instead of silently running with a per-process cache.
|
||||||
|
func TestAttack0008_N3_ClusteredRequiresJetStream(t *testing.T) {
|
||||||
|
dir := t.TempDir()
|
||||||
|
store, err := membership.Open(filepath.Join(dir, "unibus.db"))
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("store: %v", err)
|
||||||
|
}
|
||||||
|
t.Cleanup(func() { store.Close() })
|
||||||
|
blobs, _ := blobstore.New(filepath.Join(dir, "blobs"))
|
||||||
|
srv := membership.NewServer(store, blobs, membership.AuthEnforce)
|
||||||
|
if err := wireReplicatedNonces(srv, nil, true /*clustered*/, 1); err == nil {
|
||||||
|
t.Fatalf("clustered node with no JetStream must fail, got nil")
|
||||||
|
}
|
||||||
|
}
|
||||||
@@ -0,0 +1,198 @@
|
|||||||
|
package main
|
||||||
|
|
||||||
|
import (
|
||||||
|
"fmt"
|
||||||
|
"net"
|
||||||
|
"net/url"
|
||||||
|
"os"
|
||||||
|
"strings"
|
||||||
|
|
||||||
|
"github.com/enmanuel/unibus/pkg/membership"
|
||||||
|
)
|
||||||
|
|
||||||
|
// splitRoutes turns the comma-separated --routes flag value into a list of
// route URLs. Each entry is whitespace-trimmed and empty entries are discarded,
// so a trailing comma, doubled commas, or a spaced list never produce a bogus
// empty route. The result is nil when no non-empty entry exists.
func splitRoutes(csv string) []string {
	var routes []string
	isComma := func(r rune) bool { return r == ',' }
	for _, field := range strings.FieldsFunc(csv, isComma) {
		if route := strings.TrimSpace(field); route != "" {
			routes = append(routes, route)
		}
	}
	return routes
}
|
||||||
|
|
||||||
|
// resolveClusterPass resolves the cluster route secret WITHOUT leaking it through
// argv (audit 0008 N1-low: --cluster-pass in argv is visible in ps/journald).
//
// Precedence:
//  1. passFile (--cluster-pass-file): the file is read and surrounding
//     whitespace is trimmed.
//  2. env (the value of UNIBUS_CLUSTER_PASS, injected by the caller so the
//     function stays testable): used verbatim.
//  3. passFlag (the legacy --cluster-pass flag, argv-visible, kept for
//     dev/compat): used verbatim.
//
// It returns the secret and a short source label for logging ("file", "env",
// "flag" or "none") — never log the secret itself. An error is returned when
// passFile is set but cannot be read, or when it holds no secret (empty or
// whitespace only): an explicitly configured secret file that yields an empty
// secret is a configuration bug and must not silently disable the route secret.
func resolveClusterPass(passFlag, passFile, env string) (secret, source string, err error) {
	if passFile != "" {
		raw, rerr := os.ReadFile(passFile)
		if rerr != nil {
			return "", "", fmt.Errorf("read --cluster-pass-file %q: %w", passFile, rerr)
		}
		fromFile := strings.TrimSpace(string(raw))
		if fromFile == "" {
			return "", "", fmt.Errorf("read --cluster-pass-file %q: file holds no secret (empty or whitespace only)", passFile)
		}
		return fromFile, "file", nil
	}
	switch {
	case env != "":
		return env, "env", nil
	case passFlag != "":
		return passFlag, "flag", nil
	}
	return "", "none", nil
}
|
||||||
|
|
||||||
|
// injectRouteCreds rewrites each route URL that carries NO userinfo to embed
// user:pass, so the cluster secret is supplied once (via file/env) instead of
// repeated in every --routes argv entry where ps/journald would expose it.
//
//   - With an empty user it is a no-op and returns routes unchanged.
//   - A route that already carries userinfo is left untouched (operator
//     override).
//   - A route that fails to parse is an error (configuration bug) rather than a
//     silently dropped peer.
//   - A route without userinfo that parses but has no host — typically a
//     scheme-less "host:port", which url.Parse reads as scheme "host" with an
//     opaque part — is also an error. URL.String ignores userinfo on such a URL,
//     so the credentials would otherwise be dropped without any warning.
//
// On error the returned slice is nil.
func injectRouteCreds(routes []string, user, pass string) ([]string, error) {
	if user == "" {
		return routes, nil
	}
	out := make([]string, 0, len(routes))
	for _, r := range routes {
		u, err := url.Parse(r)
		if err != nil {
			return nil, fmt.Errorf("parse route %q: %w", r, err)
		}
		if u.User == nil {
			// Credentials can only be embedded in an authority (//host) URL.
			if u.Host == "" {
				return nil, fmt.Errorf("parse route %q: missing host; want a full URL such as nats://host:port", r)
			}
			u.User = url.UserPassword(user, pass)
		}
		out = append(out, u.String())
	}
	return out, nil
}
|
||||||
|
|
||||||
|
// isLoopbackURL reports whether a NATS url targets this host only (loopback). It
// guards migrate-to-kv (audit 0008 N6): pushing the allowlist to a REMOTE NATS
// without TLS would send handles/roles/sign-pubs in cleartext, so a remote target
// must be TLS-pinned (--ca).
//
// Only the literal host name "localhost" and loopback IP literals count as
// loopback. Anything that cannot be classified — an unparseable url, a missing
// host, or any other host name — is reported as NON-loopback, the conservative
// answer (it then requires --ca).
func isLoopbackURL(natsURL string) bool {
	parsed, err := url.Parse(natsURL)
	if err != nil {
		return false
	}
	name := parsed.Hostname()
	if name == "localhost" {
		return true
	}
	// An empty or non-IP host makes ParseIP return nil, which falls through to
	// the conservative false below.
	if addr := net.ParseIP(name); addr != nil {
		return addr.IsLoopback()
	}
	return false
}
|
||||||
|
|
||||||
|
// isLoopbackBind reports whether the --bind value keeps the service reachable
// only from this host. The literal "localhost" and loopback IP literals qualify.
// An empty bind means "all interfaces" (public), and any other host name cannot
// be classified here, so both are treated as public — the conservative choice,
// so an unusual bind never silently slips past the guard.
func isLoopbackBind(bind string) bool {
	if bind == "localhost" {
		return true
	}
	// ParseIP yields nil for the empty string (binds every interface) and for
	// host names alike; both are reported as public.
	addr := net.ParseIP(bind)
	return addr != nil && addr.IsLoopback()
}
|
||||||
|
|
||||||
|
// validateBootConfig is the fail-open guard (audit H2). It refuses any startup
|
||||||
|
// configuration that would expose the bus without enforced authentication:
|
||||||
|
//
|
||||||
|
// - a non-loopback --bind without --bus-auth enforce (the data plane and
|
||||||
|
// control plane would both accept anyone),
|
||||||
|
// - --tls-cert/--tls-key without --bus-auth enforce (TLS encrypts the channel
|
||||||
|
// but authenticates no one — encrypted access for everybody is still open), and
|
||||||
|
// - a non-loopback --bind WITHOUT --tls-cert/--tls-key (the control plane would
|
||||||
|
// serve metadata over plaintext HTTP publicly — audit H5 reappearing, the N4
|
||||||
|
// gap the re-audit found: TLS was available but not mandatory).
|
||||||
|
//
|
||||||
|
// It is a pure function of the parsed flags so the command can fail fast at
|
||||||
|
// startup and tests can assert the policy without booting a server.
|
||||||
|
func validateBootConfig(bind string, mode membership.AuthMode, tlsCert, tlsKey string) error {
|
||||||
|
if !isLoopbackBind(bind) && mode != membership.AuthEnforce {
|
||||||
|
return fmt.Errorf(
|
||||||
|
"refusing to start: --bind %q is not loopback but --bus-auth is %q; a public bind requires --bus-auth enforce (or bind 127.0.0.1 for local dev)",
|
||||||
|
bind, mode)
|
||||||
|
}
|
||||||
|
if (tlsCert != "" || tlsKey != "") && mode != membership.AuthEnforce {
|
||||||
|
return fmt.Errorf(
|
||||||
|
"refusing to start: --tls-cert/--tls-key set but --bus-auth is %q; TLS without enforced auth is fail-open (encrypted channel, no authentication) — set --bus-auth enforce",
|
||||||
|
mode)
|
||||||
|
}
|
||||||
|
if !isLoopbackBind(bind) && (tlsCert == "" || tlsKey == "") {
|
||||||
|
return fmt.Errorf(
|
||||||
|
"refusing to start: --bind %q is not loopback but --tls-cert/--tls-key are not both set; a public control plane must serve HTTPS or its metadata (subjects, pubkeys, sealed keys, the social graph) travels in cleartext to a network MITM (audit H5/N4) — provide a CA-signed --tls-cert/--tls-key, or bind 127.0.0.1 for local dev",
|
||||||
|
bind)
|
||||||
|
}
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// validateClusterConfig guards the cluster route layer (issue 0003a). The route
|
||||||
|
// layer is a server-to-server trust boundary distinct from the client data
|
||||||
|
// plane: leaving it open lets anyone who reaches the route port join the cluster
|
||||||
|
// or inject messages into the whole bus (audit 0004, "auth of the cluster
|
||||||
|
// routes"). So on a public (non-loopback) bind, a cluster MUST carry both a
|
||||||
|
// shared route secret AND mutual route TLS. It is a pure function of the parsed
|
||||||
|
// flags. An empty clusterName means "no cluster" (standalone) and is always
|
||||||
|
// allowed.
|
||||||
|
//
|
||||||
|
// The three route-TLS paths are all-or-nothing (mutual TLS needs the node cert,
|
||||||
|
// its key, and the CA together), independent of the bind, so a partial TLS
|
||||||
|
// config never silently degrades to plaintext routes.
|
||||||
|
//
|
||||||
|
// Homogeneous posture (issue 0006d, audit 0008 N1): a cluster is only as secure
|
||||||
|
// as its weakest node — the data plane forwards every subject between nodes, so a
|
||||||
|
// single node running without enforced auth lets an unauthenticated peer
|
||||||
|
// Subscribe(">") on it and harvest the traffic forwarded from the ACL'd nodes.
|
||||||
|
// This node therefore REFUSES to join a cluster unless it runs --bus-auth enforce,
|
||||||
|
// regardless of bind: a clustered node is a production node, and there is no safe
|
||||||
|
// "dev cluster without auth". (A peer running a tampered binary is out of this
|
||||||
|
// node's control; /healthz exposes each node's posture so a monitor can detect
|
||||||
|
// one that is not enforce+ACL — see Server.Posture.)
|
||||||
|
func validateClusterConfig(clusterName, bind, user, pass, rtCert, rtKey, rtCA string, mode membership.AuthMode) error {
|
||||||
|
rtAny := rtCert != "" || rtKey != "" || rtCA != ""
|
||||||
|
rtAll := rtCert != "" && rtKey != "" && rtCA != ""
|
||||||
|
if rtAny && !rtAll {
|
||||||
|
return fmt.Errorf(
|
||||||
|
"refusing to start: --route-tls-cert/--route-tls-key/--route-tls-ca must be set together (mutual route TLS needs all three)")
|
||||||
|
}
|
||||||
|
if clusterName == "" {
|
||||||
|
return nil // standalone: no route layer to secure
|
||||||
|
}
|
||||||
|
// A clustered node MUST enforce auth (homogeneous posture). Checked before the
|
||||||
|
// loopback shortcut so even a loopback cluster cannot form without enforce.
|
||||||
|
if mode != membership.AuthEnforce {
|
||||||
|
return fmt.Errorf(
|
||||||
|
"refusing to start: cluster %q requires --bus-auth enforce; a cluster node without enforced auth+ACL lets an unauthenticated peer harvest the traffic forwarded from the other nodes (audit 0008 N1) — every node must run the same enforce+ACL+TLS posture",
|
||||||
|
clusterName)
|
||||||
|
}
|
||||||
|
if isLoopbackBind(bind) {
|
||||||
|
return nil // loopback cluster is dev-only and unreachable from outside
|
||||||
|
}
|
||||||
|
// Public cluster: demand a route secret and mutual route TLS.
|
||||||
|
if user == "" || pass == "" {
|
||||||
|
return fmt.Errorf(
|
||||||
|
"refusing to start: cluster %q on public bind %q requires --cluster-user and --cluster-pass; an unauthenticated route port lets anyone join the cluster",
|
||||||
|
clusterName, bind)
|
||||||
|
}
|
||||||
|
if !rtAll {
|
||||||
|
return fmt.Errorf(
|
||||||
|
"refusing to start: cluster %q on public bind %q requires mutual route TLS (--route-tls-cert/--route-tls-key/--route-tls-ca); plaintext routes expose server-to-server traffic and admit unsigned nodes",
|
||||||
|
clusterName, bind)
|
||||||
|
}
|
||||||
|
return nil
|
||||||
|
}
|
||||||
@@ -0,0 +1,188 @@
|
|||||||
|
package main
|
||||||
|
|
||||||
|
import (
|
||||||
|
"strings"
|
||||||
|
"testing"
|
||||||
|
|
||||||
|
"github.com/enmanuel/unibus/pkg/membership"
|
||||||
|
)
|
||||||
|
|
||||||
|
// TestAudit_FailOpenTLSWithoutAuth ports the auditor's H2 vector. Before the
|
||||||
|
// guard, booting with TLS on but the authenticator off ("--bind 0.0.0.0
|
||||||
|
// --tls-cert … " without enforce) produced an encrypted data plane that an
|
||||||
|
// unregistered, nkey-less client could still connect to — a fail-open config
|
||||||
|
// wearing the appearance of security. validateBootConfig now refuses it, so the
|
||||||
|
// insecure server never starts (the client therefore has nothing to connect to).
|
||||||
|
func TestAudit_FailOpenTLSWithoutAuth(t *testing.T) {
|
||||||
|
// The exact auditor configuration: public bind, TLS provided, auth off.
|
||||||
|
err := validateBootConfig("0.0.0.0", membership.AuthOff, "server.crt", "server.key")
|
||||||
|
if err == nil {
|
||||||
|
t.Fatalf("TLS without enforce on a public bind must be refused at startup")
|
||||||
|
}
|
||||||
|
if !strings.Contains(err.Error(), "enforce") {
|
||||||
|
t.Fatalf("error should point the operator at --bus-auth enforce, got: %v", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
// And TLS without enforce is rejected even on loopback: TLS implies a
|
||||||
|
// security posture, so authenticating no one is always a misconfiguration.
|
||||||
|
if err := validateBootConfig("127.0.0.1", membership.AuthOff, "server.crt", "server.key"); err == nil {
|
||||||
|
t.Fatalf("TLS flags without enforce must be refused regardless of bind")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// TestGap_PublicEnforceNoTLS ports the re-auditor's N4 gap: the H2 guard refused
|
||||||
|
// "public without enforce" and "TLS without enforce", but ALLOWED a public bind
|
||||||
|
// with enforce and NO --tls-cert, so the control plane served metadata over
|
||||||
|
// plaintext HTTP publicly (H5 reappearing). The guard now refuses it.
|
||||||
|
func TestGap_PublicEnforceNoTLS(t *testing.T) {
|
||||||
|
// The exact auditor configuration: public bind, enforce on, no TLS cert/key.
|
||||||
|
err := validateBootConfig("0.0.0.0", membership.AuthEnforce, "", "")
|
||||||
|
if err == nil {
|
||||||
|
t.Fatalf("public bind + enforce + NO --tls-cert must be refused: the control plane would serve plaintext HTTP publicly (audit N4)")
|
||||||
|
}
|
||||||
|
if !strings.Contains(err.Error(), "tls-cert") {
|
||||||
|
t.Fatalf("error should point the operator at --tls-cert/--tls-key, got: %v", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Golden: the same public+enforce config WITH a cert/key is allowed.
|
||||||
|
if err := validateBootConfig("0.0.0.0", membership.AuthEnforce, "server.crt", "server.key"); err != nil {
|
||||||
|
t.Fatalf("public + enforce + TLS is the intended production config, got: %v", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Edge: loopback without TLS stays allowed (local dev is not a public exposure).
|
||||||
|
if err := validateBootConfig("127.0.0.1", membership.AuthOff, "", ""); err != nil {
|
||||||
|
t.Fatalf("loopback dev without TLS must remain allowed, got: %v", err)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// TestBootConfigPolicy is the full table: the golden secure-public config is
|
||||||
|
// allowed, dev loopback is allowed, and every fail-open shape is refused.
|
||||||
|
func TestBootConfigPolicy(t *testing.T) {
|
||||||
|
cases := []struct {
|
||||||
|
name string
|
||||||
|
bind string
|
||||||
|
mode membership.AuthMode
|
||||||
|
cert string
|
||||||
|
key string
|
||||||
|
wantErr bool
|
||||||
|
}{
|
||||||
|
// Golden: the intended public production config — enforce AND TLS.
|
||||||
|
{"public+enforce+tls", "0.0.0.0", membership.AuthEnforce, "s.crt", "s.key", false},
|
||||||
|
// Edge: local dev on loopback may stay open (no auth, no TLS).
|
||||||
|
{"loopback+off", "127.0.0.1", membership.AuthOff, "", "", false},
|
||||||
|
{"loopback-ipv6+off", "::1", membership.AuthOff, "", "", false},
|
||||||
|
{"localhost+off", "localhost", membership.AuthOff, "", "", false},
|
||||||
|
{"loopback+soft", "127.0.0.1", membership.AuthSoft, "", "", false},
|
||||||
|
// Edge: loopback with full enforce+TLS is also fine.
|
||||||
|
{"loopback+enforce+tls", "127.0.0.1", membership.AuthEnforce, "s.crt", "s.key", false},
|
||||||
|
// Error: public bind without enforce.
|
||||||
|
{"public+off", "0.0.0.0", membership.AuthOff, "", "", true},
|
||||||
|
{"public+soft", "0.0.0.0", membership.AuthSoft, "", "", true},
|
||||||
|
{"lan-ip+off", "192.168.1.10", membership.AuthOff, "", "", true},
|
||||||
|
{"empty-bind+off", "", membership.AuthOff, "", "", true},
|
||||||
|
// Error (N4): public bind + enforce but NO TLS -> plaintext control plane.
|
||||||
|
{"public+enforce+notls", "0.0.0.0", membership.AuthEnforce, "", "", true},
|
||||||
|
{"public+enforce+certonly", "0.0.0.0", membership.AuthEnforce, "s.crt", "", true},
|
||||||
|
{"public+enforce+keyonly", "0.0.0.0", membership.AuthEnforce, "", "s.key", true},
|
||||||
|
{"lan-ip+enforce+notls", "192.168.1.10", membership.AuthEnforce, "", "", true},
|
||||||
|
// Error: TLS flags without enforce (cert or key alone is enough to trip it).
|
||||||
|
{"loopback+tlscert+off", "127.0.0.1", membership.AuthOff, "s.crt", "", true},
|
||||||
|
{"loopback+tlskey+soft", "127.0.0.1", membership.AuthSoft, "", "s.key", true},
|
||||||
|
}
|
||||||
|
for _, c := range cases {
|
||||||
|
t.Run(c.name, func(t *testing.T) {
|
||||||
|
err := validateBootConfig(c.bind, c.mode, c.cert, c.key)
|
||||||
|
if c.wantErr && err == nil {
|
||||||
|
t.Fatalf("config %+v should be refused", c)
|
||||||
|
}
|
||||||
|
if !c.wantErr && err != nil {
|
||||||
|
t.Fatalf("config %+v should be allowed, got: %v", c, err)
|
||||||
|
}
|
||||||
|
})
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// TestClusterConfigPolicy is the cluster route guard (issue 0003a): a standalone
|
||||||
|
// server is always fine; a loopback cluster is dev-only and unguarded; a public
|
||||||
|
// cluster demands both a route secret and complete mutual route TLS; and the
|
||||||
|
// route-TLS flags are all-or-nothing regardless of bind.
|
||||||
|
func TestClusterConfigPolicy(t *testing.T) {
|
||||||
|
const c, k, ca = "node.crt", "node.key", "ca.crt"
|
||||||
|
en := membership.AuthEnforce
|
||||||
|
off := membership.AuthOff
|
||||||
|
soft := membership.AuthSoft
|
||||||
|
cases := []struct {
|
||||||
|
name string
|
||||||
|
clusterName, bind string
|
||||||
|
user, pass string
|
||||||
|
rtCert, rtKey, rtCA string
|
||||||
|
mode membership.AuthMode
|
||||||
|
wantErr bool
|
||||||
|
}{
|
||||||
|
// Standalone (no cluster name) is always allowed, even on a public bind and
|
||||||
|
// without enforce — the cluster posture rule does not apply to a single node.
|
||||||
|
{"standalone-public-off", "", "0.0.0.0", "", "", "", "", "", off, false},
|
||||||
|
// Loopback dev cluster WITH enforce: allowed (unreachable from outside).
|
||||||
|
{"loopback-cluster-enforce", "unibus", "127.0.0.1", "", "", "", "", "", en, false},
|
||||||
|
// Golden: full public HA config under enforce.
|
||||||
|
{"public-full-enforce", "unibus", "0.0.0.0", "u", "p", c, k, ca, en, false},
|
||||||
|
// N1 (audit 0008): a clustered node WITHOUT enforce is refused — even on
|
||||||
|
// loopback — so no weak node can join the cluster.
|
||||||
|
{"cluster-off-refused", "unibus", "127.0.0.1", "", "", "", "", "", off, true},
|
||||||
|
{"cluster-soft-refused", "unibus", "0.0.0.0", "u", "p", c, k, ca, soft, true},
|
||||||
|
// Error: public cluster without a route secret (enforce on, fails on secret).
|
||||||
|
{"public-no-secret", "unibus", "0.0.0.0", "", "", c, k, ca, en, true},
|
||||||
|
{"public-half-secret", "unibus", "0.0.0.0", "u", "", c, k, ca, en, true},
|
||||||
|
// Error: public cluster without mutual route TLS.
|
||||||
|
{"public-no-tls", "unibus", "10.0.0.1", "u", "p", "", "", "", en, true},
|
||||||
|
// Error: partial route-TLS flags trip regardless of bind/mode.
|
||||||
|
{"loopback-partial-tls", "unibus", "127.0.0.1", "", "", c, "", "", en, true},
|
||||||
|
{"standalone-partial-tls", "", "127.0.0.1", "", "", c, k, "", off, true},
|
||||||
|
}
|
||||||
|
for _, tc := range cases {
|
||||||
|
t.Run(tc.name, func(t *testing.T) {
|
||||||
|
err := validateClusterConfig(tc.clusterName, tc.bind, tc.user, tc.pass, tc.rtCert, tc.rtKey, tc.rtCA, tc.mode)
|
||||||
|
if tc.wantErr && err == nil {
|
||||||
|
t.Fatalf("cluster config %+v should be refused", tc)
|
||||||
|
}
|
||||||
|
if !tc.wantErr && err != nil {
|
||||||
|
t.Fatalf("cluster config %+v should be allowed, got: %v", tc, err)
|
||||||
|
}
|
||||||
|
})
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// TestAttack0008_N1 is the regression for audit 0008 N1 scenario 2: a node
|
||||||
|
// configured to join a cluster while NOT enforcing auth (the weak node that lets
|
||||||
|
// an unauthenticated peer harvest the cluster's forwarded traffic) must be refused
|
||||||
|
// at startup. The homogeneous-posture rule makes this binary unable to BE that
|
||||||
|
// weak node.
|
||||||
|
func TestAttack0008_N1(t *testing.T) {
|
||||||
|
// Weak node: clustered but --bus-auth off -> refused.
|
||||||
|
if err := validateClusterConfig("unibus", "0.0.0.0", "u", "p", "n.crt", "n.key", "ca.crt", membership.AuthOff); err == nil {
|
||||||
|
t.Fatalf("a clustered node without enforce must be refused (audit 0008 N1)")
|
||||||
|
}
|
||||||
|
// Same node WITH enforce + full route security -> allowed.
|
||||||
|
if err := validateClusterConfig("unibus", "0.0.0.0", "u", "p", "n.crt", "n.key", "ca.crt", membership.AuthEnforce); err != nil {
|
||||||
|
t.Fatalf("a clustered enforce node with full route security must be allowed, got: %v", err)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestSplitRoutes(t *testing.T) {
|
||||||
|
cases := []struct {
|
||||||
|
in string
|
||||||
|
want int
|
||||||
|
}{
|
||||||
|
{"", 0},
|
||||||
|
{"nats://a:1", 1},
|
||||||
|
{"nats://a:1,nats://b:2", 2},
|
||||||
|
{" nats://a:1 , nats://b:2 ", 2}, // spaces trimmed
|
||||||
|
{"nats://a:1,,", 1}, // empty entries dropped
|
||||||
|
{",", 0},
|
||||||
|
}
|
||||||
|
for _, c := range cases {
|
||||||
|
if got := splitRoutes(c.in); len(got) != c.want {
|
||||||
|
t.Fatalf("splitRoutes(%q) = %v (len %d), want len %d", c.in, got, len(got), c.want)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
@@ -0,0 +1,84 @@
|
|||||||
|
package main
|
||||||
|
|
||||||
|
import (
|
||||||
|
"fmt"
|
||||||
|
|
||||||
|
cs "fn-registry/functions/cybersecurity"
|
||||||
|
|
||||||
|
"github.com/enmanuel/unibus/pkg/busauth"
|
||||||
|
"github.com/nats-io/nats.go"
|
||||||
|
"github.com/nats-io/nats.go/jetstream"
|
||||||
|
|
||||||
|
server "github.com/nats-io/nats-server/v2/server"
|
||||||
|
)
|
||||||
|
|
||||||
|
// connectInternalJS opens a privileged JetStream client from membershipd to its
|
||||||
|
// OWN embedded NATS server. This is the resolution of the "bootstrap cycle"
|
||||||
|
// (issue 0006a/c): the service needs JetStream to create the replicated nonce
|
||||||
|
// bucket and the control-plane KV, but under enforce the data plane only accepts
|
||||||
|
// allowlisted clients confined to their rooms. The connection therefore
|
||||||
|
// authenticates with the process's ephemeral internal identity — the identity the
|
||||||
|
// authenticator was built to recognize (NewNkeyAuthenticatorACLInternal) and
|
||||||
|
// grant full permissions — without ever appearing in the user allowlist.
|
||||||
|
//
|
||||||
|
// It uses the in-process transport (nats.InProcessServer), a Go pipe inside the
|
||||||
|
// process, so it bypasses TLS entirely: no CA wiring is needed for this
|
||||||
|
// self-connection even when the public data plane is TLS-only. useNkey mirrors
|
||||||
|
// whether the embedded server enforces auth: under enforce the internal identity
|
||||||
|
// presents its nkey; without enforce the server accepts an unauthenticated
|
||||||
|
// in-process client and the nkey is omitted.
|
||||||
|
//
|
||||||
|
// The caller owns the returned connection and must Close it on shutdown (after
|
||||||
|
// the JetStream context is no longer used).
|
||||||
|
func connectInternalJS(ns *server.Server, internalID cs.Identity, useNkey bool) (*nats.Conn, jetstream.JetStream, error) {
|
||||||
|
opts := []nats.Option{
|
||||||
|
nats.Name("membershipd-internal"),
|
||||||
|
nats.InProcessServer(ns),
|
||||||
|
}
|
||||||
|
if useNkey {
|
||||||
|
pub, sign, err := busauth.ClientNkey(internalID.SignPriv)
|
||||||
|
if err != nil {
|
||||||
|
return nil, nil, fmt.Errorf("internal nkey: %w", err)
|
||||||
|
}
|
||||||
|
opts = append(opts, nats.Nkey(pub, sign))
|
||||||
|
}
|
||||||
|
// The URL is ignored for an in-process connection; the InProcessServer option
|
||||||
|
// supplies the transport.
|
||||||
|
nc, err := nats.Connect("", opts...)
|
||||||
|
if err != nil {
|
||||||
|
return nil, nil, fmt.Errorf("connect internal nats: %w", err)
|
||||||
|
}
|
||||||
|
js, err := jetstream.New(nc)
|
||||||
|
if err != nil {
|
||||||
|
nc.Close()
|
||||||
|
return nil, nil, fmt.Errorf("internal jetstream: %w", err)
|
||||||
|
}
|
||||||
|
return nc, js, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// connectExternalJS opens a JetStream client to an EXTERNAL NATS the operator
|
||||||
|
// runs (membershipd started with --nats-url). Unlike the embedded path there is
|
||||||
|
// no in-process transport and no internal identity: the external server enforces
|
||||||
|
// its own auth, so membershipd connects as a plain client (optionally TLS-pinned
|
||||||
|
// to the bus CA). It is best-effort and intended for an operator-managed cluster;
|
||||||
|
// the standard unibus deploy uses the embedded server (connectInternalJS).
|
||||||
|
func connectExternalJS(natsURL, caPath string) (*nats.Conn, jetstream.JetStream, error) {
|
||||||
|
opts := []nats.Option{nats.Name("membershipd-internal")}
|
||||||
|
if caPath != "" {
|
||||||
|
tlsCfg, err := busauth.LoadCATLSConfig(caPath)
|
||||||
|
if err != nil {
|
||||||
|
return nil, nil, fmt.Errorf("load CA %q: %w", caPath, err)
|
||||||
|
}
|
||||||
|
opts = append(opts, nats.Secure(tlsCfg))
|
||||||
|
}
|
||||||
|
nc, err := nats.Connect(natsURL, opts...)
|
||||||
|
if err != nil {
|
||||||
|
return nil, nil, fmt.Errorf("connect external nats %q: %w", natsURL, err)
|
||||||
|
}
|
||||||
|
js, err := jetstream.New(nc)
|
||||||
|
if err != nil {
|
||||||
|
nc.Close()
|
||||||
|
return nil, nil, fmt.Errorf("external jetstream: %w", err)
|
||||||
|
}
|
||||||
|
return nc, js, nil
|
||||||
|
}
|
||||||
@@ -0,0 +1,119 @@
|
|||||||
|
package main
|
||||||
|
|
||||||
|
// Bootstrap test for issue 0006a/c: under enforce, membershipd must still reach
|
||||||
|
// JetStream on its OWN embedded server to create the nonce/KV buckets. It does so
|
||||||
|
// with an ephemeral internal identity the authenticator grants full permissions
|
||||||
|
// (NewNkeyAuthenticatorACLInternal). These tests prove that privileged
|
||||||
|
// self-connection works AND that no other identity can claim it.
|
||||||
|
|
||||||
|
import (
|
||||||
|
"context"
|
||||||
|
"encoding/hex"
|
||||||
|
"net"
|
||||||
|
"testing"
|
||||||
|
"time"
|
||||||
|
|
||||||
|
cs "fn-registry/functions/cybersecurity"
|
||||||
|
|
||||||
|
"github.com/enmanuel/unibus/pkg/busauth"
|
||||||
|
"github.com/enmanuel/unibus/pkg/embeddednats"
|
||||||
|
"github.com/nats-io/nats.go"
|
||||||
|
"github.com/nats-io/nats.go/jetstream"
|
||||||
|
)
|
||||||
|
|
||||||
|
// icFreePort returns a TCP port the OS reports as free on 127.0.0.1. It listens
// on port 0 so the kernel picks the port, reads the chosen port back from the
// listener address, and closes the listener on return. The test fails
// immediately if the listener cannot be opened.
func icFreePort(t *testing.T) int {
	t.Helper()

	listener, listenErr := net.Listen("tcp", "127.0.0.1:0")
	if listenErr != nil {
		t.Fatalf("free port: %v", listenErr)
	}
	defer listener.Close()

	tcpAddr := listener.Addr().(*net.TCPAddr)
	return tcpAddr.Port
}
|
||||||
|
|
||||||
|
// TestInternalConnPrivilegedUnderEnforce: with an enforce authenticator that
|
||||||
|
// authorizes NO bus user, the internal identity still connects in-process and has
|
||||||
|
// full permissions — it creates a KV bucket and round-trips a value. This is the
|
||||||
|
// resolution of the bootstrap cycle the audit flagged as the reason the KV store
|
||||||
|
// was never wired.
|
||||||
|
func TestInternalConnPrivilegedUnderEnforce(t *testing.T) {
|
||||||
|
internalID, err := cs.GenerateIdentity()
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("internal identity: %v", err)
|
||||||
|
}
|
||||||
|
internalPubHex := hex.EncodeToString(internalID.SignPub)
|
||||||
|
|
||||||
|
// Authenticator: no bus user is authorized; only the internal identity passes.
|
||||||
|
auth := busauth.NewNkeyAuthenticatorACLInternal(
|
||||||
|
func(string) bool { return false },
|
||||||
|
busauth.PermissionsFromSubjects(func(string) ([]string, error) { return []string{"_INBOX.>"}, nil }),
|
||||||
|
internalPubHex,
|
||||||
|
)
|
||||||
|
ns, err := embeddednats.StartServer(embeddednats.ServerConfig{
|
||||||
|
StoreDir: t.TempDir(), Host: "127.0.0.1", Port: icFreePort(t), Auth: auth,
|
||||||
|
})
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("nats: %v", err)
|
||||||
|
}
|
||||||
|
t.Cleanup(func() { ns.Shutdown(); ns.WaitForShutdown() })
|
||||||
|
|
||||||
|
nc, js, err := connectInternalJS(ns, internalID, true /*useNkey*/)
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("connectInternalJS: %v", err)
|
||||||
|
}
|
||||||
|
t.Cleanup(nc.Close)
|
||||||
|
|
||||||
|
ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second)
|
||||||
|
defer cancel()
|
||||||
|
kv, err := js.CreateKeyValue(ctx, jetstream.KeyValueConfig{Bucket: "KV_UNIBUS_test", Replicas: 1})
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("internal conn could not create KV bucket (full perms expected): %v", err)
|
||||||
|
}
|
||||||
|
if _, err := kv.Put(ctx, "k", []byte("v")); err != nil {
|
||||||
|
t.Fatalf("kv put: %v", err)
|
||||||
|
}
|
||||||
|
e, err := kv.Get(ctx, "k")
|
||||||
|
if err != nil || string(e.Value()) != "v" {
|
||||||
|
t.Fatalf("kv get: val=%q err=%v", e, err)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// TestInternalConnOutsiderRejected: an identity that is neither the internal one
|
||||||
|
// nor an allowlisted bus user cannot connect — proving the internal bypass is
|
||||||
|
// scoped to the exact internal key, not a blanket hole.
|
||||||
|
func TestInternalConnOutsiderRejected(t *testing.T) {
|
||||||
|
internalID, err := cs.GenerateIdentity()
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("internal identity: %v", err)
|
||||||
|
}
|
||||||
|
auth := busauth.NewNkeyAuthenticatorACLInternal(
|
||||||
|
func(string) bool { return false },
|
||||||
|
busauth.PermissionsFromSubjects(func(string) ([]string, error) { return []string{"_INBOX.>"}, nil }),
|
||||||
|
hex.EncodeToString(internalID.SignPub),
|
||||||
|
)
|
||||||
|
ns, err := embeddednats.StartServer(embeddednats.ServerConfig{
|
||||||
|
StoreDir: t.TempDir(), Host: "127.0.0.1", Port: icFreePort(t), Auth: auth,
|
||||||
|
})
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("nats: %v", err)
|
||||||
|
}
|
||||||
|
t.Cleanup(func() { ns.Shutdown(); ns.WaitForShutdown() })
|
||||||
|
|
||||||
|
outsider, err := cs.GenerateIdentity()
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("outsider identity: %v", err)
|
||||||
|
}
|
||||||
|
pub, sign, err := busauth.ClientNkey(outsider.SignPriv)
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("outsider nkey: %v", err)
|
||||||
|
}
|
||||||
|
conn, err := nats.Connect(ns.ClientURL(),
|
||||||
|
nats.Nkey(pub, sign),
|
||||||
|
nats.MaxReconnects(0),
|
||||||
|
nats.Timeout(2*time.Second),
|
||||||
|
)
|
||||||
|
if err == nil {
|
||||||
|
conn.Close()
|
||||||
|
t.Fatalf("outsider (unauthorized, non-internal) must be rejected, but connected")
|
||||||
|
}
|
||||||
|
}
|
||||||
@@ -0,0 +1,154 @@
|
|||||||
|
package main
|
||||||
|
|
||||||
|
// Wiring tests for issue 0006c: --store kv selects the replicated JetStream KV
|
||||||
|
// control plane, the authenticator serves from it through the storeHolder, and a
|
||||||
|
// new node sees state created by another (the divergence that per-node SQLite
|
||||||
|
// caused — audit 0008 N5 — is gone). Branch-by-abstraction is verified elsewhere
|
||||||
|
// (the SQLite default path is the unchanged baseline covered by the existing
|
||||||
|
// suite).
|
||||||
|
|
||||||
|
import (
|
||||||
|
"encoding/hex"
|
||||||
|
"testing"
|
||||||
|
"time"
|
||||||
|
|
||||||
|
cs "fn-registry/functions/cybersecurity"
|
||||||
|
|
||||||
|
"github.com/enmanuel/unibus/pkg/busauth"
|
||||||
|
"github.com/enmanuel/unibus/pkg/embeddednats"
|
||||||
|
"github.com/enmanuel/unibus/pkg/membership"
|
||||||
|
"github.com/nats-io/nats.go"
|
||||||
|
"github.com/nats-io/nats.go/jetstream"
|
||||||
|
)
|
||||||
|
|
||||||
|
// TestKVStoreBootstrapUnderEnforce drives the exact decentralized boot the binary
|
||||||
|
// performs: build the authenticator over an empty holder, start NATS, open the
|
||||||
|
// privileged internal connection, open the KV store, publish it into the holder,
|
||||||
|
// then a real bus user (seeded into the KV store) authenticates over nkey. This
|
||||||
|
// proves the bootstrap cycle is broken correctly — the KV-backed control plane
|
||||||
|
// authorizes live clients under enforce.
|
||||||
|
func TestKVStoreBootstrapUnderEnforce(t *testing.T) {
|
||||||
|
internalID, err := cs.GenerateIdentity()
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("internal identity: %v", err)
|
||||||
|
}
|
||||||
|
holder := &storeHolder{}
|
||||||
|
auth := busauth.NewNkeyAuthenticatorACLInternal(
|
||||||
|
holder.IsAuthorized,
|
||||||
|
busauth.PermissionsFromSubjects(holder.subjectACL),
|
||||||
|
hex.EncodeToString(internalID.SignPub),
|
||||||
|
)
|
||||||
|
ns, err := embeddednats.StartServer(embeddednats.ServerConfig{
|
||||||
|
StoreDir: t.TempDir(), Host: "127.0.0.1", Port: freePort(t), Auth: auth,
|
||||||
|
})
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("nats: %v", err)
|
||||||
|
}
|
||||||
|
t.Cleanup(func() { ns.Shutdown(); ns.WaitForShutdown() })
|
||||||
|
|
||||||
|
// Privileged internal connection opens the KV store while the holder still
|
||||||
|
// denies every normal client.
|
||||||
|
intNC, js, err := connectInternalJS(ns, internalID, true)
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("connectInternalJS: %v", err)
|
||||||
|
}
|
||||||
|
t.Cleanup(intNC.Close)
|
||||||
|
kvStore, err := membership.OpenJetStream(js, membership.JetStreamConfig{Replicas: 1, OpTimeout: 3 * time.Second})
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("open kv store: %v", err)
|
||||||
|
}
|
||||||
|
holder.set(kvStore)
|
||||||
|
|
||||||
|
// Seed a bus user into the KV control plane.
|
||||||
|
alice, err := cs.GenerateIdentity()
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("alice: %v", err)
|
||||||
|
}
|
||||||
|
if err := kvStore.AddUser(hex.EncodeToString(alice.SignPub), "alice", membership.RoleMember); err != nil {
|
||||||
|
t.Fatalf("seed alice: %v", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
// alice authenticates over nkey — authorized via the KV store through the holder.
|
||||||
|
pub, sign, err := busauth.ClientNkey(alice.SignPriv)
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("alice nkey: %v", err)
|
||||||
|
}
|
||||||
|
aliceNC, err := nats.Connect(ns.ClientURL(), nats.Nkey(pub, sign), nats.MaxReconnects(0), nats.Timeout(2*time.Second))
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("alice (KV-authorized) must connect under enforce: %v", err)
|
||||||
|
}
|
||||||
|
aliceNC.Close()
|
||||||
|
|
||||||
|
// An outsider not in the KV store is denied (fail closed).
|
||||||
|
outsider, err := cs.GenerateIdentity()
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("outsider: %v", err)
|
||||||
|
}
|
||||||
|
opub, osign, err := busauth.ClientNkey(outsider.SignPriv)
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("outsider nkey: %v", err)
|
||||||
|
}
|
||||||
|
if oc, err := nats.Connect(ns.ClientURL(), nats.Nkey(opub, osign), nats.MaxReconnects(0), nats.Timeout(2*time.Second)); err == nil {
|
||||||
|
oc.Close()
|
||||||
|
t.Fatalf("an outsider absent from the KV store must be rejected")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// TestKVStoreDecentralizedConsistency: a room/user created via one node's KV store
|
||||||
|
// is immediately visible to another node's KV store over the same JetStream — the
|
||||||
|
// shared, replicated control plane that ends the per-node SQLite divergence.
|
||||||
|
func TestKVStoreDecentralizedConsistency(t *testing.T) {
|
||||||
|
ns, err := embeddednats.StartServer(embeddednats.ServerConfig{
|
||||||
|
StoreDir: t.TempDir(), Host: "127.0.0.1", Port: freePort(t),
|
||||||
|
})
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("nats: %v", err)
|
||||||
|
}
|
||||||
|
t.Cleanup(func() { ns.Shutdown(); ns.WaitForShutdown() })
|
||||||
|
|
||||||
|
open := func() membership.Store {
|
||||||
|
nc, err := nats.Connect(ns.ClientURL())
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("connect: %v", err)
|
||||||
|
}
|
||||||
|
t.Cleanup(nc.Close)
|
||||||
|
js, err := jetstream.New(nc)
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("jetstream: %v", err)
|
||||||
|
}
|
||||||
|
st, err := membership.OpenJetStream(js, membership.JetStreamConfig{Replicas: 1, OpTimeout: 3 * time.Second})
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("open kv: %v", err)
|
||||||
|
}
|
||||||
|
return st
|
||||||
|
}
|
||||||
|
nodeA := open()
|
||||||
|
nodeB := open()
|
||||||
|
|
||||||
|
owner, err := cs.GenerateIdentity()
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("owner: %v", err)
|
||||||
|
}
|
||||||
|
ownerPub := hex.EncodeToString(owner.SignPub)
|
||||||
|
if err := nodeA.AddUser(ownerPub, "owner", membership.RoleAdmin); err != nil {
|
||||||
|
t.Fatalf("nodeA add user: %v", err)
|
||||||
|
}
|
||||||
|
if err := nodeA.CreateRoom(
|
||||||
|
membership.RoomInfo{RoomID: "ROOMX", Subject: "room.shared.x", OwnerEndpoint: "owner-ep"},
|
||||||
|
owner.SignPub, owner.KexPub, nil,
|
||||||
|
); err != nil {
|
||||||
|
t.Fatalf("nodeA create room: %v", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
// nodeB (a different connection, same buckets) sees both immediately.
|
||||||
|
if !nodeB.IsAuthorized(ownerPub) {
|
||||||
|
t.Fatalf("nodeB must see the user created on nodeA (decentralized state divergence)")
|
||||||
|
}
|
||||||
|
got, err := nodeB.GetRoom("ROOMX")
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("nodeB must see the room created on nodeA: %v", err)
|
||||||
|
}
|
||||||
|
if got.Subject != "room.shared.x" {
|
||||||
|
t.Fatalf("nodeB read wrong room subject: %q", got.Subject)
|
||||||
|
}
|
||||||
|
}
|
||||||
@@ -0,0 +1,75 @@
|
|||||||
|
package main
|
||||||
|
|
||||||
|
import (
|
||||||
|
"os"
|
||||||
|
"path/filepath"
|
||||||
|
"strings"
|
||||||
|
"testing"
|
||||||
|
)
|
||||||
|
|
||||||
|
// TestResolveClusterPass verifies the secret resolution precedence
|
||||||
|
// (file > env > flag) that keeps the cluster password out of argv (issue 0006f).
|
||||||
|
func TestResolveClusterPass(t *testing.T) {
|
||||||
|
// file wins over env and flag, and is trimmed.
|
||||||
|
f := filepath.Join(t.TempDir(), "pass")
|
||||||
|
if err := os.WriteFile(f, []byte("filesecret\n"), 0o600); err != nil {
|
||||||
|
t.Fatalf("write: %v", err)
|
||||||
|
}
|
||||||
|
if got, src, err := resolveClusterPass("flagsecret", f, "envsecret"); err != nil || got != "filesecret" || src != "file" {
|
||||||
|
t.Fatalf("file precedence: got %q src %q err %v", got, src, err)
|
||||||
|
}
|
||||||
|
// env wins over flag when no file.
|
||||||
|
if got, src, err := resolveClusterPass("flagsecret", "", "envsecret"); err != nil || got != "envsecret" || src != "env" {
|
||||||
|
t.Fatalf("env precedence: got %q src %q err %v", got, src, err)
|
||||||
|
}
|
||||||
|
// flag is the last resort.
|
||||||
|
if got, src, err := resolveClusterPass("flagsecret", "", ""); err != nil || got != "flagsecret" || src != "flag" {
|
||||||
|
t.Fatalf("flag fallback: got %q src %q err %v", got, src, err)
|
||||||
|
}
|
||||||
|
// none set.
|
||||||
|
if got, src, err := resolveClusterPass("", "", ""); err != nil || got != "" || src != "none" {
|
||||||
|
t.Fatalf("none: got %q src %q err %v", got, src, err)
|
||||||
|
}
|
||||||
|
// missing file is an error.
|
||||||
|
if _, _, err := resolveClusterPass("", filepath.Join(t.TempDir(), "nope"), ""); err == nil {
|
||||||
|
t.Fatalf("missing file must error")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// TestInjectRouteCreds verifies the secret is injected only into routes that omit
|
||||||
|
// userinfo, so --routes argv need not carry the password (issue 0006f).
|
||||||
|
func TestInjectRouteCreds(t *testing.T) {
|
||||||
|
in := []string{"nats://10.0.0.2:6250", "nats://override:pw@10.0.0.3:6250"}
|
||||||
|
out, err := injectRouteCreds(in, "user", "secret")
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("inject: %v", err)
|
||||||
|
}
|
||||||
|
if !strings.Contains(out[0], "user:secret@10.0.0.2:6250") {
|
||||||
|
t.Fatalf("creds not injected into bare route: %q", out[0])
|
||||||
|
}
|
||||||
|
if !strings.Contains(out[1], "override:pw@10.0.0.3:6250") {
|
||||||
|
t.Fatalf("existing userinfo must be preserved: %q", out[1])
|
||||||
|
}
|
||||||
|
// empty user is a no-op.
|
||||||
|
noop, err := injectRouteCreds(in, "", "")
|
||||||
|
if err != nil || noop[0] != in[0] {
|
||||||
|
t.Fatalf("empty user must be a no-op: %v %q", err, noop[0])
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// TestIsLoopbackURL guards migrate-to-kv against pushing the allowlist cleartext
|
||||||
|
// to a remote NATS (issue 0006f, audit 0008 N6).
|
||||||
|
func TestIsLoopbackURL(t *testing.T) {
|
||||||
|
loop := []string{"nats://127.0.0.1:4250", "nats://localhost:4250", "nats://[::1]:4250"}
|
||||||
|
for _, u := range loop {
|
||||||
|
if !isLoopbackURL(u) {
|
||||||
|
t.Fatalf("%q should be loopback", u)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
remote := []string{"nats://10.0.0.2:4250", "nats://bus.example.com:4250", "::not-a-url"}
|
||||||
|
for _, u := range remote {
|
||||||
|
if isLoopbackURL(u) {
|
||||||
|
t.Fatalf("%q should NOT be loopback", u)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
+297
-23
@@ -6,6 +6,8 @@ package main
|
|||||||
|
|
||||||
import (
|
import (
|
||||||
"context"
|
"context"
|
||||||
|
"crypto/tls"
|
||||||
|
"encoding/hex"
|
||||||
"flag"
|
"flag"
|
||||||
"log"
|
"log"
|
||||||
"net/http"
|
"net/http"
|
||||||
@@ -14,14 +16,36 @@ import (
|
|||||||
"syscall"
|
"syscall"
|
||||||
"time"
|
"time"
|
||||||
|
|
||||||
|
cs "fn-registry/functions/cybersecurity"
|
||||||
|
|
||||||
|
"github.com/nats-io/nats.go"
|
||||||
|
"github.com/nats-io/nats.go/jetstream"
|
||||||
server "github.com/nats-io/nats-server/v2/server"
|
server "github.com/nats-io/nats-server/v2/server"
|
||||||
|
|
||||||
"github.com/enmanuel/unibus/pkg/blobstore"
|
"github.com/enmanuel/unibus/pkg/blobstore"
|
||||||
|
"github.com/enmanuel/unibus/pkg/busauth"
|
||||||
"github.com/enmanuel/unibus/pkg/embeddednats"
|
"github.com/enmanuel/unibus/pkg/embeddednats"
|
||||||
"github.com/enmanuel/unibus/pkg/membership"
|
"github.com/enmanuel/unibus/pkg/membership"
|
||||||
)
|
)
|
||||||
|
|
||||||
func main() {
|
func main() {
|
||||||
|
// Subcommand dispatch: `membershipd user ...` is the local administration CLI
|
||||||
|
// (seed/list/revoke bus users) and must be handled before the server flag set
|
||||||
|
// parses os.Args. Running the CLI on the bus host is trusted by design (whoever
|
||||||
|
// has a shell there already controls the service), which is how the first admin
|
||||||
|
// is seeded without a chicken-egg auth problem.
|
||||||
|
if len(os.Args) > 1 && os.Args[1] == "user" {
|
||||||
|
runUserCLI(os.Args[2:])
|
||||||
|
return
|
||||||
|
}
|
||||||
|
// `membershipd migrate-to-kv` is the one-time, idempotent SQLite->JetStream KV
|
||||||
|
// data move for decentralization (issue 0003c). Like the user CLI it runs on
|
||||||
|
// the host and is dispatched before the server flag set parses os.Args.
|
||||||
|
if len(os.Args) > 1 && os.Args[1] == "migrate-to-kv" {
|
||||||
|
runMigrateCLI(os.Args[2:])
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
var (
|
var (
|
||||||
bind = flag.String("bind", "127.0.0.1", "network interface to bind the HTTP API and the embedded NATS to; use 0.0.0.0 to accept LAN/remote peers")
|
bind = flag.String("bind", "127.0.0.1", "network interface to bind the HTTP API and the embedded NATS to; use 0.0.0.0 to accept LAN/remote peers")
|
||||||
natsURL = flag.String("nats-url", "", "external NATS url; empty starts an embedded server")
|
natsURL = flag.String("nats-url", "", "external NATS url; empty starts an embedded server")
|
||||||
@@ -30,21 +54,196 @@ func main() {
|
|||||||
storeDir = flag.String("store-dir", "./local_files/blobs", "blob store directory")
|
storeDir = flag.String("store-dir", "./local_files/blobs", "blob store directory")
|
||||||
natsPort = flag.Int("nats-port", 4250, "embedded NATS listen port (when --nats-url empty)")
|
natsPort = flag.Int("nats-port", 4250, "embedded NATS listen port (when --nats-url empty)")
|
||||||
natsStore = flag.String("nats-store", "./local_files/jetstream", "embedded JetStream store dir")
|
natsStore = flag.String("nats-store", "./local_files/jetstream", "embedded JetStream store dir")
|
||||||
|
busAuth = flag.String("bus-auth", "off", "control-plane auth rollout: off|soft|enforce (feature flag bus-auth)")
|
||||||
|
tlsCert = flag.String("tls-cert", "", "PATH to the NATS server certificate (deploy/tls/server.crt); enables TLS on the embedded data plane")
|
||||||
|
tlsKey = flag.String("tls-key", "", "path to the NATS server private key (deploy/tls/server.key); required with --tls-cert")
|
||||||
|
// Cluster (issue 0003a): empty --cluster-name keeps the server standalone.
|
||||||
|
clusterName = flag.String("cluster-name", "", "NATS cluster name (identical on every node); empty = standalone, no HA")
|
||||||
|
serverName = flag.String("server-name", "", "unique node name within the cluster (required by JetStream RAFT when clustered)")
|
||||||
|
clusterPort = flag.Int("cluster-port", 6250, "route listener port for server-to-server cluster traffic")
|
||||||
|
routesCSV = flag.String("routes", "", "comma-separated nats-route URLs of the OTHER nodes, e.g. nats://user:pass@10.0.0.2:6250")
|
||||||
|
clusterUser = flag.String("cluster-user", "", "shared route secret username (gates the route listener)")
|
||||||
|
clusterPass = flag.String("cluster-pass", "", "shared route secret password (argv-visible — prefer --cluster-pass-file or UNIBUS_CLUSTER_PASS)")
|
||||||
|
// Secret out of argv (issue 0006f, audit 0008 N1-low): a password in
|
||||||
|
// --cluster-pass / --routes is visible in ps/journald. Prefer a file or the
|
||||||
|
// UNIBUS_CLUSTER_PASS env var; routes may then omit userinfo and the secret
|
||||||
|
// is injected from here.
|
||||||
|
clusterPassFile = flag.String("cluster-pass-file", "", "path to a file holding the cluster route password (preferred over --cluster-pass; keeps the secret out of argv)")
|
||||||
|
routeTLSCert = flag.String("route-tls-cert", "", "this node's route certificate (CA-signed); enables mutual route TLS with --route-tls-key/--route-tls-ca")
|
||||||
|
routeTLSKey = flag.String("route-tls-key", "", "this node's route private key")
|
||||||
|
routeTLSCA = flag.String("route-tls-ca", "", "bus CA that signs every node's route certificate (deploy/tls/ca.crt)")
|
||||||
|
// Replicated control plane (issue 0006a/c): the JetStream replication factor
|
||||||
|
// for the shared nonce bucket (and, with --store kv, the control-plane KV).
|
||||||
|
// 1 for a 1-2 node rollout, 3 for real HA quorum (raise in place with
|
||||||
|
// `nats stream update --replicas 3` when the third node joins).
|
||||||
|
kvReplicas = flag.Int("kv-replicas", 1, "JetStream replication factor for the shared nonce/KV buckets (1..3)")
|
||||||
|
caFile = flag.String("ca", "", "bus CA cert; only used to pin TLS on the internal JetStream connection to an EXTERNAL --nats-url (the embedded server uses an in-process connection that needs no CA)")
|
||||||
|
// Control-plane store backend (issue 0006c, feature flag decentralized):
|
||||||
|
// "sqlite" (default) keeps the local single-node SQLite control plane;
|
||||||
|
// "kv" puts rooms/members/keys/users in replicated JetStream KV so any node
|
||||||
|
// in the cluster serves the same state.
|
||||||
|
storeBackend = flag.String("store", "sqlite", "control-plane store backend: sqlite (default, single-node) | kv (replicated JetStream, decentralized)")
|
||||||
)
|
)
|
||||||
flag.Parse()
|
flag.Parse()
|
||||||
|
|
||||||
|
authMode, err := membership.ParseAuthMode(*busAuth)
|
||||||
|
if err != nil {
|
||||||
|
log.Fatalf("%v", err)
|
||||||
|
}
|
||||||
|
if *storeBackend != "sqlite" && *storeBackend != "kv" {
|
||||||
|
log.Fatalf("--store must be \"sqlite\" or \"kv\", got %q", *storeBackend)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Resolve the cluster route secret out of argv (file/env preferred). The
|
||||||
|
// resolved value (not *clusterPass) is what guards the route layer and is
|
||||||
|
// injected into peer route URLs below.
|
||||||
|
clusterPassResolved, passSource, err := resolveClusterPass(*clusterPass, *clusterPassFile, os.Getenv("UNIBUS_CLUSTER_PASS"))
|
||||||
|
if err != nil {
|
||||||
|
log.Fatalf("%v", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Fail-open guard (audit H2): a non-loopback bind, or any TLS flag, demands
|
||||||
|
// --bus-auth enforce. This makes an insecure public startup impossible rather
|
||||||
|
// than silently exposing the bus with the appearance of security.
|
||||||
|
if err := validateBootConfig(*bind, authMode, *tlsCert, *tlsKey); err != nil {
|
||||||
|
log.Fatalf("%v", err)
|
||||||
|
}
|
||||||
|
// Cluster route guard (issue 0003a): a public cluster needs a route secret
|
||||||
|
// and mutual route TLS, and the route-TLS flags are all-or-nothing.
|
||||||
|
if err := validateClusterConfig(*clusterName, *bind, *clusterUser, clusterPassResolved, *routeTLSCert, *routeTLSKey, *routeTLSCA, authMode); err != nil {
|
||||||
|
log.Fatalf("%v", err)
|
||||||
|
}
|
||||||
|
|
||||||
log.SetFlags(log.LstdFlags | log.Lmsgprefix)
|
log.SetFlags(log.LstdFlags | log.Lmsgprefix)
|
||||||
log.SetPrefix("[membershipd] ")
|
log.SetPrefix("[membershipd] ")
|
||||||
|
|
||||||
// Data plane: embedded or external NATS.
|
// A clustered node shares its control plane with peers, so it needs a JetStream
|
||||||
|
// client to manage the replicated nonce bucket (issue 0006a). --store kv (issue
|
||||||
|
// 0006c) also needs JetStream, for the control-plane KV itself. A standalone
|
||||||
|
// single-node SQLite deployment needs none of this and keeps the in-process,
|
||||||
|
// in-memory behavior unchanged.
|
||||||
|
clustered := *clusterName != ""
|
||||||
|
decentralized := *storeBackend == "kv"
|
||||||
|
needJS := clustered || decentralized
|
||||||
|
enforce := authMode == membership.AuthEnforce
|
||||||
|
|
||||||
|
// Internal service identity (issue 0006a): when the embedded data plane enforces
|
||||||
|
// auth, membershipd must still connect to its OWN server to manage JetStream.
|
||||||
|
// It does so with this ephemeral identity, which the authenticator is built to
|
||||||
|
// recognize and grant full permissions (it never enters the user allowlist). It
|
||||||
|
// is only generated when actually needed (JetStream required AND enforce on AND
|
||||||
|
// the server is embedded), so a standalone or non-enforce node is unchanged.
|
||||||
|
var internalID cs.Identity
|
||||||
|
var internalPubHex string
|
||||||
|
if needJS && enforce && *natsURL == "" {
|
||||||
|
internalID, err = cs.GenerateIdentity()
|
||||||
|
if err != nil {
|
||||||
|
log.Fatalf("generate internal identity: %v", err)
|
||||||
|
}
|
||||||
|
internalPubHex = hex.EncodeToString(internalID.SignPub)
|
||||||
|
}
|
||||||
|
|
||||||
|
// The authenticator consults the store through a holder so it can be built
|
||||||
|
// before the store exists: with --store kv the JetStream KV store opens only
|
||||||
|
// after NATS is up (the bootstrap cycle). In the default SQLite path the store
|
||||||
|
// is opened and set into the holder right here, before the server starts, so
|
||||||
|
// behavior is identical to the pre-0006c baseline. `store` is the final store
|
||||||
|
// used by the HTTP server (set below for the KV path).
|
||||||
|
holder := &storeHolder{}
|
||||||
|
var store membership.Store
|
||||||
|
if !decentralized {
|
||||||
|
store, err = membership.Open(*dbPath)
|
||||||
|
if err != nil {
|
||||||
|
log.Fatalf("open membership store: %v", err)
|
||||||
|
}
|
||||||
|
holder.set(store)
|
||||||
|
log.Printf("membership store: sqlite %s", *dbPath)
|
||||||
|
}
|
||||||
|
// Close whichever store ends up final (SQLite closes its file; the JetStream KV
|
||||||
|
// store's Close is a no-op — its NATS connection is closed separately).
|
||||||
|
defer func() {
|
||||||
|
if store != nil {
|
||||||
|
store.Close()
|
||||||
|
}
|
||||||
|
}()
|
||||||
|
|
||||||
|
blobs, err := blobstore.New(*storeDir)
|
||||||
|
if err != nil {
|
||||||
|
log.Fatalf("open blob store: %v", err)
|
||||||
|
}
|
||||||
|
log.Printf("blob store: %s", *storeDir)
|
||||||
|
|
||||||
|
// Data plane: embedded or external NATS. For the embedded server, enforce
|
||||||
|
// turns on the nkey authenticator (only allowlisted identities may connect)
|
||||||
|
// and --tls-cert/--tls-key turn on TLS. An external NATS manages its own
|
||||||
|
// auth/TLS, so those flags do not apply to it.
|
||||||
var ns *server.Server
|
var ns *server.Server
|
||||||
natsClientURL := *natsURL
|
natsClientURL := *natsURL
|
||||||
if natsClientURL == "" {
|
if natsClientURL == "" {
|
||||||
var err error
|
cfg := embeddednats.ServerConfig{
|
||||||
// Bind the embedded NATS to the same interface as the HTTP API so a single
|
// Bind the embedded NATS to the same interface as the HTTP API so a
|
||||||
// --bind flag governs reachability: 127.0.0.1 keeps the whole stack
|
// single --bind flag governs reachability: 127.0.0.1 keeps the whole
|
||||||
// loopback-only; 0.0.0.0 exposes both planes to the LAN.
|
// stack loopback-only; 0.0.0.0 exposes both planes to the LAN.
|
||||||
ns, err = embeddednats.StartHost(*natsStore, *bind, *natsPort)
|
StoreDir: *natsStore,
|
||||||
|
Host: *bind,
|
||||||
|
Port: *natsPort,
|
||||||
|
ServerName: *serverName,
|
||||||
|
}
|
||||||
|
// Cluster (issue 0003a): with a cluster name, join the route layer for HA.
|
||||||
|
if *clusterName != "" {
|
||||||
|
// Inject the resolved secret into peer route URLs that omit userinfo, so
|
||||||
|
// the password need not appear in --routes argv (issue 0006f).
|
||||||
|
routes, rerr := injectRouteCreds(splitRoutes(*routesCSV), *clusterUser, clusterPassResolved)
|
||||||
|
if rerr != nil {
|
||||||
|
log.Fatalf("%v", rerr)
|
||||||
|
}
|
||||||
|
cc := &embeddednats.ClusterConfig{
|
||||||
|
Name: *clusterName,
|
||||||
|
Host: *bind,
|
||||||
|
Port: *clusterPort,
|
||||||
|
Routes: routes,
|
||||||
|
Username: *clusterUser,
|
||||||
|
Password: clusterPassResolved,
|
||||||
|
}
|
||||||
|
log.Printf("cluster route secret source: %s", passSource)
|
||||||
|
if *routeTLSCert != "" {
|
||||||
|
rtls, err := busauth.RouteTLSConfig(*routeTLSCert, *routeTLSKey, *routeTLSCA)
|
||||||
|
if err != nil {
|
||||||
|
log.Fatalf("load route TLS: %v", err)
|
||||||
|
}
|
||||||
|
cc.TLS = rtls
|
||||||
|
log.Printf("cluster route TLS: ON (mutual, CA %s)", *routeTLSCA)
|
||||||
|
}
|
||||||
|
cfg.Cluster = cc
|
||||||
|
log.Printf("cluster: %q node %q, route port %d, %d peer route(s)", *clusterName, *serverName, *clusterPort, len(cc.Routes))
|
||||||
|
}
|
||||||
|
if authMode == membership.AuthEnforce {
|
||||||
|
// Per-subject data-plane ACL (audit H4 / N4 residual): the authenticator
|
||||||
|
// authorizes by the bus allowlist AND confines each connection to the
|
||||||
|
// subjects of the rooms it belongs to (plus client-infra subjects). This
|
||||||
|
// closes the wildcard metadata leak where a registered non-member could
|
||||||
|
// Subscribe(">") and harvest every room's subject and JetStream activity.
|
||||||
|
// NATS freezes permissions at connect time, so a peer that joins a room
|
||||||
|
// after connecting must client.RefreshSession to gain that room's subject.
|
||||||
|
cfg.Auth = busauth.NewNkeyAuthenticatorACLInternal(
|
||||||
|
holder.IsAuthorized,
|
||||||
|
busauth.PermissionsFromSubjects(holder.subjectACL),
|
||||||
|
internalPubHex,
|
||||||
|
)
|
||||||
|
log.Printf("NATS nkey authentication: ON (enforce, per-subject ACL)")
|
||||||
|
}
|
||||||
|
if *tlsCert != "" || *tlsKey != "" {
|
||||||
|
if *tlsCert == "" || *tlsKey == "" {
|
||||||
|
log.Fatalf("--tls-cert and --tls-key must be set together")
|
||||||
|
}
|
||||||
|
tlsCfg, err := busauth.ServerTLSConfig(*tlsCert, *tlsKey)
|
||||||
|
if err != nil {
|
||||||
|
log.Fatalf("load NATS TLS: %v", err)
|
||||||
|
}
|
||||||
|
cfg.TLS = tlsCfg
|
||||||
|
log.Printf("NATS TLS: ON (%s)", *tlsCert)
|
||||||
|
}
|
||||||
|
ns, err = embeddednats.StartServer(cfg)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
log.Fatalf("start embedded nats: %v", err)
|
log.Fatalf("start embedded nats: %v", err)
|
||||||
}
|
}
|
||||||
@@ -54,29 +253,104 @@ func main() {
|
|||||||
log.Printf("using external NATS: %s", natsClientURL)
|
log.Printf("using external NATS: %s", natsClientURL)
|
||||||
}
|
}
|
||||||
|
|
||||||
// Control plane: SQLite store + blob store + HTTP API.
|
// JetStream client + decentralized store (issue 0006a/c). needJS is set for a
|
||||||
store, err := membership.Open(*dbPath)
|
// clustered node (shared nonce bucket) and for --store kv (the KV control
|
||||||
if err != nil {
|
// plane). Open the privileged JetStream client first (in-process for the
|
||||||
log.Fatalf("open membership store: %v", err)
|
// embedded server, a plain client for external NATS), then — for --store kv —
|
||||||
}
|
// open the replicated KV store and publish it into the holder so the
|
||||||
defer store.Close()
|
// authenticator and HTTP server serve from it. The privileged connection is the
|
||||||
log.Printf("membership store: %s", *dbPath)
|
// only client that can connect in this window (the holder still denies everyone
|
||||||
|
// else; the internal identity bypasses the store).
|
||||||
|
var js jetstream.JetStream
|
||||||
|
if needJS {
|
||||||
|
var internalNC *nats.Conn
|
||||||
|
if *natsURL == "" {
|
||||||
|
internalNC, js, err = connectInternalJS(ns, internalID, enforce)
|
||||||
|
} else {
|
||||||
|
internalNC, js, err = connectExternalJS(natsClientURL, *caFile)
|
||||||
|
}
|
||||||
|
if err != nil {
|
||||||
|
log.Fatalf("internal JetStream connection (required by --cluster-name/--store kv): %v", err)
|
||||||
|
}
|
||||||
|
defer internalNC.Close()
|
||||||
|
|
||||||
blobs, err := blobstore.New(*storeDir)
|
if decentralized {
|
||||||
if err != nil {
|
kvStore, err := membership.OpenJetStream(js, membership.JetStreamConfig{Replicas: *kvReplicas})
|
||||||
log.Fatalf("open blob store: %v", err)
|
if err != nil {
|
||||||
|
log.Fatalf("open decentralized control-plane KV store: %v", err)
|
||||||
|
}
|
||||||
|
store = kvStore
|
||||||
|
holder.set(store)
|
||||||
|
log.Printf("membership store: jetstream KV (replicas=%d)", *kvReplicas)
|
||||||
|
}
|
||||||
}
|
}
|
||||||
log.Printf("blob store: %s", *storeDir)
|
|
||||||
|
|
||||||
srv := membership.NewServer(store, blobs)
|
srv := membership.NewServer(store, blobs, authMode)
|
||||||
|
// On a public (non-loopback) bind, disable cleartext rooms: the embedded NATS
|
||||||
|
// has no per-subject ACL, so cleartext content would be readable by any
|
||||||
|
// registered peer. Forcing E2E keeps message content confidential regardless
|
||||||
|
// (audit H4 minimum defense; see dev/0004d-dataplane-acl.md).
|
||||||
|
if !isLoopbackBind(*bind) {
|
||||||
|
srv.RequireEncryptedRooms = true
|
||||||
|
log.Printf("cleartext rooms: DISABLED (public bind requires end-to-end encryption)")
|
||||||
|
}
|
||||||
|
// Publish this node's posture on /healthz so a monitor (or a peer) can detect a
|
||||||
|
// cluster member not running the homogeneous enforce+ACL+TLS posture (audit
|
||||||
|
// 0008 N1). enforce implies the per-subject ACL in this binary (they are wired
|
||||||
|
// together above).
|
||||||
|
srv.Posture = membership.Posture{
|
||||||
|
Enforce: enforce,
|
||||||
|
ACL: enforce,
|
||||||
|
TLS: *tlsCert != "",
|
||||||
|
Cluster: clustered,
|
||||||
|
Store: *storeBackend,
|
||||||
|
}
|
||||||
|
|
||||||
|
// Replicated anti-replay (issue 0006a, audit 0008 N3): a clustered node MUST
|
||||||
|
// share its nonce store across the cluster, or a request accepted on one node
|
||||||
|
// can be replayed to another. HARD requirement: if the bucket cannot be created
|
||||||
|
// the node refuses to start rather than run with a per-process cache that leaves
|
||||||
|
// the replay hole open.
|
||||||
|
if needJS {
|
||||||
|
if err := wireReplicatedNonces(srv, js, clustered, *kvReplicas); err != nil {
|
||||||
|
log.Fatalf("%v", err)
|
||||||
|
}
|
||||||
|
if clustered {
|
||||||
|
log.Printf("anti-replay: replicated nonce bucket \"KV_UNIBUS_nonces\" (replicas=%d) — cluster-safe", *kvReplicas)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
log.Printf("control-plane auth: %s", authMode)
|
||||||
addr := *bind + ":" + *httpPort
|
addr := *bind + ":" + *httpPort
|
||||||
httpSrv := &http.Server{Addr: addr, Handler: srv}
|
httpSrv := &http.Server{
|
||||||
|
Addr: addr,
|
||||||
|
Handler: srv,
|
||||||
|
// Bound request header size so a peer cannot exhaust memory with huge
|
||||||
|
// headers before any body limit applies (the body ceilings live in the
|
||||||
|
// membership middleware).
|
||||||
|
MaxHeaderBytes: membership.MaxHeaderBytes,
|
||||||
|
ReadHeaderTimeout: 10 * time.Second,
|
||||||
|
}
|
||||||
|
|
||||||
go func() {
|
go func() {
|
||||||
log.Printf("HTTP control-plane API: http://%s", addr)
|
var serveErr error
|
||||||
log.Printf(" health: http://%s/healthz", addr)
|
if *tlsCert != "" {
|
||||||
if err := httpSrv.ListenAndServe(); err != nil && err != http.ErrServerClosed {
|
// Serve the control plane over TLS with the same CA-signed cert as the
|
||||||
log.Fatalf("http server: %v", err)
|
// data plane (audit H5): metadata (subjects, pubkeys, sealed keys, the
|
||||||
|
// social graph) is no longer readable by a network MITM. The fail-open
|
||||||
|
// guard already requires --bus-auth enforce alongside these flags.
|
||||||
|
httpSrv.TLSConfig = &tls.Config{MinVersion: tls.VersionTLS12}
|
||||||
|
log.Printf("HTTPS control-plane API: https://%s", addr)
|
||||||
|
log.Printf(" health: https://%s/healthz", addr)
|
||||||
|
log.Printf("control-plane TLS: ON (%s)", *tlsCert)
|
||||||
|
serveErr = httpSrv.ListenAndServeTLS(*tlsCert, *tlsKey)
|
||||||
|
} else {
|
||||||
|
log.Printf("HTTP control-plane API: http://%s", addr)
|
||||||
|
log.Printf(" health: http://%s/healthz", addr)
|
||||||
|
serveErr = httpSrv.ListenAndServe()
|
||||||
|
}
|
||||||
|
if serveErr != nil && serveErr != http.ErrServerClosed {
|
||||||
|
log.Fatalf("http server: %v", serveErr)
|
||||||
}
|
}
|
||||||
}()
|
}()
|
||||||
|
|
||||||
|
|||||||
@@ -0,0 +1,95 @@
|
|||||||
|
package main
|
||||||
|
|
||||||
|
import (
|
||||||
|
"flag"
|
||||||
|
"fmt"
|
||||||
|
"os"
|
||||||
|
"time"
|
||||||
|
|
||||||
|
"github.com/enmanuel/unibus/pkg/busauth"
|
||||||
|
"github.com/enmanuel/unibus/pkg/membership"
|
||||||
|
"github.com/nats-io/nats.go"
|
||||||
|
"github.com/nats-io/nats.go/jetstream"
|
||||||
|
)
|
||||||
|
|
||||||
|
// runMigrateCLI implements `membershipd migrate-to-kv`, the idempotent move of
|
||||||
|
// the control-plane state from the local SQLite database into replicated
|
||||||
|
// JetStream KV (issue 0003c). It backs up the SQLite file first (VACUUM INTO),
|
||||||
|
// then connects to the target NATS and copies every room/member/key/user into
|
||||||
|
// the KV buckets. Re-running it converges to the same state.
|
||||||
|
//
|
||||||
|
// It runs on the bus host (no auth on the control-plane side), connecting to the
|
||||||
|
// cluster's NATS; --ca pins TLS when the data plane is secured.
|
||||||
|
func runMigrateCLI(args []string) {
|
||||||
|
fs := flag.NewFlagSet("migrate-to-kv", flag.ExitOnError)
|
||||||
|
dbPath := fs.String("db", defaultDBPath, "SQLite database path to migrate FROM")
|
||||||
|
natsURL := fs.String("nats-url", "", "NATS url of the cluster to migrate INTO (required)")
|
||||||
|
ca := fs.String("ca", "", "CA cert to pin TLS on the NATS connection (optional)")
|
||||||
|
replicas := fs.Int("replicas", 1, "KV replication factor (1 for a 1-2 node rollout, 3 for HA quorum)")
|
||||||
|
noBackup := fs.Bool("no-backup", false, "skip the SQLite backup before migrating (NOT recommended)")
|
||||||
|
_ = fs.Parse(args)
|
||||||
|
|
||||||
|
if *natsURL == "" {
|
||||||
|
fmt.Fprintln(os.Stderr, "membershipd migrate-to-kv: --nats-url is required (the cluster to write the KV buckets into)")
|
||||||
|
os.Exit(2)
|
||||||
|
}
|
||||||
|
// Confidentiality guard (issue 0006f, audit 0008 N6): the migration writes the
|
||||||
|
// allowlist (handles, roles, signing pubkeys) into the KV. Against a REMOTE NATS
|
||||||
|
// without TLS that metadata would travel in cleartext, so a remote target MUST
|
||||||
|
// be TLS-pinned with --ca. A loopback target is local-only and exempt.
|
||||||
|
if !isLoopbackURL(*natsURL) && *ca == "" {
|
||||||
|
fmt.Fprintf(os.Stderr, "membershipd migrate-to-kv: refusing to migrate to remote %q without --ca; the allowlist (handles/roles/sign pubs) would travel in cleartext — pin TLS with --ca, or run against a loopback nats-url\n", *natsURL)
|
||||||
|
os.Exit(2)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Back up the SQLite database first so a botched migration can be undone.
|
||||||
|
var backupPath string
|
||||||
|
if !*noBackup {
|
||||||
|
bak, err := membership.BackupSQLite(*dbPath)
|
||||||
|
if err != nil {
|
||||||
|
fmt.Fprintf(os.Stderr, "membershipd migrate-to-kv: backup failed: %v\n", err)
|
||||||
|
os.Exit(1)
|
||||||
|
}
|
||||||
|
backupPath = bak
|
||||||
|
fmt.Printf("backed up %s -> %s\n", *dbPath, backupPath)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Connect to the target NATS (optionally TLS-pinned to the bus CA).
|
||||||
|
natsOpts := []nats.Option{nats.Name("unibus-migrate")}
|
||||||
|
if *ca != "" {
|
||||||
|
tlsCfg, err := busauth.LoadCATLSConfig(*ca)
|
||||||
|
if err != nil {
|
||||||
|
fmt.Fprintf(os.Stderr, "membershipd migrate-to-kv: load CA: %v\n", err)
|
||||||
|
os.Exit(1)
|
||||||
|
}
|
||||||
|
natsOpts = append(natsOpts, nats.Secure(tlsCfg))
|
||||||
|
}
|
||||||
|
nc, err := nats.Connect(*natsURL, natsOpts...)
|
||||||
|
if err != nil {
|
||||||
|
fmt.Fprintf(os.Stderr, "membershipd migrate-to-kv: connect %q: %v\n", *natsURL, err)
|
||||||
|
os.Exit(1)
|
||||||
|
}
|
||||||
|
defer nc.Close()
|
||||||
|
|
||||||
|
js, err := jetstream.New(nc)
|
||||||
|
if err != nil {
|
||||||
|
fmt.Fprintf(os.Stderr, "membershipd migrate-to-kv: jetstream: %v\n", err)
|
||||||
|
os.Exit(1)
|
||||||
|
}
|
||||||
|
|
||||||
|
report, err := membership.MigrateSQLiteToKV(*dbPath, js, membership.JetStreamConfig{
|
||||||
|
Replicas: *replicas,
|
||||||
|
OpTimeout: 30 * time.Second,
|
||||||
|
})
|
||||||
|
if err != nil {
|
||||||
|
fmt.Fprintf(os.Stderr, "membershipd migrate-to-kv: %v\n", err)
|
||||||
|
os.Exit(1)
|
||||||
|
}
|
||||||
|
report.BackupPath = backupPath
|
||||||
|
|
||||||
|
fmt.Printf("migrated to KV (replicas=%d): %d rooms, %d members, %d keys, %d users\n",
|
||||||
|
*replicas, report.Rooms, report.Members, report.Keys, report.Users)
|
||||||
|
if backupPath != "" {
|
||||||
|
fmt.Printf("rollback: restore %s if needed\n", backupPath)
|
||||||
|
}
|
||||||
|
}
|
||||||
@@ -0,0 +1,60 @@
|
|||||||
|
package main
|
||||||
|
|
||||||
|
import (
|
||||||
|
"fmt"
|
||||||
|
"sync"
|
||||||
|
|
||||||
|
"github.com/enmanuel/unibus/pkg/membership"
|
||||||
|
)
|
||||||
|
|
||||||
|
// storeHolder is a concurrency-safe slot for the control-plane store, used to
|
||||||
|
// break the decentralized bootstrap cycle (issue 0006c): the NATS authenticator
|
||||||
|
// must be built BEFORE the embedded server starts, but the JetStream KV store can
|
||||||
|
// only be opened AFTER NATS is up (it needs a JetStream client). The authenticator
|
||||||
|
// therefore consults the holder instead of a concrete store.
|
||||||
|
//
|
||||||
|
// Fail-closed by construction: until the store is set, IsAuthorized denies and
|
||||||
|
// SubjectACL errors, so any client connecting in the startup window is rejected.
|
||||||
|
// The only connection expected in that window is membershipd's own internal
|
||||||
|
// service identity, which the authenticator recognizes by key and lets through
|
||||||
|
// without consulting the store at all. In the SQLite (default) path the store is
|
||||||
|
// set before StartServer, so the window does not exist and behavior is identical
|
||||||
|
// to the pre-0006c baseline.
|
||||||
|
type storeHolder struct {
|
||||||
|
mu sync.RWMutex
|
||||||
|
s membership.Store
|
||||||
|
}
|
||||||
|
|
||||||
|
func (h *storeHolder) set(s membership.Store) {
|
||||||
|
h.mu.Lock()
|
||||||
|
h.s = s
|
||||||
|
h.mu.Unlock()
|
||||||
|
}
|
||||||
|
|
||||||
|
func (h *storeHolder) get() membership.Store {
|
||||||
|
h.mu.RLock()
|
||||||
|
defer h.mu.RUnlock()
|
||||||
|
return h.s
|
||||||
|
}
|
||||||
|
|
||||||
|
// IsAuthorized reports whether signPubHex is an active bus user, denying while the
|
||||||
|
// store is not yet set (fail closed). It is the predicate the nkey authenticator
|
||||||
|
// uses for every connecting client.
|
||||||
|
func (h *storeHolder) IsAuthorized(signPubHex string) bool {
|
||||||
|
s := h.get()
|
||||||
|
if s == nil {
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
return s.IsAuthorized(signPubHex)
|
||||||
|
}
|
||||||
|
|
||||||
|
// subjectACL derives the per-subject permissions for signPubHex via the live
|
||||||
|
// store, erroring (so the caller fails closed and denies the connection) while the
|
||||||
|
// store is not yet set.
|
||||||
|
func (h *storeHolder) subjectACL(signPubHex string) ([]string, error) {
|
||||||
|
s := h.get()
|
||||||
|
if s == nil {
|
||||||
|
return nil, fmt.Errorf("control-plane store not ready")
|
||||||
|
}
|
||||||
|
return membership.SubjectACLFor(s)(signPubHex)
|
||||||
|
}
|
||||||
@@ -0,0 +1,51 @@
|
|||||||
|
package main
|
||||||
|
|
||||||
|
import (
|
||||||
|
"encoding/hex"
|
||||||
|
"path/filepath"
|
||||||
|
"testing"
|
||||||
|
|
||||||
|
cs "fn-registry/functions/cybersecurity"
|
||||||
|
|
||||||
|
"github.com/enmanuel/unibus/pkg/membership"
|
||||||
|
)
|
||||||
|
|
||||||
|
// TestStoreHolderFailClosed: an empty holder denies everything (the bootstrap
|
||||||
|
// window before the store is set), and starts serving once a store is published.
|
||||||
|
func TestStoreHolderFailClosed(t *testing.T) {
|
||||||
|
h := &storeHolder{}
|
||||||
|
|
||||||
|
// Empty: deny + error (fail closed).
|
||||||
|
if h.IsAuthorized("anything") {
|
||||||
|
t.Fatalf("empty holder must deny IsAuthorized")
|
||||||
|
}
|
||||||
|
if _, err := h.subjectACL("anything"); err == nil {
|
||||||
|
t.Fatalf("empty holder must error from subjectACL (fail closed)")
|
||||||
|
}
|
||||||
|
|
||||||
|
// After set: serves from the real store.
|
||||||
|
store, err := membership.Open(filepath.Join(t.TempDir(), "unibus.db"))
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("store: %v", err)
|
||||||
|
}
|
||||||
|
t.Cleanup(func() { store.Close() })
|
||||||
|
id, err := cs.GenerateIdentity()
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("identity: %v", err)
|
||||||
|
}
|
||||||
|
pub := hex.EncodeToString(id.SignPub)
|
||||||
|
if err := store.AddUser(pub, "alice", membership.RoleMember); err != nil {
|
||||||
|
t.Fatalf("add user: %v", err)
|
||||||
|
}
|
||||||
|
h.set(store)
|
||||||
|
|
||||||
|
if !h.IsAuthorized(pub) {
|
||||||
|
t.Fatalf("after set, an active user must be authorized")
|
||||||
|
}
|
||||||
|
if _, err := h.subjectACL(pub); err != nil {
|
||||||
|
t.Fatalf("after set, subjectACL must succeed: %v", err)
|
||||||
|
}
|
||||||
|
if h.IsAuthorized("deadbeef") {
|
||||||
|
t.Fatalf("a non-user must not be authorized")
|
||||||
|
}
|
||||||
|
}
|
||||||
@@ -0,0 +1,178 @@
|
|||||||
|
package main
|
||||||
|
|
||||||
|
import (
|
||||||
|
"encoding/hex"
|
||||||
|
"flag"
|
||||||
|
"fmt"
|
||||||
|
"os"
|
||||||
|
"strings"
|
||||||
|
"text/tabwriter"
|
||||||
|
|
||||||
|
"github.com/enmanuel/unibus/pkg/membership"
|
||||||
|
)
|
||||||
|
|
||||||
|
// runUserCLI implements `membershipd user <add|list|revoke> ...`, the local
|
||||||
|
// administration surface for the bus user allowlist. It opens the SQLite store
|
||||||
|
// directly (no network, no auth): it is meant to run on the bus host, where
|
||||||
|
// shell access already implies full control. This is the seam that seeds the
|
||||||
|
// first admin, breaking the chicken-egg of "you need an admin to add an admin".
|
||||||
|
//
|
||||||
|
// The function never returns: it exits the process with a non-zero status on
|
||||||
|
// error so it composes cleanly in shell scripts and systemd ExecStartPre hooks.
|
||||||
|
func runUserCLI(args []string) {
|
||||||
|
if len(args) == 0 {
|
||||||
|
userUsage()
|
||||||
|
os.Exit(2)
|
||||||
|
}
|
||||||
|
sub, rest := args[0], args[1:]
|
||||||
|
switch sub {
|
||||||
|
case "add":
|
||||||
|
userAdd(rest)
|
||||||
|
case "list":
|
||||||
|
userList(rest)
|
||||||
|
case "revoke":
|
||||||
|
userRevoke(rest)
|
||||||
|
case "-h", "--help", "help":
|
||||||
|
userUsage()
|
||||||
|
os.Exit(0)
|
||||||
|
default:
|
||||||
|
fmt.Fprintf(os.Stderr, "membershipd user: unknown subcommand %q\n\n", sub)
|
||||||
|
userUsage()
|
||||||
|
os.Exit(2)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func userUsage() {
|
||||||
|
fmt.Fprint(os.Stderr, `usage: membershipd user <command> [flags]
|
||||||
|
|
||||||
|
commands:
|
||||||
|
add Register a bus user from their Ed25519 signing public key
|
||||||
|
list List all registered users
|
||||||
|
revoke Revoke a user (denies access on both planes immediately)
|
||||||
|
|
||||||
|
examples:
|
||||||
|
membershipd user add --handle alice --sign-pub <64-hex> --role admin
|
||||||
|
membershipd user list
|
||||||
|
membershipd user revoke <64-hex>
|
||||||
|
|
||||||
|
common flags:
|
||||||
|
--db <path> SQLite database path (default ./local_files/unibus.db)
|
||||||
|
`)
|
||||||
|
}
|
||||||
|
|
||||||
|
// defaultDBPath is the SQLite database path used as the default value of the
// --db flag by the CLI subcommands in this package.
const defaultDBPath = "./local_files/unibus.db"
|
||||||
|
|
||||||
|
// openStore opens the membership store at path, exiting on failure. Migrations
|
||||||
|
// (including 002_users.sql) are applied by membership.Open, so a fresh database
|
||||||
|
// gets the users table on first use of the CLI.
|
||||||
|
func openStore(path string) membership.Store {
|
||||||
|
store, err := membership.Open(path)
|
||||||
|
if err != nil {
|
||||||
|
fmt.Fprintf(os.Stderr, "membershipd user: open store %q: %v\n", path, err)
|
||||||
|
os.Exit(1)
|
||||||
|
}
|
||||||
|
return store
|
||||||
|
}
|
||||||
|
|
||||||
|
// validateSignPubHex checks that signPub is the hex encoding of exactly 32
// bytes, the size of an Ed25519 public key (64 hex characters).
//
// It returns nil for a well-formed key. Otherwise the error says whether the
// input was not valid hex (wrapping the decoder's error) or decoded to the
// wrong number of bytes. Validating at the CLI turns a key that could never
// match anything into an explicit error at seed time.
func validateSignPubHex(signPub string) error {
	const wantBytes = 32

	raw, decodeErr := hex.DecodeString(signPub)
	switch {
	case decodeErr != nil:
		return fmt.Errorf("sign-pub is not valid hex: %w", decodeErr)
	case len(raw) != wantBytes:
		return fmt.Errorf("sign-pub must be a 32-byte Ed25519 public key (64 hex chars), got %d bytes", len(raw))
	default:
		return nil
	}
}
|
||||||
|
|
||||||
|
func userAdd(args []string) {
|
||||||
|
fs := flag.NewFlagSet("user add", flag.ExitOnError)
|
||||||
|
handle := fs.String("handle", "", "human-readable user name (required)")
|
||||||
|
signPub := fs.String("sign-pub", "", "Ed25519 signing public key in hex (required)")
|
||||||
|
role := fs.String("role", membership.RoleMember, "role: admin or member")
|
||||||
|
dbPath := fs.String("db", defaultDBPath, "SQLite database path")
|
||||||
|
_ = fs.Parse(args)
|
||||||
|
|
||||||
|
if *handle == "" || *signPub == "" {
|
||||||
|
fmt.Fprintln(os.Stderr, "membershipd user add: --handle and --sign-pub are required")
|
||||||
|
os.Exit(2)
|
||||||
|
}
|
||||||
|
if err := validateSignPubHex(*signPub); err != nil {
|
||||||
|
fmt.Fprintf(os.Stderr, "membershipd user add: %v\n", err)
|
||||||
|
os.Exit(2)
|
||||||
|
}
|
||||||
|
|
||||||
|
store := openStore(*dbPath)
|
||||||
|
defer store.Close()
|
||||||
|
|
||||||
|
if err := store.AddUser(*signPub, *handle, *role); err != nil {
|
||||||
|
fmt.Fprintf(os.Stderr, "membershipd user add: %v\n", err)
|
||||||
|
os.Exit(1)
|
||||||
|
}
|
||||||
|
fmt.Printf("added user %q (%s) role=%s\n", *handle, *signPub, *role)
|
||||||
|
}
|
||||||
|
|
||||||
|
func userList(args []string) {
|
||||||
|
fs := flag.NewFlagSet("user list", flag.ExitOnError)
|
||||||
|
dbPath := fs.String("db", defaultDBPath, "SQLite database path")
|
||||||
|
_ = fs.Parse(args)
|
||||||
|
|
||||||
|
store := openStore(*dbPath)
|
||||||
|
defer store.Close()
|
||||||
|
|
||||||
|
users, err := store.ListUsers()
|
||||||
|
if err != nil {
|
||||||
|
fmt.Fprintf(os.Stderr, "membershipd user list: %v\n", err)
|
||||||
|
os.Exit(1)
|
||||||
|
}
|
||||||
|
if len(users) == 0 {
|
||||||
|
fmt.Println("(no users)")
|
||||||
|
return
|
||||||
|
}
|
||||||
|
w := tabwriter.NewWriter(os.Stdout, 0, 2, 2, ' ', 0)
|
||||||
|
fmt.Fprintln(w, "HANDLE\tROLE\tSTATUS\tSIGN_PUB\tCREATED")
|
||||||
|
for _, u := range users {
|
||||||
|
fmt.Fprintf(w, "%s\t%s\t%s\t%s\t%s\n", u.Handle, u.Role, u.Status, u.SignPub, u.CreatedAt)
|
||||||
|
}
|
||||||
|
_ = w.Flush()
|
||||||
|
}
|
||||||
|
|
||||||
|
func userRevoke(args []string) {
|
||||||
|
fs := flag.NewFlagSet("user revoke", flag.ExitOnError)
|
||||||
|
dbPath := fs.String("db", defaultDBPath, "SQLite database path")
|
||||||
|
|
||||||
|
// Go's flag package stops at the first non-flag argument, so `revoke <key>
|
||||||
|
// --db path` would otherwise leave --db unparsed. Pull a leading positional
|
||||||
|
// (the sign-pub) off the front before parsing so both `revoke <key> --db p`
|
||||||
|
// and `revoke --db p <key>` work for the operator.
|
||||||
|
var signPub string
|
||||||
|
if len(args) > 0 && !strings.HasPrefix(args[0], "-") {
|
||||||
|
signPub, args = args[0], args[1:]
|
||||||
|
}
|
||||||
|
_ = fs.Parse(args)
|
||||||
|
if signPub == "" {
|
||||||
|
if rest := fs.Args(); len(rest) == 1 {
|
||||||
|
signPub = rest[0]
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if signPub == "" {
|
||||||
|
fmt.Fprintln(os.Stderr, "membershipd user revoke: exactly one <sign-pub> argument required")
|
||||||
|
os.Exit(2)
|
||||||
|
}
|
||||||
|
if err := validateSignPubHex(signPub); err != nil {
|
||||||
|
fmt.Fprintf(os.Stderr, "membershipd user revoke: %v\n", err)
|
||||||
|
os.Exit(2)
|
||||||
|
}
|
||||||
|
|
||||||
|
store := openStore(*dbPath)
|
||||||
|
defer store.Close()
|
||||||
|
|
||||||
|
if err := store.RevokeUser(signPub); err != nil {
|
||||||
|
fmt.Fprintf(os.Stderr, "membershipd user revoke: %v\n", err)
|
||||||
|
os.Exit(1)
|
||||||
|
}
|
||||||
|
fmt.Printf("revoked user %s\n", signPub)
|
||||||
|
}
|
||||||
@@ -0,0 +1,40 @@
|
|||||||
|
package main
|
||||||
|
|
||||||
|
import (
|
||||||
|
"fmt"
|
||||||
|
|
||||||
|
"github.com/enmanuel/unibus/pkg/membership"
|
||||||
|
"github.com/nats-io/nats.go/jetstream"
|
||||||
|
)
|
||||||
|
|
||||||
|
// wireReplicatedNonces applies the cluster anti-replay policy to srv. It is the
|
||||||
|
// single piece of wiring the binary uses to decide whether a node must share its
|
||||||
|
// nonce store, extracted so a regression test exercises the EXACT decision the
|
||||||
|
// running binary makes (issue 0006a, audit 0008 N3).
|
||||||
|
//
|
||||||
|
// Policy:
|
||||||
|
// - A clustered node (clustered == true) MUST use the shared JetStream KV nonce
|
||||||
|
// bucket. Every node sees the same bucket, so a request accepted on one node
|
||||||
|
// cannot be replayed to another whose per-process cache never saw the nonce.
|
||||||
|
// A missing JetStream context, or a failure to create the bucket, is a FATAL
|
||||||
|
// configuration error returned to the caller — a clustered node running with a
|
||||||
|
// per-process nonce cache is precisely the replay hole the audit flagged, so
|
||||||
|
// it must refuse to start rather than serve insecurely.
|
||||||
|
// - A standalone node (clustered == false) keeps the in-memory cache that
|
||||||
|
// NewServer installed: there is no second node to replay to, so the shared
|
||||||
|
// bucket would only add a JetStream dependency for no security gain.
|
||||||
|
//
|
||||||
|
// replicas is the nonce bucket's replication factor (R1..R3). Returns nil when no
|
||||||
|
// action is required (standalone).
|
||||||
|
func wireReplicatedNonces(srv *membership.Server, js jetstream.JetStream, clustered bool, replicas int) error {
|
||||||
|
if !clustered {
|
||||||
|
return nil // standalone: the in-memory nonce cache is sufficient and safe
|
||||||
|
}
|
||||||
|
if js == nil {
|
||||||
|
return fmt.Errorf("clustered node requires JetStream for the shared nonce bucket, but none is available")
|
||||||
|
}
|
||||||
|
if err := srv.UseReplicatedNonces(js, replicas); err != nil {
|
||||||
|
return fmt.Errorf("replicated nonces: %w", err)
|
||||||
|
}
|
||||||
|
return nil
|
||||||
|
}
|
||||||
+9
-1
@@ -23,6 +23,7 @@ func main() {
|
|||||||
ctrlURL = flag.String("ctrl-url", "http://127.0.0.1:8470", "membershipd control-plane url")
|
ctrlURL = flag.String("ctrl-url", "http://127.0.0.1:8470", "membershipd control-plane url")
|
||||||
roomSub = flag.String("room", "proc.test.ticks", "room subject to publish to")
|
roomSub = flag.String("room", "proc.test.ticks", "room subject to publish to")
|
||||||
idFile = flag.String("id-file", "./local_files/worker.id", "identity file path")
|
idFile = flag.String("id-file", "./local_files/worker.id", "identity file path")
|
||||||
|
caFile = flag.String("ca", "", "path to the bus CA cert (ca.crt); set to connect with TLS + nkey to a secured bus")
|
||||||
)
|
)
|
||||||
flag.Parse()
|
flag.Parse()
|
||||||
|
|
||||||
@@ -33,7 +34,7 @@ func main() {
|
|||||||
if err != nil {
|
if err != nil {
|
||||||
log.Fatalf("identity: %v", err)
|
log.Fatalf("identity: %v", err)
|
||||||
}
|
}
|
||||||
c, err := client.New(*natsURL, *ctrlURL, id)
|
c, err := client.Connect(*natsURL, *ctrlURL, id, *caFile)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
log.Fatalf("connect: %v", err)
|
log.Fatalf("connect: %v", err)
|
||||||
}
|
}
|
||||||
@@ -46,6 +47,13 @@ func main() {
|
|||||||
if err != nil {
|
if err != nil {
|
||||||
log.Fatalf("create room: %v", err)
|
log.Fatalf("create room: %v", err)
|
||||||
}
|
}
|
||||||
|
// Membership-change contract (issue 0006e): the bus freezes per-subject
|
||||||
|
// permissions at connect time, and this room did not exist then. Refresh the
|
||||||
|
// session so the new room's subject becomes publishable under enforce+ACL. On
|
||||||
|
// an unsecured/dev bus this is a harmless reconnect.
|
||||||
|
if err := c.RefreshSession(); err != nil {
|
||||||
|
log.Fatalf("refresh session after create room: %v", err)
|
||||||
|
}
|
||||||
log.Printf("room %q -> %s (subject %s, cleartext)", *roomSub, roomID, *roomSub)
|
log.Printf("room %q -> %s (subject %s, cleartext)", *roomSub, roomID, *roomSub)
|
||||||
|
|
||||||
stop := make(chan os.Signal, 1)
|
stop := make(chan os.Signal, 1)
|
||||||
|
|||||||
@@ -65,3 +65,34 @@ curl -fsS http://<host-lan-ip>:8470/healthz
|
|||||||
- To run against an external NATS instead of the embedded one, append
|
- To run against an external NATS instead of the embedded one, append
|
||||||
`--nats-url nats://<host>:4222` to `ExecStart` and re-run `daemon-reload` +
|
`--nats-url nats://<host>:4222` to `ExecStart` and re-run `daemon-reload` +
|
||||||
`restart`.
|
`restart`.
|
||||||
|
|
||||||
|
## Clustering (HA) — see `deploy/cluster/`
|
||||||
|
|
||||||
|
The single-node service above is secure on its own. Running unibus as a
|
||||||
|
multi-node **cluster** has extra hardening rules (issues 0006a–0006f); the full
|
||||||
|
runbook and the generated material live in `deploy/cluster/`. Key points an
|
||||||
|
operator must know:
|
||||||
|
|
||||||
|
- **Homogeneous posture (0006d).** Every node MUST run `--bus-auth enforce` (the
|
||||||
|
binary refuses to join a cluster otherwise) and present mutual route TLS on a
|
||||||
|
public bind. `/healthz` publishes each node's `posture` so a monitor can flag a
|
||||||
|
node that is not `enforce`+`acl`+`tls`.
|
||||||
|
- **Separate route CA (0006f).** The cluster route layer authenticates *nodes*,
|
||||||
|
not bus users — sign the route certs with a **dedicated cluster CA**
|
||||||
|
(`--route-tls-ca`), NOT the client data-plane CA (`--tls-cert`'s CA). Keeping
|
||||||
|
the two trust roots separate means a client cert can never be presented to the
|
||||||
|
route port. `deploy/cluster/generate-cluster-certs.sh` builds this CA.
|
||||||
|
- **Secret out of argv (0006f).** Pass the route password via
|
||||||
|
`--cluster-pass-file` or the `UNIBUS_CLUSTER_PASS` env var, NOT `--cluster-pass`
|
||||||
|
or a `nats://user:pass@host` in `--routes` (both are visible in `ps`/journald).
|
||||||
|
When the secret comes from a file/env, list peers as bare `--routes
|
||||||
|
nats://<host>:6250` and the binary injects the credentials.
|
||||||
|
- **`migrate-to-kv` confidentiality (0006f).** The migration writes the allowlist
|
||||||
|
(handles/roles/sign pubs) into KV. Run it only against a **loopback** nats-url,
|
||||||
|
or pin TLS with `--ca` for a remote target — otherwise that metadata travels in
|
||||||
|
cleartext. The binary refuses a remote target without `--ca`.
|
||||||
|
- **R1 is NOT HA (0006a/N3-DoS).** With `--kv-replicas 1` the control plane
|
||||||
|
(including the nonce bucket) is a single point of failure: if the node owning
|
||||||
|
the stream dies, every authenticated request fails closed (auth DoS). Real HA
|
||||||
|
needs **R3** (quorum 2/3): raise replicas in place with `nats stream update
|
||||||
|
<stream> --replicas 3` on every control-plane stream once the third node has joined. Do not advertise R1 as HA.
|
||||||
|
|||||||
@@ -0,0 +1,7 @@
|
|||||||
|
# Generated TLS material and secrets — NEVER commit (audit 0008: keys/secret).
|
||||||
|
out/
|
||||||
|
build/
|
||||||
|
secrets/
|
||||||
|
*.key
|
||||||
|
*.srl
|
||||||
|
cluster-ca.crt
|
||||||
@@ -0,0 +1,181 @@
|
|||||||
|
# unibus cluster — 3-node deploy runbook (issue 0006g)
|
||||||
|
|
||||||
|
This directory holds the material to bring up unibus as a **3-node cluster**
|
||||||
|
(`magnus` + `homer` + `datardos`) for real HA: with **R3** replication the control
|
||||||
|
plane (rooms/members/keys/users on JetStream KV + the anti-replay nonce bucket)
|
||||||
|
survives the loss of any one node (quorum 2/3).
|
||||||
|
|
||||||
|
> **The agent that authored this never touched a VPS.** Every step that changes a
|
||||||
|
> remote host is marked **HUMAN** and is executed by the operator. `deploy-cluster.sh`
|
||||||
|
> defaults to a dry run.
|
||||||
|
|
||||||
|
## Files
|
||||||
|
|
||||||
|
| File | What it is |
|
||||||
|
|---|---|
|
||||||
|
| `nodes.env` | Topology: cluster name, ports, and the per-node rows (name, ssh host, public IP, WG IP). **HUMAN fills the placeholders.** |
|
||||||
|
| `generate-cluster-certs.sh` | Mints a **separate cluster route CA** + a route cert per node, and a data-plane server cert per node signed by the **client CA** (`../tls/ca.*`). |
|
||||||
|
| `membershipd-cluster.service` | One systemd unit, parameterized per node by `/opt/unibus/cluster.env`. enforce + per-subject ACL + TLS + `--store kv`, `Restart=always`. |
|
||||||
|
| `deploy-cluster.sh` | Cross-builds the linux binary, generates each node's `cluster.env`, and (with `--yes`) rsyncs everything + installs the unit. Staggered start is manual. |
|
||||||
|
|
||||||
|
Generated keys/secrets (`out/`, `build/`, `secrets/`) are **gitignored** — they are
|
||||||
|
secret and never leave the operator's trusted machine except over the secure
|
||||||
|
rsync channel.
|
||||||
|
|
||||||
|
## Topology
|
||||||
|
|
||||||
|
| Node | SSH | Public IP | WireGuard IP | Role |
|
||||||
|
|---|---|---|---|---|
|
||||||
|
| magnus | `magnus` | `<MAGNUS_PUBLIC_IP>` | `<MAGNUS_WG_IP>` | seed (first up) |
|
||||||
|
| homer | `homer` | `141.94.69.66` | `<HOMER_WG_IP>` | replica |
|
||||||
|
| datardos | `dd` | `51.91.100.142` | `<DATARDOS_WG_IP>` (10.21.0.x) | replica |
|
||||||
|
|
||||||
|
The route layer (server-to-server) prefers the **WireGuard mesh**
|
||||||
|
(`ROUTE_NETWORK=wg`); the client data plane and the HTTP control plane are reached
|
||||||
|
over the public IPs. The route CA is **separate** from the client CA, so a client
|
||||||
|
cert can never be presented to the route port.
|
||||||
|
|
||||||
|
## Prerequisites (HUMAN, once)
|
||||||
|
|
||||||
|
1. **Fill `nodes.env`** — replace every `<PLACEHOLDER>` (magnus public IP, all WG
|
||||||
|
IPs). The scripts refuse to run while any remain.
|
||||||
|
2. **Client CA exists** — `../tls/ca.crt` + `../tls/ca.key`. If not, run
|
||||||
|
`../tls/generate-certs.sh` on the CA host (om) first. The cluster reuses this CA
|
||||||
|
for the data plane so existing clients keep trusting the bus.
|
||||||
|
3. **Mint cluster TLS**:
|
||||||
|
```bash
|
||||||
|
./generate-cluster-certs.sh # writes out/<name>/ ; --force to rotate the cluster CA
|
||||||
|
```
|
||||||
|
4. **Create the route secret** (out of argv, shared by all nodes):
|
||||||
|
```bash
|
||||||
|
mkdir -p secrets && openssl rand -hex 32 > secrets/cluster.pass
|
||||||
|
```
|
||||||
|
5. **SSH** to each node's SSH host as `root` works (`ssh magnus true`, `ssh dd true`, ...).
|
||||||
|
|
||||||
|
## Stage the nodes
|
||||||
|
|
||||||
|
```bash
|
||||||
|
./deploy-cluster.sh # DRY RUN — prints the full plan, touches nothing
|
||||||
|
./deploy-cluster.sh --yes # HUMAN: actually rsync + install the unit on all 3 nodes
|
||||||
|
```
|
||||||
|
|
||||||
|
This cross-builds `membershipd` (linux/amd64, `CGO_ENABLED=0`), writes each node's
|
||||||
|
`cluster.env` (its `NODE_NAME` and the `--routes` to the OTHER two nodes), and
|
||||||
|
ships the binary, the node's TLS material, the secret, the env file and the unit.
|
||||||
|
It does **not** start anything.
|
||||||
|
|
||||||
|
## Seed the first admin into the KV (HUMAN — loopback bootstrap)
|
||||||
|
|
||||||
|
The empty KV control plane has no users, and under `enforce` no external tool can
|
||||||
|
write the FIRST admin over NATS (it would need to be an admin already — a
|
||||||
|
chicken-and-egg). The `user` CLI also writes only to a local SQLite file, not the
|
||||||
|
KV. So the first admin is seeded on the seed node through a **loopback, no-auth
|
||||||
|
bootstrap** that populates the same JetStream store the cluster unit then reuses:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
ssh root@magnus 'bash -s' <<'SEED'
|
||||||
|
set -euo pipefail
|
||||||
|
cd /opt/unibus
|
||||||
|
# a) Put the first admin into a local SQLite seed file.
|
||||||
|
./membershipd user add --db ./seed.db --handle root --sign-pub <ADMIN_SIGN_PUB_HEX> --role admin
|
||||||
|
# b) Bring up a TEMPORARY loopback, no-auth, single-node KV server on the cluster's
|
||||||
|
# own JetStream store dir (not exposed; bus-auth off is allowed on 127.0.0.1).
|
||||||
|
./membershipd --store kv --bus-auth off --bind 127.0.0.1 \
|
||||||
|
--nats-store ./local_files/jetstream --db ./seed.db >/tmp/seed-boot.log 2>&1 &
|
||||||
|
BOOT=$!; sleep 2
|
||||||
|
# c) Migrate the admin from SQLite into the replicated KV (loopback — no --ca needed).
|
||||||
|
./membershipd migrate-to-kv --db ./seed.db --nats-url nats://127.0.0.1:4250 --replicas 1
|
||||||
|
# d) Stop the bootstrap server. The KV buckets persist in ./local_files/jetstream.
|
||||||
|
kill "$BOOT"; wait "$BOOT" 2>/dev/null || true
|
||||||
|
rm -f ./seed.db
|
||||||
|
SEED
|
||||||
|
```
|
||||||
|
|
||||||
|
> The KV written here lives in `./local_files/jetstream`, which the cluster unit
|
||||||
|
> reuses (`--nats-store` default), so the admin is present when the enforce cluster
|
||||||
|
> starts. Additional users are added the same loopback way until a
|
||||||
|
> `user add --store kv` exists (see GAP in report 0009).
|
||||||
|
|
||||||
|
## Bring up (HUMAN — staggered)
|
||||||
|
|
||||||
|
Bring up the seed first, then the replicas one at a time, checking each joins.
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# 1. Seed node (after the seed step above).
|
||||||
|
ssh root@magnus 'systemctl enable --now membershipd-cluster'
|
||||||
|
ssh root@magnus 'curl -fsS https://127.0.0.1:8470/healthz --cacert /opt/unibus/tls/ca.crt'
|
||||||
|
|
||||||
|
# 2. Replicas, one at a time.
|
||||||
|
ssh root@homer 'systemctl enable --now membershipd-cluster'
|
||||||
|
ssh root@dd 'systemctl enable --now membershipd-cluster'   # datardos (its SSH host is dd)
|
||||||
|
```
|
||||||
|
|
||||||
|
> Initial rollout runs at **R1** (`KV_REPLICAS=1` in `nodes.env`): the buckets live
|
||||||
|
> on the seed only. This is NOT HA yet — see "Scale to R3".
|
||||||
|
|
||||||
|
## Promote an existing single-node (SQLite) deployment (HUMAN, optional)
|
||||||
|
|
||||||
|
Instead of seeding fresh, you can migrate an existing single-node `unibus.db` into
|
||||||
|
the KV — **loopback only** (the allowlist would otherwise travel cleartext; the
|
||||||
|
command refuses a remote target without `--ca`). Use the same loopback-bootstrap
|
||||||
|
shape as the seed step (temporary `--bus-auth off` server on 127.0.0.1, then
|
||||||
|
`migrate-to-kv --db /opt/unibus/local_files/unibus.db`).
|
||||||
|
|
||||||
|
## Verify
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# Posture on every node — all must be enforce+acl+tls+cluster, store=kv.
|
||||||
|
for h in magnus homer dd; do   # SSH hosts from the topology table (datardos is reached as dd)
|
||||||
|
echo "== $h =="
|
||||||
|
ssh root@$h 'curl -fsS https://127.0.0.1:8470/healthz --cacert /opt/unibus/tls/ca.crt'
|
||||||
|
done
|
||||||
|
|
||||||
|
# Cluster + JetStream meta-group health (needs the `nats` CLI on a node):
|
||||||
|
ssh root@magnus 'nats --server nats://127.0.0.1:4250 server report jetstream'
|
||||||
|
ssh root@magnus 'nats --server nats://127.0.0.1:4250 server list' # 3 servers, routes up
|
||||||
|
```
|
||||||
|
|
||||||
|
A healthy cluster shows 3 routed servers and a JetStream meta-group with a leader.
|
||||||
|
|
||||||
|
## Scale to R3 (HUMAN — real HA)
|
||||||
|
|
||||||
|
Once all three nodes are up and routed, raise the replication factor of every
|
||||||
|
control-plane stream from 1 to 3 IN PLACE (no data loss), then flip `KV_REPLICAS=3`
|
||||||
|
in `nodes.env` so future (re)deploys keep it:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
for s in KV_UNIBUS_users KV_UNIBUS_rooms KV_UNIBUS_members KV_UNIBUS_room_keys \
|
||||||
|
KV_UNIBUS_rooms_by_member KV_UNIBUS_nonces; do
|
||||||
|
ssh root@magnus "nats --server nats://127.0.0.1:4250 stream update $s --replicas 3 -f"
|
||||||
|
done
|
||||||
|
# (also OBJ_UNIBUS_blobs if the object store is in use)
|
||||||
|
```
|
||||||
|
|
||||||
|
Until this is done, R1 means the seed node is a **single point of failure for
|
||||||
|
authentication**: if it dies, the nonce/KV control plane is unreachable and every
|
||||||
|
authenticated request fails closed (auth DoS). R1 is a rollout step, not HA.
|
||||||
|
|
||||||
|
## Chaos test (HUMAN — requires the 3 live VPS; NOT run here)
|
||||||
|
|
||||||
|
Validate quorum tolerance after R3:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# Kill one node; the cluster keeps serving (quorum 2/3).
|
||||||
|
ssh root@dd 'systemctl stop membershipd-cluster'   # datardos (its SSH host is dd)
|
||||||
|
# -> clients fail over (multiple seed URLs); reads/writes still succeed.
|
||||||
|
ssh root@dd 'systemctl start membershipd-cluster' # datardos rejoins, catches up
|
||||||
|
|
||||||
|
# Kill two nodes; quorum is LOST — the control plane should fail CLOSED (deny),
|
||||||
|
# never fail open. Verify a request is rejected, not silently served.
|
||||||
|
```
|
||||||
|
|
||||||
|
This network-level chaos test (kill 1/3, kill 2/3, partition/split-brain) is part
|
||||||
|
of the deploy validation (issue 0003f) and runs against the real VPS — it is
|
||||||
|
deliberately out of scope for the authoring agent.
|
||||||
|
|
||||||
|
## Rollback
|
||||||
|
|
||||||
|
`membershipd` does not delete data. To revert a node to standalone SQLite, stop
|
||||||
|
the unit and start it without `--store kv`/`--cluster-name`; the KV buckets remain
|
||||||
|
for a later retry. To rotate the cluster CA, re-run `generate-cluster-certs.sh
|
||||||
|
--force` and re-stage (every node must get the new `cluster-ca.crt` together).
|
||||||
Executable
+126
@@ -0,0 +1,126 @@
|
|||||||
|
#!/usr/bin/env bash
|
||||||
|
#
|
||||||
|
# deploy-cluster.sh — cross-build membershipd and stage it onto the three cluster
|
||||||
|
# nodes (issue 0006g). DEFAULT IS DRY-RUN: it prints the plan and touches nothing.
|
||||||
|
# Pass --yes to actually rsync + run remote commands. Steps that a HUMAN must run
|
||||||
|
# (or confirm) are marked "HUMAN:".
|
||||||
|
#
|
||||||
|
# Prerequisites (HUMAN, once):
|
||||||
|
# 1. Fill nodes.env (no <PLACEHOLDER> left).
|
||||||
|
# 2. ./generate-cluster-certs.sh (mints out/<name>/ TLS material)
|
||||||
|
# 3. Create the route secret locally: mkdir -p secrets && openssl rand -hex 32 > secrets/cluster.pass
|
||||||
|
# (secrets/ is gitignored; it is rsynced to each node as cluster.pass)
|
||||||
|
# 4. SSH access to every node's SSH_HOST with sudo-less root (SSH_USER=root).
|
||||||
|
#
|
||||||
|
# What it does per node (with --yes):
|
||||||
|
# - rsync the membershipd binary, the node's TLS material, the unit, the
|
||||||
|
# generated cluster.env and the route secret into REMOTE_DIR.
|
||||||
|
# - install + daemon-reload the systemd unit.
|
||||||
|
# Start is STAGGERED and left to the human (see README): seed the first admin
|
||||||
|
# (loopback bootstrap, BEFORE the unit starts), start the seed node, then the rest.
|
||||||
|
set -euo pipefail

# Work from the directory that holds this script so the relative paths used
# below (nodes.env, secrets/, out/, build/) resolve regardless of the caller's
# current directory.
DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
cd "$DIR"

# Topology and settings: CLUSTER_NODES, REMOTE_DIR, SSH_USER, ports, ...
# shellcheck source=/dev/null
source ./nodes.env

# Dry-run unless the first argument is exactly --yes.
APPLY=0
[[ "${1:-}" == "--yes" ]] && APPLY=1

# Refuse to run while a <PLACEHOLDER> is left in a VALUE. Only the text before
# the first '#' on each line is inspected: the comments in nodes.env mention
# <PLACEHOLDER> and <SSH_HOST> literally, so scanning whole lines would match
# forever, even after every value had been filled in. -E is used because '\+'
# in a basic regex is a GNU extension.
if grep -Eq '^[^#]*<[A-Z_]+>' nodes.env; then
  echo "ERROR: nodes.env still has <PLACEHOLDER> values — fill them in first." >&2
  exit 2
fi

# The shared route secret must already exist locally; it is never generated here.
SECRET_FILE="secrets/cluster.pass"
if [[ ! -f "$SECRET_FILE" ]]; then
  echo "ERROR: $SECRET_FILE missing. HUMAN: mkdir -p secrets && openssl rand -hex 32 > $SECRET_FILE" >&2
  exit 2
fi
|
||||||
|
|
||||||
|
# run CMD [ARG...] — print the command, then execute it only when APPLY is 1
# (i.e. the script was invoked with --yes). In a dry run it just returns 0.
run() {
  echo " + $*"
  [[ $APPLY -eq 1 ]] || return 0
  "$@"
}
|
||||||
|
|
||||||
|
echo "==> [1/3] cross-build membershipd (linux/amd64, CGO disabled)"
|
||||||
|
run env CGO_ENABLED=0 GOOS=linux GOARCH=amd64 go build -o build/membershipd ../../cmd/membershipd
|
||||||
|
|
||||||
|
# routes_for NAME — print (on stdout) the comma-separated --routes value for
# node NAME: one nats://<addr>:<NATS_ROUTE_PORT> URL for every OTHER row of
# CLUSTER_NODES. <addr> is the row's public IP when ROUTE_NETWORK is "public",
# otherwise its WireGuard IP. The URLs carry NO userinfo (the secret is injected
# by membershipd from the file).
routes_for() {
  local me="$1"
  local entry node _alias public_ip mesh_ip host joined=""
  for entry in "${CLUSTER_NODES[@]}"; do
    read -r node _alias public_ip mesh_ip <<<"$entry"
    if [[ "$node" != "$me" ]]; then
      case "$ROUTE_NETWORK" in
        public) host="$public_ip" ;;
        *) host="$mesh_ip" ;;
      esac
      # Prefix a comma only when something has already been collected.
      joined="${joined:+${joined},}nats://${host}:${NATS_ROUTE_PORT}"
    fi
  done
  echo "$joined"
}
|
||||||
|
|
||||||
|
echo "==> [2/3] stage each node (REMOTE_DIR=$REMOTE_DIR)"
for entry in "${CLUSTER_NODES[@]}"; do
  # Row layout: NAME SSH_HOST PUBLIC_IP WG_IP (the two IPs are not needed here).
  read -r name ssh_host _pub _wg <<<"$entry"
  remote="${SSH_USER}@${ssh_host}"
  material="out/${name}"

  # The node's TLS material must already exist locally before anything ships.
  [[ -d "$material" ]] || {
    echo "ERROR: $material missing — run ./generate-cluster-certs.sh first." >&2
    exit 2
  }

  routes="$(routes_for "$name")"
  echo "-- node ${name} (ssh ${ssh_host}) routes=${routes}"

  # Render this node's cluster.env locally, then ship it. This file is written
  # even in a dry run; only the commands passed to `run` are skipped.
  env_out="build/cluster-${name}.env"
  mkdir -p build
  printf '%s\n' \
    "NODE_NAME=${name}" \
    "CLUSTER_NAME=${CLUSTER_NAME}" \
    "CLUSTER_USER=${CLUSTER_USER}" \
    "KV_REPLICAS=${KV_REPLICAS}" \
    "HTTP_PORT=${HTTP_PORT}" \
    "NATS_CLIENT_PORT=${NATS_CLIENT_PORT}" \
    "NATS_ROUTE_PORT=${NATS_ROUTE_PORT}" \
    "ROUTES=${routes}" \
    "CLUSTER_PASS_FILE=${REMOTE_DIR}/secrets/cluster.pass" \
    "TLS_CERT=${REMOTE_DIR}/tls/server-${name}.crt" \
    "TLS_KEY=${REMOTE_DIR}/tls/server-${name}.key" \
    "ROUTE_TLS_CERT=${REMOTE_DIR}/tls/route-${name}.crt" \
    "ROUTE_TLS_KEY=${REMOTE_DIR}/tls/route-${name}.key" \
    "ROUTE_TLS_CA=${REMOTE_DIR}/tls/cluster-ca.crt" \
    > "$env_out"

  # Remote layout first, then binary, TLS material, secret, env file and unit.
  run ssh "$remote" "mkdir -p ${REMOTE_DIR}/tls ${REMOTE_DIR}/secrets"
  run rsync -az build/membershipd "${remote}:${REMOTE_DIR}/membershipd"
  run rsync -az "${material}/" "${remote}:${REMOTE_DIR}/tls/"
  run rsync -az "$SECRET_FILE" "${remote}:${REMOTE_DIR}/secrets/cluster.pass"
  run rsync -az "$env_out" "${remote}:${REMOTE_DIR}/cluster.env"
  run rsync -az membershipd-cluster.service "${remote}:/etc/systemd/system/membershipd-cluster.service"
  # Tighten the secret and private keys, then let systemd pick up the unit.
  run ssh "$remote" "chmod 600 ${REMOTE_DIR}/secrets/cluster.pass ${REMOTE_DIR}/tls/*.key && systemctl daemon-reload"
done
|
||||||
|
|
||||||
|
echo "==> [3/3] staged."
if [[ $APPLY -eq 0 ]]; then
  echo " DRY-RUN: nothing was sent. Re-run with --yes to apply."
fi

# Manual follow-up, printed verbatim (quoted heredoc: nothing is expanded).
# The order matters: the first admin is seeded through the README's loopback
# bootstrap BEFORE the seed node's unit is started, because `user add` only
# writes a local SQLite file and cannot populate the KV of a running cluster.
# The ssh examples use each node's SSH host (datardos is reached as `dd`).
cat <<'NEXT'

HUMAN — staggered start (do NOT enable all at once; see README "Bring up"):
 1. Seed the first admin on the seed node BEFORE starting its unit
    (README "Seed the first admin into the KV": loopback bootstrap + migrate-to-kv).
 2. Start the seed node (e.g. magnus):
 ssh root@magnus 'systemctl enable --now membershipd-cluster'
 3. Then the other two, one at a time, checking quorum after each:
 ssh root@homer 'systemctl enable --now membershipd-cluster'
 ssh root@dd 'systemctl enable --now membershipd-cluster'
 4. Verify posture + quorum (README "Verify").
 5. Scale replicas 1 -> 3 once all three are up (README "Scale to R3").
NEXT
|
||||||
Executable
+120
@@ -0,0 +1,120 @@
|
|||||||
|
#!/usr/bin/env bash
|
||||||
|
#
|
||||||
|
# generate-cluster-certs.sh — mint the TLS material for a unibus 3-node cluster
|
||||||
|
# (issue 0006g). Run ONCE on a trusted machine (e.g. om, which custodies the bus
|
||||||
|
# CA); distribute the per-node output to each node over a secure channel. This
|
||||||
|
# script touches NO remote host.
|
||||||
|
#
|
||||||
|
# It produces two trust roots, kept SEPARATE on purpose (audit 0008 N1-low):
|
||||||
|
#
|
||||||
|
# 1. The CLUSTER route CA (cluster-ca.crt/key, generated here): signs each
|
||||||
|
# node's ROUTE certificate. The route layer authenticates NODES, not bus
|
||||||
|
# users, so it must NOT share the client data-plane CA — a client cert can
|
||||||
|
# then never be presented to the route port.
|
||||||
|
# 2. The CLIENT data-plane CA (../tls/ca.crt/key, the one clients pin): signs
|
||||||
|
# each node's DATA-PLANE server certificate. Reused, not regenerated, so
|
||||||
|
# existing clients keep trusting the bus.
|
||||||
|
#
|
||||||
|
# Per node it emits, under out/<name>/:
|
||||||
|
# route-<name>.crt/key route cert (cluster CA), EKU server+clientAuth
|
||||||
|
# (each node is BOTH server and dialer to its peers)
|
||||||
|
# server-<name>.crt/key data-plane cert (client CA), EKU serverAuth
|
||||||
|
# cluster-ca.crt the route CA cert (for --route-tls-ca)
|
||||||
|
# ca.crt the client CA cert (for clients / control-plane TLS)
|
||||||
|
#
|
||||||
|
# SANs per node = its public IP + its WireGuard IP + its hostname + localhost.
|
||||||
|
#
|
||||||
|
# Key material: EC P-256 (Go crypto/tls + nats-server friendly), matching
|
||||||
|
# ../tls/generate-certs.sh.
|
||||||
|
set -euo pipefail

# Work from the directory that holds this script so the relative paths used
# below (nodes.env, ../tls/, out/, cluster-ca.*) resolve from anywhere.
DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
cd "$DIR"

# Topology (CLUSTER_NODES rows: NAME SSH_HOST PUBLIC_IP WG_IP).
# shellcheck source=/dev/null
source ./nodes.env

# Refuse to run while a <PLACEHOLDER> is left in a VALUE (HUMAN must fill
# nodes.env first). Only the text before the first '#' on each line is
# inspected: the comments in nodes.env mention <PLACEHOLDER> and <SSH_HOST>
# literally, so scanning whole lines would match forever, even after every
# value had been filled in. -E is used because '\+' in a basic regex is a GNU
# extension.
if grep -Eq '^[^#]*<[A-Z_]+>' nodes.env; then
  echo "ERROR: nodes.env still has <PLACEHOLDER> values — fill them in first." >&2
  grep -En '^[^#]*<[A-Z_]+>' nodes.env >&2
  exit 2
fi

# The client data-plane CA is reused, never generated here.
CLIENT_CA_CRT="../tls/ca.crt"
CLIENT_CA_KEY="../tls/ca.key"
if [[ ! -f "$CLIENT_CA_CRT" || ! -f "$CLIENT_CA_KEY" ]]; then
  echo "ERROR: client data-plane CA not found at ../tls/ca.{crt,key}." >&2
  echo " Run ../tls/generate-certs.sh first (it mints the client CA)." >&2
  exit 2
fi

# Validity periods, in days, for the cluster CA and for the per-node certs.
DAYS_CA=3650
DAYS_CRT=825

# --force as the first argument regenerates the cluster CA even if it exists.
force=0
[[ "${1:-}" == "--force" ]] && force=1

# --- cluster route CA (separate trust root) ---
# Created when either half is missing or when --force was given; otherwise the
# existing CA is reused so previously minted route certs remain verifiable.
if [[ ! -f cluster-ca.crt || ! -f cluster-ca.key || $force -eq 1 ]]; then
  echo "==> generating cluster route CA (separate from the client CA)"
  openssl ecparam -name prime256v1 -genkey -noout -out cluster-ca.key
  chmod 600 cluster-ca.key
  openssl req -x509 -new -key cluster-ca.key -sha256 -days "$DAYS_CA" \
    -subj "/CN=unibus-cluster-ca" -out cluster-ca.crt
else
  echo "==> reusing existing cluster route CA (pass --force to regenerate)"
fi
|
||||||
|
|
||||||
|
# mint_cert <out_key> <out_crt> <subject_cn> <san> <eku> <ca_crt> <ca_key>
#
# Generates a new EC P-256 private key at <out_key> (mode 600), builds a CSR for
# CN=<subject_cn>, and has the CA (<ca_crt>/<ca_key>) sign it into <out_crt>,
# valid for DAYS_CRT days, with the given subjectAltName and extendedKeyUsage.
# The CSR and the extension file are temporary and are removed at the end.
mint_cert() {
  local key_path="$1" crt_path="$2" subject_cn="$3" alt_names="$4" ext_usage="$5"
  local issuer_crt="$6" issuer_key="$7"
  local request extensions
  request="$(mktemp)"
  extensions="$(mktemp)"

  openssl ecparam -name prime256v1 -genkey -noout -out "$key_path"
  chmod 600 "$key_path"

  openssl req -new -key "$key_path" -subj "/CN=${subject_cn}" -out "$request"

  # X.509 v3 extensions applied at signing time.
  printf '%s\n' \
    "subjectAltName=${alt_names}" \
    "extendedKeyUsage=${ext_usage}" \
    "keyUsage=digitalSignature,keyEncipherment" \
    > "$extensions"

  openssl x509 -req -in "$request" -CA "$issuer_crt" -CAkey "$issuer_key" -CAcreateserial \
    -sha256 -days "$DAYS_CRT" -extfile "$extensions" -out "$crt_path"

  rm -f "$request" "$extensions"
}
|
||||||
|
|
||||||
|
# Mint both certificates for every row of CLUSTER_NODES (NAME SSH_HOST
# PUBLIC_IP WG_IP) and place the two CA certs next to them in out/<name>/.
for entry in "${CLUSTER_NODES[@]}"; do
  read -r name _ssh pub wg <<<"$entry"
  echo "==> node ${name}: SAN IP:${pub}, IP:${wg}, DNS:${name}, localhost, 127.0.0.1"
  node_out="out/${name}"
  mkdir -p "$node_out"
  san="IP:${pub},IP:${wg},DNS:${name},DNS:localhost,IP:127.0.0.1"
  route_base="${node_out}/route-${name}"
  server_base="${node_out}/server-${name}"

  # Route cert: signed by the cluster CA; server+client auth (mutual routes).
  mint_cert "${route_base}.key" "${route_base}.crt" \
    "unibus-route-${name}" "$san" "serverAuth,clientAuth" \
    cluster-ca.crt cluster-ca.key

  # Data-plane server cert: signed by the client CA; serverAuth only.
  mint_cert "${server_base}.key" "${server_base}.crt" \
    "unibus-${name}" "$san" "serverAuth" \
    "$CLIENT_CA_CRT" "$CLIENT_CA_KEY"

  # Co-locate the two CA certs each node needs.
  cp cluster-ca.crt "${node_out}/cluster-ca.crt"
  cp "$CLIENT_CA_CRT" "${node_out}/ca.crt"
done

# Drop the serial files left behind by -CAcreateserial; a missing file is fine.
rm -f cluster-ca.srl ../tls/ca.srl 2>/dev/null || true

# Closing summary: one line per node, then a hint for inspecting a SAN.
echo
echo "==> done. Per-node material under out/<name>/ (KEYS ARE SECRET — never git):"
for entry in "${CLUSTER_NODES[@]}"; do
  read -r name _rest <<<"$entry"
  echo " out/${name}/ (route-${name}.*, server-${name}.*, cluster-ca.crt, ca.crt)"
done
echo
echo "verify a SAN with:"
echo " openssl x509 -in out/<name>/server-<name>.crt -noout -text | grep -A1 'Subject Alternative Name'"
|
||||||
@@ -0,0 +1,45 @@
|
|||||||
|
[Unit]
|
||||||
|
# unibus membershipd — cluster node (issue 0006g).
|
||||||
|
#
|
||||||
|
# One unit, parameterized per node by /opt/unibus/cluster.env (generated by
|
||||||
|
# deploy-cluster.sh): NODE_NAME, ROUTES and the cert paths differ per node, the
|
||||||
|
# rest of the posture (enforce + per-subject ACL + TLS + --store kv) is identical
|
||||||
|
# on every node, which is the homogeneous posture a secure cluster requires
|
||||||
|
# (audit 0008 N1).
|
||||||
|
Description=unibus membershipd (cluster node)
|
||||||
|
After=network-online.target
|
||||||
|
Wants=network-online.target
|
||||||
|
|
||||||
|
[Service]
|
||||||
|
Type=simple
|
||||||
|
WorkingDirectory=/opt/unibus
|
||||||
|
EnvironmentFile=/opt/unibus/cluster.env
|
||||||
|
# The route password comes from a FILE referenced by ${CLUSTER_PASS_FILE}, never
|
||||||
|
# from argv (audit 0008 N1-low). The peer --routes carry no userinfo; membershipd
|
||||||
|
# injects the credentials from the file/user.
|
||||||
|
ExecStart=/opt/unibus/membershipd \
|
||||||
|
--bind 0.0.0.0 \
|
||||||
|
--bus-auth enforce \
|
||||||
|
--http-port ${HTTP_PORT} \
|
||||||
|
--nats-port ${NATS_CLIENT_PORT} \
|
||||||
|
--tls-cert ${TLS_CERT} \
|
||||||
|
--tls-key ${TLS_KEY} \
|
||||||
|
--cluster-name ${CLUSTER_NAME} \
|
||||||
|
--server-name ${NODE_NAME} \
|
||||||
|
--cluster-port ${NATS_ROUTE_PORT} \
|
||||||
|
--routes ${ROUTES} \
|
||||||
|
--cluster-user ${CLUSTER_USER} \
|
||||||
|
--cluster-pass-file ${CLUSTER_PASS_FILE} \
|
||||||
|
--route-tls-cert ${ROUTE_TLS_CERT} \
|
||||||
|
--route-tls-key ${ROUTE_TLS_KEY} \
|
||||||
|
--route-tls-ca ${ROUTE_TLS_CA} \
|
||||||
|
--store kv \
|
||||||
|
--kv-replicas ${KV_REPLICAS}
|
||||||
|
# Restart=always (NOT on-failure): a clean SIGTERM exits success, and on-failure
|
||||||
|
# would then NOT restart, leaving the node silently dead (see function_tags.md).
|
||||||
|
Restart=always
|
||||||
|
RestartSec=2
|
||||||
|
LimitNOFILE=65536
|
||||||
|
|
||||||
|
[Install]
|
||||||
|
WantedBy=multi-user.target
|
||||||
@@ -0,0 +1,44 @@
|
|||||||
|
# Cluster topology for the unibus 3-node deployment (issue 0006g).
|
||||||
|
#
|
||||||
|
# This file is SOURCED by generate-cluster-certs.sh and deploy-cluster.sh.
|
||||||
|
#
|
||||||
|
# HUMAN: fill in every <PLACEHOLDER> with the real value before running the
|
||||||
|
# scripts. The public IPs known at authoring time are pre-filled; the WireGuard
|
||||||
|
# mesh IPs and magnus's public IP must be supplied. The scripts refuse to run
|
||||||
|
# while any <PLACEHOLDER> remains.
|
||||||
|
|
||||||
|
# Cluster identity (must be identical on every node).
|
||||||
|
CLUSTER_NAME="unibus"
|
||||||
|
# Route-secret username; the password is NOT here — it lives in a file (see
|
||||||
|
# CLUSTER_PASS_FILE in deploy-cluster.sh) so it never lands in argv or git.
|
||||||
|
CLUSTER_USER="unibus-cluster"
|
||||||
|
|
||||||
|
# KV/nonce replication factor. START AT 1 for the initial 1->3 rollout, then raise
|
||||||
|
# to 3 IN PLACE (see README "Scale to R3") once all three nodes have joined. Only
|
||||||
|
# set this to 3 here after the third node is up and you re-run the KV update.
|
||||||
|
KV_REPLICAS=1
|
||||||
|
|
||||||
|
# Ports (same on every node; the route port is server-to-server only).
|
||||||
|
NATS_CLIENT_PORT=4250
|
||||||
|
NATS_ROUTE_PORT=6250
|
||||||
|
HTTP_PORT=8470
|
||||||
|
|
||||||
|
# Remote install layout and SSH login user.
|
||||||
|
REMOTE_DIR="/opt/unibus"
|
||||||
|
SSH_USER="root"
|
||||||
|
|
||||||
|
# Which address family the inter-node routes use. "wg" builds --routes from the
|
||||||
|
# WireGuard mesh IPs (private server-to-server links, preferred); "public" uses
|
||||||
|
# the public IPs. The route layer is always mutual-TLS regardless.
|
||||||
|
ROUTE_NETWORK="wg"
|
||||||
|
|
||||||
|
# One row per node: NAME SSH_HOST PUBLIC_IP WG_IP
|
||||||
|
# NAME -> --server-name and the per-node cert filenames (unique).
|
||||||
|
# SSH_HOST -> the `ssh <SSH_HOST>` alias (see ~/.ssh/config).
|
||||||
|
# PUBLIC_IP -> public address; goes in the cert SANs (client-facing data plane).
|
||||||
|
# WG_IP -> WireGuard mesh address; cert SAN + route target when ROUTE_NETWORK=wg.
|
||||||
|
CLUSTER_NODES=(
|
||||||
|
"magnus magnus <MAGNUS_PUBLIC_IP> <MAGNUS_WG_IP>"
|
||||||
|
"homer homer 141.94.69.66 <HOMER_WG_IP>"
|
||||||
|
"datardos dd 51.91.100.142 <DATARDOS_WG_IP>"
|
||||||
|
)
|
||||||
@@ -0,0 +1,6 @@
|
|||||||
|
# Private keys and the deploy-specific server certificate never go to git.
|
||||||
|
# Only the public CA certificate (ca.crt) is versioned, because clients embed it.
|
||||||
|
*.key
|
||||||
|
*.csr
|
||||||
|
*.srl
|
||||||
|
server.crt
|
||||||
@@ -0,0 +1,56 @@
|
|||||||
|
# Bus TLS — self-signed CA and server certificate
|
||||||
|
|
||||||
|
The unibus data plane (NATS) is encrypted with TLS using the project's own
|
||||||
|
self-signed CA. The bus is exposed publicly, protected by auth + TLS, so the CA
|
||||||
|
is private (not Let's Encrypt) and every client we control embeds the public
|
||||||
|
`ca.crt`; the server presents `server.crt`/`server.key`.
|
||||||
|
|
||||||
|
## Files
|
||||||
|
|
||||||
|
| File | Secret? | Goes where |
|
||||||
|
|---|---|---|
|
||||||
|
| `ca.crt` | no (public) | versioned in git; embedded/distributed to every client |
|
||||||
|
| `ca.key` | **yes** | stays on the machine that mints certs; gitignored |
|
||||||
|
| `server.crt` | no | deployed to the bus host; gitignored (deploy-specific SANs) |
|
||||||
|
| `server.key` | **yes** | deployed to the bus host over a secure channel; gitignored |
|
||||||
|
|
||||||
|
Only `ca.crt` is committed. `ca.key`, `server.key`, `server.crt`, and any
|
||||||
|
`*.csr`/`*.srl` are gitignored — see `.gitignore`.
|
||||||
|
|
||||||
|
## Generate
|
||||||
|
|
||||||
|
```bash
|
||||||
|
cd deploy/tls
|
||||||
|
./generate-certs.sh # CA (if missing) + server cert with default SANs
|
||||||
|
./generate-certs.sh --force # also regenerate the CA (invalidates pinned clients)
|
||||||
|
```
|
||||||
|
|
||||||
|
The server certificate's SANs cover the public IP, the WireGuard IP, the `om`
|
||||||
|
hostname, plus `localhost`/`127.0.0.1` for on-host smoke tests. Override the
|
||||||
|
defaults via environment variables:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
UNIBUS_PUBLIC_IP=135.125.201.30 UNIBUS_WG_IP=10.42.0.1 UNIBUS_HOSTNAME=om ./generate-certs.sh
|
||||||
|
```
|
||||||
|
|
||||||
|
Verify the SANs:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
openssl x509 -in server.crt -noout -text | grep -A1 'Subject Alternative Name'
|
||||||
|
```
|
||||||
|
|
||||||
|
## Use
|
||||||
|
|
||||||
|
- **Server** (`membershipd`, phase 0001e): point it at `server.crt`/`server.key`
|
||||||
|
so the embedded NATS presents the certificate and requires TLS. Built with
|
||||||
|
`busauth.ServerTLSConfig(certPath, keyPath)`.
|
||||||
|
- **Clients** (Go peers, mobile binding, gateway): pin `ca.crt` with
|
||||||
|
`busauth.LoadCATLSConfig(caPath)` and pass the result as `client.Options.TLS`.
|
||||||
|
|
||||||
|
## Rotation
|
||||||
|
|
||||||
|
The CA is long-lived (10 years). Rotate the server certificate (825 days) by
|
||||||
|
re-running `generate-certs.sh` (without `--force`) and redeploying
|
||||||
|
`server.crt`/`server.key`; clients are unaffected because they pin the CA, not
|
||||||
|
the server cert. Rotating the CA (`--force`) requires redistributing `ca.crt` to
|
||||||
|
every client.
|
||||||
@@ -0,0 +1,11 @@
|
|||||||
|
-----BEGIN CERTIFICATE-----
|
||||||
|
MIIBfTCCASOgAwIBAgIUW2HZJDDlixxw/DgNP/IDIrJ7MeMwCgYIKoZIzj0EAwIw
|
||||||
|
FDESMBAGA1UEAwwJdW5pYnVzLWNhMB4XDTI2MDYwNzEwNDIyNloXDTM2MDYwNDEw
|
||||||
|
NDIyNlowFDESMBAGA1UEAwwJdW5pYnVzLWNhMFkwEwYHKoZIzj0CAQYIKoZIzj0D
|
||||||
|
AQcDQgAEe2by5l9dcEbqKB11yJtPIH9S/01XNhuFnBB/IpDevO2fWLLV+muqoB8C
|
||||||
|
ADH1wKleq8jF5D0sSlK2DCuYrjAjPqNTMFEwHQYDVR0OBBYEFABX+UI7bXICRF4l
|
||||||
|
WmmDR/rUtxnrMB8GA1UdIwQYMBaAFABX+UI7bXICRF4lWmmDR/rUtxnrMA8GA1Ud
|
||||||
|
EwEB/wQFMAMBAf8wCgYIKoZIzj0EAwIDSAAwRQIgCAeOYTKvA6SBB8xMdMdqNrp1
|
||||||
|
20OPyi2BwFovW6vTCLMCIQC1qRi8SGRHTui8BVqIvp/DFJaZ/U8ocAg/qedLdy+R
|
||||||
|
/w==
|
||||||
|
-----END CERTIFICATE-----
|
||||||
Executable
+64
@@ -0,0 +1,64 @@
|
|||||||
|
#!/usr/bin/env bash
#
# generate-certs.sh — mint the unibus bus's self-signed CA and the NATS server
# certificate. Run on a trusted machine; distribute ca.crt to clients and
# server.crt/server.key to the bus host (server.key by a secure channel, never
# git). Re-running regenerates the server cert; pass --force to also regenerate
# the CA (which invalidates every client that pinned the old ca.crt).
#
# Usage: ./generate-certs.sh [--force]
#
# SANs cover the public IP, the WireGuard IP, the bus host's hostname, plus
# localhost/127.0.0.1 so the operator can smoke-test the TLS handshake on the
# box. Override via env:
#   UNIBUS_PUBLIC_IP  (default 135.125.201.30)
#   UNIBUS_WG_IP      (default 10.42.0.1)
#   UNIBUS_HOSTNAME   (default om)
#
# Key material: EC P-256 (widely supported by Go's crypto/tls and nats-server).
set -euo pipefail

DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
cd "$DIR"

PUBLIC_IP="${UNIBUS_PUBLIC_IP:-135.125.201.30}"
WG_IP="${UNIBUS_WG_IP:-10.42.0.1}"
HOSTNAME_OM="${UNIBUS_HOSTNAME:-om}"
DAYS_CA=3650
DAYS_SRV=825

# --- arguments: only an optional --force; anything else is an error so that a
# typo (e.g. "-force") cannot be mistaken for a plain server-cert rotation.
force=0
if (( $# > 1 )); then
  echo "usage: $(basename "$0") [--force]" >&2
  exit 2
fi
case "${1:-}" in
  "") ;;
  --force) force=1 ;;
  *)
    echo "usage: $(basename "$0") [--force]" >&2
    exit 2
    ;;
esac

if ! command -v openssl >/dev/null 2>&1; then
  echo "error: openssl not found in PATH" >&2
  exit 1
fi

# Intermediate files are removed on every exit path, including a `set -e` abort
# halfway through signing.
cleanup() {
  rm -f server.csr server.ext ca.srl
}
trap cleanup EXIT

# ca.crt is versioned but ca.key is gitignored, so a fresh clone has the cert
# without the key. Minting a new CA in that state would silently replace the
# trust anchor every client pins; require an explicit --force instead.
if [[ $force -eq 0 && -f ca.crt && ! -f ca.key ]]; then
  echo "error: ca.crt exists but ca.key is missing." >&2
  echo "       Copy ca.key from the machine that minted the CA, or pass --force" >&2
  echo "       to regenerate the CA (invalidates every client pinning ca.crt)." >&2
  exit 1
fi

# --- CA (long-lived; only the cert is public) ---
if [[ ! -f ca.crt || ! -f ca.key || $force -eq 1 ]]; then
  echo "==> generating CA"
  # Create the key under a restrictive umask so it is never group/world
  # readable, not even briefly; chmod still tightens a pre-existing file.
  (umask 077; openssl ecparam -name prime256v1 -genkey -noout -out ca.key)
  chmod 600 ca.key
  openssl req -x509 -new -key ca.key -sha256 -days "$DAYS_CA" \
    -subj "/CN=unibus-ca" -out ca.crt
else
  echo "==> reusing existing CA (pass --force to regenerate)"
fi

# --- server certificate, signed by the CA, with the bus SANs ---
echo "==> generating server certificate (SAN: $PUBLIC_IP, $WG_IP, $HOSTNAME_OM, localhost, 127.0.0.1)"
(umask 077; openssl ecparam -name prime256v1 -genkey -noout -out server.key)
chmod 600 server.key
openssl req -new -key server.key -subj "/CN=unibus-bus" -out server.csr

# x509 -req does not copy extensions from the CSR, so the SANs and key usages
# are supplied through an extension file at signing time.
cat > server.ext <<EOF
subjectAltName=IP:${PUBLIC_IP},IP:${WG_IP},DNS:${HOSTNAME_OM},DNS:localhost,IP:127.0.0.1
extendedKeyUsage=serverAuth
keyUsage=digitalSignature,keyEncipherment
EOF

openssl x509 -req -in server.csr -CA ca.crt -CAkey ca.key -CAcreateserial \
  -sha256 -days "$DAYS_SRV" -extfile server.ext -out server.crt

echo "==> done:"
echo "    ca.crt      -> embed/distribute to every client (public)"
echo "    server.crt  -> deploy to the bus host"
echo "    server.key  -> deploy to the bus host over a secure channel (NEVER git)"
echo
echo "verify SANs with:"
echo "    openssl x509 -in server.crt -noout -text | grep -A1 'Subject Alternative Name'"
|
||||||
@@ -0,0 +1,55 @@
|
|||||||
|
# Issue 0001e — remaining client migrations (notes, NOT implemented)
|
||||||
|
|
||||||
|
Phase 0001e migrated the first-class Go clients and the mobile binding to the
|
||||||
|
secure connection path (`client.Connect(natsURL, ctrlURL, id, caPath)` → TLS + nkey; control-plane
|
||||||
|
requests are always signed). Two consumers are intentionally **left as notes**
|
||||||
|
because they live outside this sub-repo or need their own coordination:
|
||||||
|
|
||||||
|
## 1. Web gateway (`playground/server.go`)
|
||||||
|
|
||||||
|
The playground is a local dev gateway that embeds its own membershipd
|
||||||
|
(`membership.NewServer(..., AuthOff)`) and an open embedded NATS, and connects
|
||||||
|
browser sessions through an in-process client. To run it against a **secured**
|
||||||
|
bus it would need:
|
||||||
|
|
||||||
|
- Connect its internal client via `client.Connect(natsURL, ctrlURL, id, caPath)`
|
||||||
|
with the bundled `ca.crt` (it currently builds the client without options).
|
||||||
|
- If it should itself enforce auth on the browser-facing side, start its
|
||||||
|
embedded membershipd with an auth mode and its embedded NATS with
|
||||||
|
`embeddednats.StartServer(ServerConfig{Auth: ..., TLS: ...})` — but a local
|
||||||
|
dev gateway typically stays open and only the *upstream* bus is secured.
|
||||||
|
- The gateway's own bus identity must be registered in the upstream allowlist
|
||||||
|
(`membershipd user add`).
|
||||||
|
|
||||||
|
Decision: left at `AuthOff` + plaintext for now (local dev tool). Migrate when
|
||||||
|
the gateway is pointed at the public bus.
|
||||||
|
|
||||||
|
## 2. unibots (`shell/transportunibus`, in the agents repo — NOT this sub-repo)
|
||||||
|
|
||||||
|
The bot transport lives in the `agents_and_robots` / message_bus consumer, not
|
||||||
|
in `dataforge/unibus`. To talk to the secured bus it must, after recompiling
|
||||||
|
against this `pkg/client`:
|
||||||
|
|
||||||
|
- Switch its connect call to `client.Connect(natsURL, ctrlURL, id, caPath)`,
|
||||||
|
passing the path to the bundled `ca.crt`.
|
||||||
|
- Ship `ca.crt` alongside the bot binary (read-only) and point `caPath` at it.
|
||||||
|
- Register each bot's identity (`hex(SignPub)`) in the bus allowlist via
|
||||||
|
`membershipd user add --handle <bot> --sign-pub <hex>` on the bus host.
|
||||||
|
- Run as `systemd --user` with `caPath` set, per the deploy plan (0001f).
|
||||||
|
|
||||||
|
No code change is possible from this sub-repo; this is the contract the bot
|
||||||
|
transport consumes.
|
||||||
|
|
||||||
|
## Server enablement (operator, phase 0001f)
|
||||||
|
|
||||||
|
`membershipd` now accepts:
|
||||||
|
|
||||||
|
- `--bus-auth enforce` — verify signed control-plane requests AND turn on the
|
||||||
|
NATS nkey authenticator (only allowlisted identities connect).
|
||||||
|
- `--tls-cert deploy/tls/server.crt --tls-key deploy/tls/server.key` — present
|
||||||
|
the server certificate and require TLS on the embedded NATS.
|
||||||
|
|
||||||
|
`dev/feature_flags.json` now declares both `bus-auth: enforce` and
|
||||||
|
`bus-tls: enabled` as the project's target state. The flags are declarative;
|
||||||
|
the operator activates them at deploy time with the flags above. The CLI
|
||||||
|
defaults remain off so local dev and the test suite are unaffected.
|
||||||
@@ -0,0 +1,80 @@
|
|||||||
|
# 0004d — Data-plane access control on NATS (audit H4)
|
||||||
|
|
||||||
|
## The finding
|
||||||
|
|
||||||
|
The NATS authenticator (`pkg/busauth`) decides one thing per connection:
|
||||||
|
*is this identity registered on the bus?* It does **not** scope what a connected
|
||||||
|
client may subscribe to or publish. There is a single NATS account with no
|
||||||
|
`Permissions`, so any registered peer can subscribe to, or publish on, **any**
|
||||||
|
subject. Concretely:
|
||||||
|
|
||||||
|
- A cleartext room (`ModeNATS`) carries its payload in the clear on its subject.
|
||||||
|
A registered peer that knows or guesses the subject subscribes and reads the
|
||||||
|
content directly (the auditor's `TestAudit_NoSubjectACL`: eve, never invited,
|
||||||
|
receives `"internal: salary numbers"`).
|
||||||
|
- An encrypted room (`ModeMatrix`) keeps its **content** confidential (the
|
||||||
|
payload is AEAD ciphertext), but the **traffic metadata** — that a subject
|
||||||
|
is active, message sizes and timing, who is publishing — is still observable by
|
||||||
|
any registered peer that subscribes to the subject.
|
||||||
|
|
||||||
|
## Why the "complete" fix does not fit here
|
||||||
|
|
||||||
|
The preferred fix is per-subject permissions derived from room membership: when a
|
||||||
|
client connects, the authenticator looks up the rooms it belongs to and grants
|
||||||
|
`Sub`/`Pub` only on those subjects. NATS supports this — `CustomClientAuthentication`
|
||||||
|
can register a `*server.User` carrying `Permissions`.
|
||||||
|
|
||||||
|
The blocker is that **NATS evaluates permissions once, at connect time, and never
|
||||||
|
re-evaluates them on a live connection.** unibus clients routinely *connect → create
|
||||||
|
or get invited to a room → publish/subscribe* within the **same** connection
|
||||||
|
(`TestSecureBusEndToEnd` does exactly this: A connects, then creates `room.secure`,
|
||||||
|
then publishes to it). Permissions frozen at connect time would not include a room
|
||||||
|
created or joined afterwards, so the legitimate owner could not publish to the room
|
||||||
|
it just made. Making per-subject ACLs work would therefore require the client to
|
||||||
|
**reconnect on every membership change**, an invasive change to the client library
|
||||||
|
and to every peer (worker, chat, mobile) — and the prompt for this issue scopes the
|
||||||
|
client changes to the minimum.
|
||||||
|
|
||||||
|
That dynamic-membership reconnection model is precisely the redesign that issue
|
||||||
|
**0003** (decentralization) already has to do: it moves the control-plane state to a
|
||||||
|
replicated JetStream KV and reworks how nodes and clients (re)establish sessions. Per
|
||||||
|
the issue's own guidance ("if a complete strategy does not fit, implement the minimum
|
||||||
|
defense and document the rest"), the full subject ACL is deferred to 0003, where the
|
||||||
|
session/permission model is being rebuilt anyway.
|
||||||
|
|
||||||
|
## The strategy implemented here: forbid cleartext rooms in public
|
||||||
|
|
||||||
|
`Server.RequireEncryptedRooms` (set by `membershipd` on any non-loopback bind)
|
||||||
|
refuses to create a cleartext (`ModeNATS`) room. Every room on a public deployment
|
||||||
|
is therefore end-to-end encrypted, so **message content stays confidential even
|
||||||
|
though the transport offers no subject isolation**: a peer that sniffs another
|
||||||
|
room's subject receives only AEAD ciphertext it has no key for.
|
||||||
|
|
||||||
|
This composes with the 0004c control-plane authorization: a non-member cannot even
|
||||||
|
learn a room's subject through the control plane (`GET /rooms/{id}` → 403), so to
|
||||||
|
sniff it an attacker must already know or guess the subject out of band.
|
||||||
|
|
||||||
|
## What this does NOT close (residual exposure, by design)
|
||||||
|
|
||||||
|
- **Traffic metadata.** A registered peer that already knows a subject can still
|
||||||
|
subscribe and observe that the subject is active, the ciphertext sizes, and the
|
||||||
|
timing/cadence of messages. It cannot read content.
|
||||||
|
- **Cross-room publish.** A registered peer can still *publish* arbitrary bytes on
|
||||||
|
any subject. In an encrypted room those bytes fail AEAD open and the signature
|
||||||
|
check (`SignMsgs`), so receivers drop them — it is a nuisance/spam vector, not a
|
||||||
|
confidentiality or integrity break.
|
||||||
|
- **WireGuard-only deployments** may still use cleartext rooms (the guard only trips
|
||||||
|
on a public bind), because the network already restricts who can reach the bus.
|
||||||
|
|
||||||
|
Closing the residual metadata exposure requires the per-subject ACL described above,
|
||||||
|
tracked for issue 0003.
|
||||||
|
|
||||||
|
## Regression evidence
|
||||||
|
|
||||||
|
- `pkg/membership` — `TestRequireEncryptedRoomsRejectsCleartext`: with
|
||||||
|
`RequireEncryptedRooms` on, `POST /rooms` for a cleartext policy returns 403 while
|
||||||
|
an encrypted-room create returns 201.
|
||||||
|
- `pkg/client` — `TestAudit_NoSubjectACL`: under the public posture, creating a
|
||||||
|
`ModeNATS` room fails; alice creates an encrypted room and publishes; eve (a
|
||||||
|
registered non-member) raw-subscribes to the subject and receives only ciphertext —
|
||||||
|
she never recovers the plaintext.
|
||||||
@@ -0,0 +1,26 @@
|
|||||||
|
{
|
||||||
|
"flags": {
|
||||||
|
"bus-auth": {
|
||||||
|
"enabled": true,
|
||||||
|
"state": "enforce",
|
||||||
|
"issue": "0001",
|
||||||
|
"description": "Signed control-plane auth + NATS nkey auth. Rollout: off -> soft (verify+log, allow) -> enforce (reject). 'enabled' mirrors state!=off. Server opts in via membershipd --bus-auth; clients via client.Connect(caPath).",
|
||||||
|
"added": "2026-06-07",
|
||||||
|
"enabled_at": "2026-06-07"
|
||||||
|
},
|
||||||
|
"bus-tls": {
|
||||||
|
"enabled": true,
|
||||||
|
"issue": "0001",
|
||||||
|
"description": "TLS on the NATS data plane using the project's self-signed CA (deploy/tls/). Server opts in via membershipd --tls-cert/--tls-key; clients pin ca.crt via client.Connect(caPath).",
|
||||||
|
"added": "2026-06-07",
|
||||||
|
"enabled_at": "2026-06-07"
|
||||||
|
},
|
||||||
|
"decentralized": {
|
||||||
|
"enabled": false,
|
||||||
|
"issue": "0003",
|
||||||
|
"description": "Control-plane state on replicated JetStream KV instead of local SQLite (branch-by-abstraction membership.Store: sqliteStore default, jetstreamStore opt-in). The route cluster (0003a) and the KV store (0003b) shipped behind this flag; the membershipd boot wiring that selects the store is COMPLETE since issue 0006c and is realized at runtime with the server flag --store kv|sqlite (default sqlite). The internal-identity bootstrap (0006a) lets membershipd open the KV store on its own embedded NATS under enforce. Per-deploy opt-in: a node joins the decentralized control plane by starting with --store kv (and --cluster-name for HA). OFF (--store sqlite) keeps the single-node SQLite control plane unchanged.",
|
||||||
|
"added": "2026-06-07",
|
||||||
|
"enabled_at": null
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
@@ -0,0 +1,214 @@
|
|||||||
|
---
|
||||||
|
issue: 0001
|
||||||
|
title: Seguridad del bus — sistema de usuarios, auth firmada del control plane, NATS nkey + TLS
|
||||||
|
status: spec
|
||||||
|
created: 2026-06-07
|
||||||
|
domain: security
|
||||||
|
scope: unibus (membershipd, pkg/membership, pkg/embeddednats, pkg/client) + clientes (mobile, web gateway, unibots)
|
||||||
|
---
|
||||||
|
|
||||||
|
# Objetivo
|
||||||
|
|
||||||
|
Hoy el bus unibus solo está protegido por la red (WireGuard) y por el cifrado E2E
|
||||||
|
por room (megolm). El **control plane** (HTTP `:8470`) y el **data plane** (NATS
|
||||||
|
`:4250`) **no tienen autenticación ni TLS**: cualquiera que alcance esos puertos
|
||||||
|
puede crear rooms, leer metadata, publicar, y hacer DoS. El contenido de las rooms
|
||||||
|
`ModeMatrix` está cifrado E2E, pero las rooms `ModeNATS` (cleartext), la metadata
|
||||||
|
de subjects y todo el control plane viajan en claro y sin control de acceso.
|
||||||
|
|
||||||
|
Este issue añade tres capas de seguridad al propio bus, de modo que **WireGuard
|
||||||
|
pase a ser opcional** (defensa en profundidad) y el bus pueda exponerse de forma
|
||||||
|
segura incluso a un cliente móvil en una red ajena:
|
||||||
|
|
||||||
|
1. **Sistema de usuarios** — un registro a nivel bus de las identidades autorizadas
|
||||||
|
(allowlist de claves públicas Ed25519), con roles y revocación.
|
||||||
|
2. **Auth del control plane** — cada request HTTP va firmado con la identidad del
|
||||||
|
peer; el server verifica la firma y que la identidad esté autorizada.
|
||||||
|
3. **NATS endurecido** — autenticación por nkey (Ed25519) contra el registro de
|
||||||
|
usuarios + TLS para cifrar todo el transporte del data plane.
|
||||||
|
|
||||||
|
# Modelo de amenazas y capas
|
||||||
|
|
||||||
|
| Capa | Qué protege | Estado hoy | Tras este issue |
|
||||||
|
|---|---|---|---|
|
||||||
|
| WireGuard | Acceso de red; oculta el bus de internet | activo (opcional) | sigue disponible, ya no imprescindible |
|
||||||
|
| TLS NATS | Confidencialidad/integridad del **canal** (cleartext rooms, metadata, nonces de auth) | ausente | CA propia self-signed |
|
||||||
|
| Auth (firma Ed25519 / nkey) | **Autenticación**: solo identidades registradas conectan/operan | ausente | control plane + data plane |
|
||||||
|
| E2E por room (megolm) | Confidencialidad del **contenido** de rooms cifradas | activo | sin cambios |
|
||||||
|
|
||||||
|
Principio: cada capa es independiente. TLS cifra el canal, la auth decide quién
|
||||||
|
entra, el E2E protege el contenido aunque el bus fuera comprometido.
|
||||||
|
|
||||||
|
# Diseño
|
||||||
|
|
||||||
|
## Pieza 1 — Sistema de usuarios
|
||||||
|
|
||||||
|
Registro a nivel bus (no por room) de las identidades autorizadas. Migración
|
||||||
|
**aditiva** `migrations/002_users.sql` (y su gemela embebida en
|
||||||
|
`pkg/membership/migrations/`):
|
||||||
|
|
||||||
|
```sql
|
||||||
|
CREATE TABLE IF NOT EXISTS users (
|
||||||
|
sign_pub TEXT PRIMARY KEY, -- clave pública Ed25519 en hex (identidad del peer)
|
||||||
|
handle TEXT NOT NULL, -- nombre legible (único recomendado, no PK)
|
||||||
|
role TEXT NOT NULL DEFAULT 'member', -- 'admin' | 'member'
|
||||||
|
status TEXT NOT NULL DEFAULT 'active', -- 'active' | 'revoked'
|
||||||
|
created_at TEXT NOT NULL,
|
||||||
|
revoked_at TEXT
|
||||||
|
);
|
||||||
|
CREATE INDEX IF NOT EXISTS idx_users_status ON users(status);
|
||||||
|
```
|
||||||
|
|
||||||
|
- `sign_pub` es la misma clave que ya deriva el `endpoint` (`frame.EndpointID(SignPub)`).
|
||||||
|
- CRUD en `pkg/membership/store.go`: `AddUser`, `GetUser`, `ListUsers`,
|
||||||
|
`RevokeUser`, `IsAuthorized(signPubHex) bool`.
|
||||||
|
- CLI de administración en `cmd/membershipd`: `membershipd user add --handle h
|
||||||
|
--sign-pub <hex> [--role admin]`, `user list`, `user revoke <sign-pub>`.
|
||||||
|
- **Bootstrap (chicken-egg):** el primer `admin` se siembra ejecutando el CLI
|
||||||
|
localmente en el host del bus (`user add --role admin --sign-pub <tu_pub>`). El
|
||||||
|
CLI local se considera de confianza (quien tiene shell en el host ya manda). Sin
|
||||||
|
al menos un admin, los endpoints de gestión de usuarios devuelven 403.
|
||||||
|
|
||||||
|
## Pieza 2 — Auth del control plane (HTTP :8470)
|
||||||
|
|
||||||
|
Generaliza la firma que ya existe (`pkg/client.signRequest` ↔
|
||||||
|
`pkg/membership.verifyOwnerSig`) de "solo owner" a "todo request".
|
||||||
|
|
||||||
|
**Cliente** (`pkg/client`): cada request añade cabeceras:
|
||||||
|
|
||||||
|
```
|
||||||
|
X-Unibus-Pub: <sign_pub hex>
|
||||||
|
X-Unibus-Ts: <unix seconds>
|
||||||
|
X-Unibus-Nonce: <16 bytes aleatorios, base64>
|
||||||
|
X-Unibus-Sig: Ed25519( canonical ) ; canonical = method "\n" path "\n" ts "\n" nonce "\n" sha256(body)
|
||||||
|
```
|
||||||
|
|
||||||
|
**Server** (middleware en `membershipd`):
|
||||||
|
1. Parsear cabeceras; reconstruir `canonical`; verificar firma con `X-Unibus-Pub`.
|
||||||
|
2. Comprobar `IsAuthorized(pub)` (status active). Si no → `401`.
|
||||||
|
3. **Anti-replay:** rechazar si `|now - ts| > 30s`; cachear `nonce` con TTL 60s y
|
||||||
|
rechazar repetidos (LRU en memoria, suficiente para un único membershipd).
|
||||||
|
4. Autorización fina: operaciones de gestión de usuarios exigen `role=admin`;
|
||||||
|
operaciones de room siguen exigiendo ownership donde ya aplica.
|
||||||
|
|
||||||
|
Feature flag `bus-auth` en `dev/feature_flags.json` con tres estados de rollout:
|
||||||
|
`off` (sin verificar) → `soft` (verifica y **loguea** rechazos pero deja pasar) →
|
||||||
|
`enforce` (rechaza). Permite migrar clientes sin cortar el servicio.
|
||||||
|
|
||||||
|
## Pieza 3 — NATS: nkey auth + TLS
|
||||||
|
|
||||||
|
### Auth (nkey sobre la identidad Ed25519)
|
||||||
|
|
||||||
|
Los nkeys de NATS **son** claves Ed25519, así que reutilizamos la identidad del
|
||||||
|
peer sin material nuevo.
|
||||||
|
|
||||||
|
- **Server** (`pkg/embeddednats`): `server.Options.CustomClientAuthentication` con
|
||||||
|
un autenticador que, dado el nonce que NATS presenta al cliente y la firma que el
|
||||||
|
cliente devuelve, verifica la firma con la pubkey declarada y consulta
|
||||||
|
`store.IsAuthorized(pub)`. Validar dinámicamente contra la BD permite **revocar
|
||||||
|
sin reiniciar** el server (ventaja sobre precargar `Options.Nkeys`).
|
||||||
|
- **Cliente** (`pkg/client`): conectar con `nats.Nkey(pubKeyEncoded, sigCB)` donde
|
||||||
|
`sigCB` firma el nonce con la Ed25519 del peer. Convertir `cs.Identity` →
|
||||||
|
formato nkey con `github.com/nats-io/nkeys` (`nkeys.FromRawSeed(PrefixByteUser,
|
||||||
|
seed)`).
|
||||||
|
|
||||||
|
### TLS (CA self-signed propia)
|
||||||
|
|
||||||
|
**Exposición DECIDIDA: pública.** El bus se expone a internet protegido por
|
||||||
|
auth+TLS (WireGuard pasa a ser una vía de acceso más, no la barrera). En
|
||||||
|
consecuencia: `ufw` en om abre `8470/tcp` y `4250/tcp`, y el server cert incluye en
|
||||||
|
su SAN la **IP pública de om `135.125.201.30`**, la **IP WG `10.42.0.1`** (los peers
|
||||||
|
internos siguen funcionando) y el hostname de om. Los clientes son todos
|
||||||
|
controlados por nosotros (`pkg/client`, binding móvil, gateway web, unibots), así
|
||||||
|
que **embeben el `ca.crt`** propio — no hace falta Let's Encrypt ni un dominio
|
||||||
|
público apuntando al NATS.
|
||||||
|
|
||||||
|
- Generar una **CA propia** una vez (`deploy/tls/ca.{key,crt}`), y un **server
|
||||||
|
cert** para el bus con SAN = `135.125.201.30`, `10.42.0.1`, hostname de om.
|
||||||
|
- `pkg/embeddednats`: `server.Options.TLSConfig` con el server cert. NATS pasa a
|
||||||
|
`tls://`.
|
||||||
|
- Cliente: `nats.Secure(&tls.Config{RootCAs: caPool})` cargando la CA propia.
|
||||||
|
- Las claves privadas (CA key, server key) **nunca** se commitean: van gitignored y
|
||||||
|
se distribuyen por `pass`/scp. Solo el `ca.crt` (público) viaja con los clientes.
|
||||||
|
|
||||||
|
# Decisiones técnicas
|
||||||
|
|
||||||
|
| Decisión | Elegido | Alternativa descartada | Razón |
|
||||||
|
|---|---|---|---|
|
||||||
|
| Auth NATS | `CustomClientAuthentication` contra tabla `users` | `Options.Nkeys` estático | revocación dinámica sin reinicio |
|
||||||
|
| TLS | CA self-signed propia | Let's Encrypt | infra privada, sin dependencia de dominio público apuntando al NATS |
|
||||||
|
| Anti-replay control plane | timestamp ±30s + cache de nonce | nonce emitido por server (round-trip extra) | menos latencia, suficiente con un solo membershipd |
|
||||||
|
| Material de identidad | reutilizar la Ed25519 del peer (firma + nkey) | claves separadas por capa | una identidad, menos gestión |
|
||||||
|
| Rollout | feature flag `bus-auth` off→soft→enforce | corte directo | no romper clientes en vuelo |
|
||||||
|
|
||||||
|
# Fases (TBD, ramas `issue/0001x-*`, feature flags)
|
||||||
|
|
||||||
|
1. **0001a — users store + CLI** — migración `002_users.sql`, CRUD en store,
|
||||||
|
comandos `membershipd user *`, seed admin. Flag `bus-auth: off`. Tests de store.
|
||||||
|
2. **0001b — control-plane auth** — firma generalizada en `pkg/client`, middleware
|
||||||
|
de verificación + anti-replay en `membershipd`. Flag `bus-auth: soft`. Tests:
|
||||||
|
request firmado OK, no-autorizado 401, replay rechazado, reloj desfasado 401.
|
||||||
|
3. **0001c — NATS nkey auth** — `CustomClientAuthentication` + cliente con
|
||||||
|
`nats.Nkey`. Tests: peer no registrado rechazado al conectar; revocado pierde
|
||||||
|
acceso sin reiniciar.
|
||||||
|
4. **0001d — TLS NATS** — generación de CA/cert (`deploy/tls/` + script), server
|
||||||
|
`TLSConfig`, cliente `RootCAs`. Flag `bus-tls`. Test: handshake TLS, cliente sin
|
||||||
|
CA rechazado.
|
||||||
|
5. **0001e — migrar clientes** — `mobile/` (binding), gateway web (`playground/`),
|
||||||
|
`unibots` (`shell/transportunibus`): todos firman requests y conectan con
|
||||||
|
nkey+TLS. Pasar `bus-auth` a `enforce`.
|
||||||
|
6. **0001f — deploy** — unibus en om (bind `10.42.0.1` o público con auth+TLS),
|
||||||
|
unibots como systemd-user en el PC local. Verificación E2E.
|
||||||
|
|
||||||
|
# Migración de clientes
|
||||||
|
|
||||||
|
Todo el cambio se concentra en `pkg/client` (firma de requests HTTP + conexión
|
||||||
|
NATS nkey+TLS). `mobile/`, el gateway web y `unibots` lo heredan al recompilar; solo
|
||||||
|
necesitan **pasar la ruta de la CA** y su identidad (que ya tienen). El binding
|
||||||
|
gomobile expone un parámetro nuevo `caPath` en `NewSession`.
|
||||||
|
|
||||||
|
# Plan de despliegue (fase 0001f)
|
||||||
|
|
||||||
|
1. Cross-build `CGO_ENABLED=0 GOOS=linux GOARCH=amd64` del `membershipd`.
|
||||||
|
2. `scp` binario + `ca.crt` + server cert/key a om (`/opt/unibus/`), dir de datos
|
||||||
|
persistente para JetStream/db/blobs.
|
||||||
|
3. systemd-system unit, `--bind 0.0.0.0` (exposición pública), `Restart=always`.
|
||||||
|
4. `ufw allow 8470/tcp` y `ufw allow 4250/tcp` en om.
|
||||||
|
5. Seed del admin (tu identidad) por CLI local en om.
|
||||||
|
6. Verificar **desde fuera de la VPN** (red pública) y desde la WG: handshake TLS,
|
||||||
|
`curl` firmado a `/healthz` OK, `curl` sin firma → 401, conexión NATS de un peer
|
||||||
|
no registrado → rechazada.
|
||||||
|
7. unibots local: systemd-user con `caPath` + identidad registrada.
|
||||||
|
|
||||||
|
> **Nota:** la fase de despliegue (0001f: abrir firewall público, scp a om, systemd
|
||||||
|
> en el VPS) la ejecuta el humano en coordinación, no el agente autónomo — es una
|
||||||
|
> acción outward sobre infraestructura pública. El agente entrega 0001a–0001e
|
||||||
|
> (código + tests + CA/cert generados) en master de unibus, listos para desplegar.
|
||||||
|
|
||||||
|
# Tests (DoD: golden + edge + error path, evidencia ejecutable)
|
||||||
|
|
||||||
|
- **Golden:** peer autorizado crea room, publica y recibe por el bus con auth+TLS
|
||||||
|
activos.
|
||||||
|
- **Edge:** revocar un usuario activo → su próxima conexión NATS y su próximo
|
||||||
|
request HTTP son rechazados sin reiniciar el server.
|
||||||
|
- **Error path:** request con firma válida pero identidad no registrada → 401;
|
||||||
|
conexión NATS con nkey no autorizado → rechazada; cliente sin la CA → fallo de
|
||||||
|
handshake TLS; replay de un request firmado → rechazado.
|
||||||
|
- Suite completa `CGO_ENABLED=0 go test ./...` verde.
|
||||||
|
|
||||||
|
# Riesgos y mitigaciones
|
||||||
|
|
||||||
|
| Riesgo | Mitigación |
|
||||||
|
|---|---|
|
||||||
|
| Chicken-egg del primer admin | seed por CLI local en el host (confianza de shell) |
|
||||||
|
| Romper clientes en vuelo al activar auth | flag `bus-auth` off→soft→enforce; migrar clientes en soft |
|
||||||
|
| Rotación/caducidad de certs | CA propia de larga vida; documentar regeneración del server cert en `deploy/tls/README.md` |
|
||||||
|
| Coste de verificar firma por request | Ed25519 verify ≈ µs; despreciable frente a la latencia de red |
|
||||||
|
| Conversión Ed25519 → nkey mal hecha | test dedicado de ida y vuelta firma/verify nkey antes de tocar el server |
|
||||||
|
| Claves privadas filtradas en git | CA key / server key gitignored; distribución por `pass`/scp; solo `ca.crt` versionado |
|
||||||
|
|
||||||
|
# Fuera de alcance (futuro)
|
||||||
|
|
||||||
|
- Rotación automática de credenciales de usuario.
|
||||||
|
- Cuentas/multi-tenant de NATS (un solo account basta hoy).
|
||||||
|
- Federación entre buses.
|
||||||
@@ -0,0 +1,146 @@
|
|||||||
|
---
|
||||||
|
issue: 0002
|
||||||
|
title: Media v2 — archivos grandes (chunking), metadata, GC del object store, exponer en clientes
|
||||||
|
status: spec
|
||||||
|
created: 2026-06-07
|
||||||
|
domain: media
|
||||||
|
scope: unibus (pkg/blobstore, pkg/frame, pkg/client, pkg/membership) + clientes (mobile binding, gateway web, unibots)
|
||||||
|
depends_on: 0001 (la auth firmada del control plane debe cubrir /blobs antes de exponer media)
|
||||||
|
---
|
||||||
|
|
||||||
|
# Objetivo
|
||||||
|
|
||||||
|
El envío de archivos (imágenes, audio, vídeo) ya funciona en v1, pero con límites
|
||||||
|
que lo hacen inviable para vídeo grande y poco usable para los clientes. Este issue
|
||||||
|
lleva la media a un estado de producción: archivos grandes por chunks, metadata de
|
||||||
|
tipo/nombre, recolección de basura del object store, y exposición en los frontends.
|
||||||
|
|
||||||
|
# Contexto — cómo funciona media v1 (hoy)
|
||||||
|
|
||||||
|
`PublishMedia(roomID, data []byte)` cifra el archivo **entero** con la clave de la
|
||||||
|
room (`SealAEAD`), lo sube **entero** al object store (`pkg/blobstore`,
|
||||||
|
content-addressed por hash) vía el control plane (`POST /blobs`), y publica por el
|
||||||
|
bus solo una referencia `frame.BlobRef{Hash, Nonce, Size}`. `FetchMedia` baja el
|
||||||
|
ciphertext por hash (`GET /blobs/{hash}`) y lo descifra. El binario nunca viaja por
|
||||||
|
NATS; el bus solo lleva la referencia. El object store guarda solo ciphertext (E2E
|
||||||
|
real). Es correcto y simple, pero:
|
||||||
|
|
||||||
|
| Limitación v1 | Consecuencia |
|
||||||
|
|---|---|
|
||||||
|
| Todo el archivo en RAM (cifra y sube de una vez) | imágenes/audio OK; vídeo grande (cientos MB/GB) revienta memoria |
|
||||||
|
| `BlobRef` solo lleva hash+nonce+size | el receptor no sabe mimetype/filename; no puede renderizar bien |
|
||||||
|
| Subida no reanudable | si falla la subida de un archivo grande, hay que volver a empezar de cero |
|
||||||
|
| Object store sin GC | blobs content-addressed crecen indefinidamente, sin refcount ni TTL |
|
||||||
|
| `mobile/` solo expone `Publish` (texto) | no se puede enviar una foto desde el móvil |
|
||||||
|
| Gateway web sin endpoints de media | la SPA no sube/baja archivos |
|
||||||
|
|
||||||
|
Fuera de alcance de este issue (sería otro): **streaming en vivo** (videollamada,
|
||||||
|
audio en tiempo real) — eso no es modelo blob, requiere WebRTC señalizado por el bus.
|
||||||
|
|
||||||
|
# Diseño
|
||||||
|
|
||||||
|
## Pieza 1 — Chunking de archivos grandes
|
||||||
|
|
||||||
|
Partir el archivo en chunks de tamaño fijo (propuesta: 4 MB), cifrar **cada chunk**
|
||||||
|
de forma independiente con la clave de la room (nonce por chunk), y subir cada chunk
|
||||||
|
como un blob propio (content-addressed). La referencia pasa de un solo blob a un
|
||||||
|
manifiesto de chunks.
|
||||||
|
|
||||||
|
- `frame.BlobRef` evoluciona (de forma compatible) a soportar lista de chunks:
|
||||||
|
```
|
||||||
|
BlobRef{
|
||||||
|
Hash string // hash del manifiesto (o del blob único si no hay chunks)
|
||||||
|
Nonce []byte // nonce del manifiesto / del blob único
|
||||||
|
Size int64 // tamaño total en claro
|
||||||
|
Chunks []ChunkRef // vacío en archivos pequeños (camino v1 intacto)
|
||||||
|
}
|
||||||
|
ChunkRef{ Hash string; Nonce []byte; Size int64 } // por chunk cifrado
|
||||||
|
```
|
||||||
|
- `PublishMediaStream(roomID string, r io.Reader, meta MediaMeta) (BlobRef, error)`:
|
||||||
|
lee del `io.Reader` en chunks (no carga el archivo entero en RAM), cifra y sube
|
||||||
|
cada chunk, y construye el manifiesto. El `PublishMedia([]byte)` v1 se mantiene
|
||||||
|
como atajo para archivos pequeños (sin chunks).
|
||||||
|
- `FetchMediaStream(roomID, BlobRef) (io.ReadCloser, error)`: baja y descifra chunks
|
||||||
|
bajo demanda, exponiendo un `io.Reader` (descarga progresiva, no todo en RAM).
|
||||||
|
- Subida/descarga de chunks en paralelo acotado (p. ej. 4 a la vez) para throughput.
|
||||||
|
|
||||||
|
## Pieza 2 — Metadata (mimetype + filename)
|
||||||
|
|
||||||
|
Añadir a `BlobRef` (o a un sidecar cifrado) los campos `Mime string` y `Name
|
||||||
|
string`, de modo que el receptor sepa renderizar (imagen inline, reproductor de
|
||||||
|
audio/vídeo, icono de descarga). Como `Name`/`Mime` pueden ser sensibles, viajan
|
||||||
|
**dentro del campo cifrado** del frame, no en claro. Detección de mimetype por
|
||||||
|
sniffing del primer chunk + extensión.
|
||||||
|
|
||||||
|
## Pieza 3 — Garbage collection del object store
|
||||||
|
|
||||||
|
Hoy los blobs no se borran nunca. Introducir refcount o barrido:
|
||||||
|
|
||||||
|
- **Refcount por referencia**: una tabla `blob_refs(hash, room_id, msg_id)` en el
|
||||||
|
control plane; al expirar un mensaje de una room efímera o al purgar historial de
|
||||||
|
una room persistente, decrementar y borrar el blob cuando llega a cero.
|
||||||
|
- **Alternativa TTL**: blobs de rooms efímeras con TTL; blobs de rooms persistentes
|
||||||
|
viven mientras viva el mensaje en JetStream.
|
||||||
|
- Comando `membershipd blobs gc [--dry-run]` para barrido manual + métrica de
|
||||||
|
espacio. Debe ser idempotente y seguro (nunca borrar un blob aún referenciado).
|
||||||
|
|
||||||
|
## Pieza 4 — Exponer media en los clientes
|
||||||
|
|
||||||
|
- **Binding móvil** (`mobile/unibus.go`): `SendFile(roomID, path, mime)` y
|
||||||
|
`FetchFile(roomID, frameJSON) -> path` (escribe a un archivo local del sandbox de
|
||||||
|
la app y devuelve la ruta; no pasa []byte grandes por el puente gomobile).
|
||||||
|
- **Gateway web** (`playground/server.go`): `POST /api/media` (multipart, streaming
|
||||||
|
al store) y `GET /api/media/{room}/{hash}` (descarga descifrada con los headers
|
||||||
|
`Content-Type`/`Content-Disposition` derivados de la metadata).
|
||||||
|
- **unibots**: una tool `send_file` para que un bot pueda adjuntar archivos.
|
||||||
|
|
||||||
|
# Decisiones técnicas
|
||||||
|
|
||||||
|
| Decisión | Elegido | Alternativa | Razón |
|
||||||
|
|---|---|---|---|
|
||||||
|
| Tamaño de chunk | 4 MB | 1 MB / 16 MB | equilibrio RAM vs overhead de manifiesto |
|
||||||
|
| Cifrado por chunk | nonce independiente por chunk, misma clave de room | re-cifrar todo | permite descarga/borrado parcial y paralelismo |
|
||||||
|
| Metadata sensible | dentro del frame cifrado | en claro en BlobRef | filename/mime pueden filtrar info |
|
||||||
|
| GC | refcount en control plane | solo TTL | preciso, no borra lo aún referenciado |
|
||||||
|
| Compatibilidad v1 | `Chunks` vacío = camino v1 | romper formato | no romper media ya enviada |
|
||||||
|
|
||||||
|
# Fases (TBD, ramas `issue/0002x-*`)
|
||||||
|
|
||||||
|
1. **0002a — BlobRef con chunks (compatible)** — extender el tipo + tests de
|
||||||
|
marshalling con `Chunks` vacío (v1) y con chunks (v2). Sin cambiar clientes aún.
|
||||||
|
2. **0002b — PublishMediaStream / FetchMediaStream** — API de streaming en
|
||||||
|
`pkg/client` sobre `io.Reader`/`io.ReadCloser`, cifrado por chunk, subida/descarga
|
||||||
|
paralela acotada. Tests con un archivo > tamaño de chunk.
|
||||||
|
3. **0002c — metadata mime+name** (en el campo cifrado) + sniffing.
|
||||||
|
4. **0002d — GC del object store** — refcount + `membershipd blobs gc` + tests de
|
||||||
|
"no borrar referenciado / borrar huérfano".
|
||||||
|
5. **0002e — exponer en clientes** — binding móvil (`SendFile`/`FetchFile`), gateway
|
||||||
|
web (`/api/media`), tool `send_file` en unibots.
|
||||||
|
|
||||||
|
# Definition of Done (evidencia ejecutable)
|
||||||
|
|
||||||
|
- **Golden:** enviar y recibir una imagen pequeña (camino v1, sin chunks) sigue
|
||||||
|
funcionando; enviar y recibir un archivo de 50 MB por chunks sin cargar 50 MB en
|
||||||
|
RAM (medir RSS durante la operación).
|
||||||
|
- **Edge:** archivo cuyo tamaño es múltiplo exacto del chunk; archivo de 1 byte;
|
||||||
|
archivo justo por debajo y por encima del umbral de chunking.
|
||||||
|
- **Error path:** chunk corrupto/no descifrable → error claro, no panic; `blobs gc`
|
||||||
|
con un blob aún referenciado → NO lo borra (assert).
|
||||||
|
- `CGO_ENABLED=0 go test ./...` verde.
|
||||||
|
|
||||||
|
# Riesgos y mitigaciones
|
||||||
|
|
||||||
|
| Riesgo | Mitigación |
|
||||||
|
|---|---|
|
||||||
|
| Romper media v1 ya enviada | `Chunks` vacío preserva el camino v1; tests de compatibilidad |
|
||||||
|
| GC borra un blob aún referenciado | refcount + barrido conservador + `--dry-run` por defecto en CI |
|
||||||
|
| Puente gomobile con []byte grandes | el binding trabaja con rutas de archivo, no buffers en memoria |
|
||||||
|
| Paralelismo de chunks satura el control plane | límite de concurrencia (4) + el endurecimiento de auth del issue 0001 |
|
||||||
|
|
||||||
|
# Relación con otros issues
|
||||||
|
|
||||||
|
- **0001 (seguridad)** — prerrequisito: la auth firmada del control plane debe cubrir
|
||||||
|
`POST/GET /blobs` antes de exponer media públicamente; si no, cualquiera llena el
|
||||||
|
store o descarga ciphertext ajeno.
|
||||||
|
- **Streaming en vivo** (futuro, no este issue) — videollamada/audio en tiempo real =
|
||||||
|
WebRTC con el bus como canal de señalización; modelo distinto al blob.
|
||||||
@@ -0,0 +1,195 @@
|
|||||||
|
---
|
||||||
|
issue: 0003
|
||||||
|
title: Descentralización / alta disponibilidad — cluster NATS + JetStream replicado + control plane sin SPOF
|
||||||
|
status: spec
|
||||||
|
created: 2026-06-07
|
||||||
|
domain: infra
|
||||||
|
scope: unibus (pkg/embeddednats, pkg/membership, pkg/blobstore, pkg/client, cmd/membershipd) + despliegue multi-nodo
|
||||||
|
depends_on: 0001 (la auth de cluster y de clientes va junto con el endurecimiento)
|
||||||
|
---
|
||||||
|
|
||||||
|
# Objetivo
|
||||||
|
|
||||||
|
Que la caída de un servidor **no deje el bus sin servicio**. Hoy unibus es un único
|
||||||
|
`membershipd` (con NATS embebido + SQLite local): si ese host muere, no hay bus.
|
||||||
|
Este issue lleva unibus a un modelo **descentralizado / alta disponibilidad** usando
|
||||||
|
las capacidades nativas de NATS: cluster multi-nodo, JetStream replicado (RAFT), y
|
||||||
|
el estado del control plane fuera de la SQLite local. **No es federación**
|
||||||
|
(multi-operador con dominios distintos); es eliminar el punto único de fallo dentro
|
||||||
|
de un único dominio administrativo controlado por nosotros.
|
||||||
|
|
||||||
|
# Requisito clave de quorum (decisión de infraestructura)
|
||||||
|
|
||||||
|
JetStream replica con RAFT, que necesita **mayoría (quorum)** para confirmar
|
||||||
|
escrituras. Las consecuencias son duras y hay que asumirlas desde el diseño:
|
||||||
|
|
||||||
|
| Nodos | Réplica | Tolera caída de | Nota |
|
||||||
|
|---|---|---|---|
|
||||||
|
| 1 | R1 | 0 | situación actual (SPOF) |
|
||||||
|
| 2 | R2 | **0** | si cae uno se pierde quorum: las escrituras se bloquean. NO sirve para HA |
|
||||||
|
| **3** | **R3** | **1** | mínimo real para "si un server cae, seguimos" |
|
||||||
|
| 5 | R5 | 2 | mayor tolerancia |
|
||||||
|
|
||||||
|
**Por tanto el objetivo del usuario ("si mi server falla, no nos quedamos sin
|
||||||
|
servicio") exige 3 nodos JetStream.** Servers disponibles hoy: **magnus** y
|
||||||
|
**homer** (ambos VPS OVH). El tercero está pendiente de conseguir.
|
||||||
|
|
||||||
|
| Nodo | IP pública | Estado | Notas |
|
||||||
|
|---|---|---|---|
|
||||||
|
| magnus | (en pass: `MAGNUS_ovh_ssh_ROOT`) | disponible, **cargado** | corre coolify, minio, postgres, authentik, portainer, dagu — revisar recursos antes |
|
||||||
|
| homer | `141.94.69.66` | disponible, vivo | creds en pass (`vps_ovhcloud_SSH_SERVER_HOMER_-_root`, `vps_SSH_SERVER_HOMER_dataherrero`); tenía coolify |
|
||||||
|
| nodo 3 | — | **pendiente** | conseguir un tercer VPS siempre-on, o reusar om/datardos si se liberan |
|
||||||
|
|
||||||
|
Preparación previa al deploy de cada nodo: alta del alias SSH + clave, integración en
|
||||||
|
la WireGuard, y revisar/aligerar la carga existente (coolify, etc.).
|
||||||
|
|
||||||
|
## Rollout R1 → R3: funcionar con 2 nodos hoy, HA con 3 mañana
|
||||||
|
|
||||||
|
No se "desactiva el quorum"; se controla el **número de réplicas** de cada stream/KV:
|
||||||
|
|
||||||
|
| Réplicas | Quorum | Tolera | Sirve con |
|
||||||
|
|---|---|---|---|
|
||||||
|
| R1 | ninguno (1 copia) | 0 caídas | 1-2 nodos, sin bloqueo |
|
||||||
|
| R3 | 2 de 3 | 1 caída | 3 nodos |
|
||||||
|
|
||||||
|
- **Fase actual (magnus + homer):** desplegar con streams/KV en **R1** (flag
|
||||||
|
`decentralized: off`). El bus funciona al 100% para operar, sin tolerancia a fallo
|
||||||
|
todavía. Opción: streams en **R2** para duplicar los datos en ambos nodos
|
||||||
|
(durabilidad/backup vivo), asumiendo que la escritura necesita los dos hasta el 3er
|
||||||
|
nodo.
|
||||||
|
- **Cuando entre el nodo 3:** escalar en caliente `nats stream update --replicas 3`
|
||||||
|
(idem KV/Object Store) + añadir el nodo al cluster + flag `decentralized: on`. **HA
|
||||||
|
real, sin downtime, sin reescritura, sin migrar datos.**
|
||||||
|
- **Aviso de 2 nodos:** NO montar el meta-group de JetStream con 2 nodos como si
|
||||||
|
fuera HA — su quorum es 2, y la caída de uno bloquea la gestión de streams. Con 2
|
||||||
|
servers, modelo recomendado: **magnus principal (R1) + homer 2º nodo/réplica**, y
|
||||||
|
escalar a R3 al tener el tercero.
|
||||||
|
|
||||||
|
Mientras solo haya 2 nodos: el **data plane efímero** (core-NATS, rooms `ModeNATS`)
|
||||||
|
sí tolera la caída de uno (los clientes reconectan al otro), pero las **rooms
|
||||||
|
persistentes y el control plane** (que necesitan quorum) no. El issue se despliega
|
||||||
|
de verdad cuando haya 3 nodos.
|
||||||
|
|
||||||
|
# Contexto — por qué hoy es un SPOF
|
||||||
|
|
||||||
|
- `pkg/embeddednats` arranca un NATS **standalone** (sin cluster).
|
||||||
|
- `pkg/membership` guarda rooms/members/room_keys/users en una **SQLite local** al
|
||||||
|
proceso.
|
||||||
|
- `pkg/blobstore` guarda los blobs en el **disco local** del proceso.
|
||||||
|
- El cliente (`pkg/client`) conecta a **una** URL de NATS y **una** de control plane.
|
||||||
|
|
||||||
|
Todo vive en un host. Ese host es el punto único de fallo.
|
||||||
|
|
||||||
|
# Diseño
|
||||||
|
|
||||||
|
## Pieza 1 — Cluster NATS (data plane replicado)
|
||||||
|
|
||||||
|
`pkg/embeddednats` gana opciones de cluster: `server.Options.Cluster` (nombre +
|
||||||
|
host/puerto de routes) y `Routes` (los otros nodos). Cada `membershipd` arranca su
|
||||||
|
NATS embebido en cluster con los demás. JetStream se habilita con `Replicas: 3` en
|
||||||
|
streams y KV. Auth entre nodos (routes) con credenciales propias (no las de
|
||||||
|
clientes), y TLS también en las routes (reusa la CA del issue 0001).
|
||||||
|
|
||||||
|
## Pieza 2 — Control plane sin estado local (SQLite → JetStream KV)
|
||||||
|
|
||||||
|
Es el corazón del issue. Hoy `pkg/membership.Store` es SQLite. Se introduce, por
|
||||||
|
**branch-by-abstraction**, una interfaz `Store` con dos implementaciones:
|
||||||
|
|
||||||
|
- `sqliteStore` — la actual (sigue siendo el default mientras el flag está off; útil
|
||||||
|
para un solo nodo / desarrollo).
|
||||||
|
- `jetstreamStore` — nueva: rooms, members, room_keys y users (la tabla del issue
|
||||||
|
0001) viven en **JetStream KV** (buckets replicados R3). Cualquier nodo lee/escribe
|
||||||
|
el mismo estado; RAFT garantiza consistencia. El HTTP control plane pasa a ser
|
||||||
|
efectivamente **stateless**: cualquier `membershipd` sirve cualquier request
|
||||||
|
porque el estado está en el KV replicado.
|
||||||
|
|
||||||
|
Flag `decentralized` (off → on). Migración inicial de datos SQLite → KV con un
|
||||||
|
comando `membershipd migrate-to-kv` (idempotente). Las claves de room siguen
|
||||||
|
selladas igual; solo cambia **dónde se guardan**, no el cifrado.
|
||||||
|
|
||||||
|
## Pieza 3 — Blobs replicados (object store → NATS Object Store)
|
||||||
|
|
||||||
|
`pkg/blobstore` gana una implementación sobre **NATS Object Store** (encima de
|
||||||
|
JetStream, replicado R3) además de la de disco local. Los blobs (ya ciphertext, E2E)
|
||||||
|
quedan disponibles desde cualquier nodo. Encaja con el GC del issue 0002.
|
||||||
|
|
||||||
|
## Pieza 4 — Cliente con failover
|
||||||
|
|
||||||
|
`pkg/client`: aceptar **lista** de seeds de NATS y **lista** de URLs de control
|
||||||
|
plane. `nats.go` ya hace reconnect/failover entre servidores del cluster nativamente
|
||||||
|
(lista de URLs en `nats.Connect` u `Options.Servers`, `nats.MaxReconnects(-1)`). El control plane HTTP se prueba en
|
||||||
|
orden con reintento. Así, si un nodo cae, el cliente reconecta a otro de forma
|
||||||
|
transparente.
|
||||||
|
|
||||||
|
## Pieza 5 — Despliegue multi-nodo
|
||||||
|
|
||||||
|
3 nodos `membershipd`, cada uno con su NATS embebido en cluster, JetStream R3, mismo
|
||||||
|
`ca.crt`/credenciales de routes. systemd en cada VPS. Los clientes reciben la lista
|
||||||
|
de los 3 endpoints. Health/observabilidad por nodo (`/healthz` + métricas de
|
||||||
|
JetStream: líder RAFT, lag de réplica).
|
||||||
|
|
||||||
|
# Decisiones técnicas
|
||||||
|
|
||||||
|
| Decisión | Elegido | Alternativa | Razón |
|
||||||
|
|---|---|---|---|
|
||||||
|
| Nº de nodos de quorum | 3 (R3) | 2 (R2) | 2 no tolera caída de uno; 3 es el mínimo real de HA |
|
||||||
|
| Estado del control plane | JetStream KV replicado | SQLite replicada a mano / Postgres externo | KV ya viene con NATS, mismo RAFT que JetStream, cero infra extra |
|
||||||
|
| Migración del store | branch-by-abstraction (interfaz `Store`, dos impls, flag) | reescritura directa | master nunca se rompe; sqlite sigue para 1 nodo/dev |
|
||||||
|
| Blobs | NATS Object Store | disco compartido / S3 | replicado nativamente, sin dependencia externa |
|
||||||
|
| Failover de cliente | lista de seeds + reconnect nativo nats.go | balanceador externo | menos infra, nats.go ya lo hace |
|
||||||
|
| Federación multi-operador | **fuera de alcance** | — | no es el objetivo; es otra liga (trust entre dominios) |
|
||||||
|
|
||||||
|
# Fases (TBD, ramas `issue/0003x-*`)
|
||||||
|
|
||||||
|
1. **0003a — cluster NATS** — opciones de cluster/routes + TLS de routes en
|
||||||
|
`pkg/embeddednats`; arrancar 2-3 nodos locales en tests e2e y verificar que un
|
||||||
|
subject publicado en uno llega a un suscriptor en otro.
|
||||||
|
2. **0003b — interfaz Store + jetstreamStore (KV)** — abstraer `pkg/membership.Store`;
|
||||||
|
implementar rooms/members/room_keys/users sobre JetStream KV R3; tests de
|
||||||
|
consistencia. Flag `decentralized: off`.
|
||||||
|
3. **0003c — migrate-to-kv** — comando idempotente SQLite → KV + test de paridad
|
||||||
|
(mismo estado antes/después).
|
||||||
|
4. **0003d — blobs en Object Store** — impl `pkg/blobstore` sobre NATS Object Store
|
||||||
|
replicado.
|
||||||
|
5. **0003e — cliente failover** — lista de seeds + lista de ctrl-urls + reconnect;
|
||||||
|
test que mata el nodo al que está conectado y verifica que sigue operando.
|
||||||
|
6. **0003f — despliegue 3 nodos** (humano) — 3 VPS en cluster, JetStream R3, flag
|
||||||
|
`decentralized: on`. Chaos test real: matar un nodo en producción y comprobar que
|
||||||
|
el servicio sigue.
|
||||||
|
|
||||||
|
# Definition of Done (evidencia ejecutable)
|
||||||
|
|
||||||
|
- **Golden:** 3 nodos en cluster; un cliente publica en un nodo y otro cliente
|
||||||
|
suscrito a otro nodo lo recibe; crear room + invitar funciona desde cualquier nodo.
|
||||||
|
- **Edge:** un cliente conectado al nodo A; se **mata el nodo A**; el cliente
|
||||||
|
reconecta a B automáticamente y sigue publicando/recibiendo sin perder la sesión.
|
||||||
|
- **Error path (chaos):** matar 1 de 3 nodos → el control plane sigue aceptando
|
||||||
|
escrituras (quorum 2/3); matar 2 de 3 → las escrituras se bloquean (quorum perdido,
|
||||||
|
comportamiento esperado y documentado, no corrupción).
|
||||||
|
- `CGO_ENABLED=0 go test ./...` verde, incluido un test e2e multi-nodo en proceso.
|
||||||
|
|
||||||
|
# Riesgos y mitigaciones
|
||||||
|
|
||||||
|
| Riesgo | Mitigación |
|
||||||
|
|---|---|
|
||||||
|
| Solo 2 nodos disponibles → sin quorum real | prerrequisito explícito de 3 nodos antes de 0003f; hasta entonces, el despliegue queda en standalone |
|
||||||
|
| Latencia inter-VPS afecta RAFT | nodos en la misma región o con buena red; medir; R3 tolera latencias moderadas |
|
||||||
|
| Migración SQLite→KV pierde datos | comando idempotente + test de paridad + backup de la SQLite antes |
|
||||||
|
| Partición de red (split-brain) | RAFT lo previene: el lado sin quorum se bloquea para escritura, no diverge |
|
||||||
|
| Complejidad operativa de 3 nodos | observabilidad de JetStream (líder, lag) + `/healthz` por nodo + runbook en deploy/ |
|
||||||
|
|
||||||
|
# Orden recomendado respecto a otros issues
|
||||||
|
|
||||||
|
1. **0001 (seguridad)** primero: la auth de clientes (nkey) y la CA/TLS se reutilizan
|
||||||
|
para las routes del cluster. Desplegar descentralizado sin auth sería abrir varios
|
||||||
|
puntos públicos sin protección.
|
||||||
|
2. **0003 (este)** después: una vez el bus es seguro, replicarlo en 3 nodos.
|
||||||
|
3. **0002 (media v2)** es ortogonal; su object store encaja con la pieza 3 (blobs
|
||||||
|
replicados) cuando ambos estén.
|
||||||
|
|
||||||
|
# Fuera de alcance
|
||||||
|
|
||||||
|
- Federación entre operadores/dominios distintos (otra liga; requiere protocolo de
|
||||||
|
trust entre dominios).
|
||||||
|
- Multi-tenant / accounts de NATS por organización.
|
||||||
|
- Auto-escalado dinámico de nodos.
|
||||||
@@ -0,0 +1,146 @@
|
|||||||
|
---
|
||||||
|
issue: 0004
|
||||||
|
title: Hardening de seguridad — autorización, anti-DoS y confidencialidad antes de exponer público
|
||||||
|
status: done
|
||||||
|
created: 2026-06-07
|
||||||
|
completed: 2026-06-07
|
||||||
|
report: projects/message_bus/reports/0005-2026-06-07-unibus-security-hardening.md
|
||||||
|
domain: security
|
||||||
|
scope: unibus (pkg/membership/server.go, auth.go, pkg/embeddednats, pkg/client, cmd/membershipd, deploy/tls)
|
||||||
|
depends_on: 0001 (cierra los gaps que la auditoría 0004 encontró sobre lo entregado en 0001)
|
||||||
|
blocks: 0001f (deploy público) y 0003f (deploy descentralizado)
|
||||||
|
source: projects/message_bus/reports/0004-2026-06-07-unibus-security-audit.md
|
||||||
|
---
|
||||||
|
|
||||||
|
# Objetivo
|
||||||
|
|
||||||
|
La auditoría red-team (report 0004) concluyó: la **autenticación** del bus es sólida,
|
||||||
|
pero faltan **autorización, disponibilidad y confidencialidad de metadata** — justo lo
|
||||||
|
que un bus *público* necesita. Veredicto: **NO exponer público hoy**. Este issue cierra
|
||||||
|
los hallazgos bloqueantes (1 crítico + 4 altos) y los medios relevantes, de modo que el
|
||||||
|
deploy 0001f (público) y luego 0003 (descentralizado) sean seguros.
|
||||||
|
|
||||||
|
Cada fase corresponde a un hallazgo del report 0004. La **DoD de cada fase es portar el
|
||||||
|
test adversarial del auditor** (`TestAudit_*`) y verificar que ahora arroja el resultado
|
||||||
|
SEGURO (el ataque que antes pasaba ahora se rechaza).
|
||||||
|
|
||||||
|
# Fases (TBD, ramas `issue/0004x-*`, una por hallazgo)
|
||||||
|
|
||||||
|
## 0004a — H1 (Crítico): límite de cuerpo + anti-DoS pre-auth
|
||||||
|
|
||||||
|
**Problema:** `Server.ServeHTTP` hace `io.ReadAll(r.Body)` **sin límite y antes** de
|
||||||
|
`authenticate()`; `handlePutBlob` repite el `io.ReadAll` sin límite. 400 MB sin
|
||||||
|
credenciales → 898 MB RSS → OOM con pocas conexiones.
|
||||||
|
|
||||||
|
**Fix:**
|
||||||
|
- `http.MaxBytesReader` en el middleware **antes** del `io.ReadAll` (límite control plane,
|
||||||
|
p.ej. 1 MB).
|
||||||
|
- Límite separado y mayor para `/blobs`, con rechazo temprano por `Content-Length` antes
|
||||||
|
de bufferizar; idealmente stream a disco en vez de RAM.
|
||||||
|
- `Server.MaxHeaderBytes` ajustado.
|
||||||
|
- Rate-limit por IP (y por identidad tras auth). Reusar/crear una función del registry si
|
||||||
|
aplica (delegar a `fn-constructor` si es genérica).
|
||||||
|
|
||||||
|
**DoD:** test que envía un cuerpo > límite sin firma → `413`/`401` **sin** que el RSS se
|
||||||
|
dispare (medir `/proc/self/status` antes/después, delta acotado). Golden (cuerpo normal
|
||||||
|
pasa) + edge (justo en el límite) + error (excede → rechazo barato).
|
||||||
|
|
||||||
|
## 0004b — H2 (Alto): cerrar el fail-open de configuración
|
||||||
|
|
||||||
|
**Problema:** default `--bus-auth off`; el nkey de NATS solo se activa en `enforce`; TLS
|
||||||
|
es flag independiente. `--bind 0.0.0.0 --tls-cert …` **sin** `--bus-auth enforce` deja el
|
||||||
|
bus abierto con apariencia de seguro.
|
||||||
|
|
||||||
|
**Fix:**
|
||||||
|
- Si `--bind` no es loopback ⇒ exigir `--bus-auth enforce` (si no, `log.Fatal` con mensaje
|
||||||
|
claro).
|
||||||
|
- `--tls-cert`/`--tls-key` sin `--bus-auth enforce` ⇒ error de arranque.
|
||||||
|
- Arranque inseguro imposible o, como mínimo, ruidoso y rechazado.
|
||||||
|
|
||||||
|
**DoD:** portar `TestAudit_FailOpenTLSWithoutAuth` → ahora el arranque público-sin-enforce
|
||||||
|
falla; cliente no registrado NO conecta. Golden (bind loopback dev sigue permitido) + error
|
||||||
|
(bind público sin enforce aborta).
|
||||||
|
|
||||||
|
## 0004c — H3 (Alto): autorización por pertenencia en el control plane
|
||||||
|
|
||||||
|
**Problema:** "autorizado" = "registrado", no "miembro". Los GET de room no comprueban
|
||||||
|
pertenencia: `/rooms/{id}`, `/rooms/{id}/members` (expone `sign_pub`+`kex_pub` de todos),
|
||||||
|
`/members/{endpoint}/rooms`, y `/rooms/{id}/key?endpoint=X` (devuelve la `sealed_key` ajena).
|
||||||
|
|
||||||
|
**Fix:**
|
||||||
|
- Cada handler de room consulta `members` y exige que el firmante (`X-Unibus-Pub` →
|
||||||
|
endpoint) sea miembro.
|
||||||
|
- `/rooms/{id}/key` solo sirve la clave sellada **para el propio firmante** (`endpoint ==
|
||||||
|
signer`), nunca de un tercero.
|
||||||
|
- `/members/{endpoint}/rooms` solo si `endpoint == signer`.
|
||||||
|
- No exponer la member-list completa a no-miembros.
|
||||||
|
|
||||||
|
**DoD:** portar `TestAudit_HorizontalMetadataLeak` → bob (no miembro) ahora recibe `403`
|
||||||
|
en todos esos endpoints. Golden (miembro legítimo accede) + edge (owner accede) + error (no-miembro 403).
|
||||||
|
|
||||||
|
## 0004d — H4 (Alto): control de acceso en el data plane NATS
|
||||||
|
|
||||||
|
**Problema:** el authenticator nkey solo decide "registrado sí/no"; no hay permisos por
|
||||||
|
subject. Cualquier registrado se suscribe/publica en cualquier subject; las rooms
|
||||||
|
`ModeNATS` (cleartext) quedan expuestas entre usuarios.
|
||||||
|
|
||||||
|
**Fix (elegir y documentar la estrategia):**
|
||||||
|
- Preferente: NATS `Permissions` por identidad (subjects que el usuario puede sub/pub),
|
||||||
|
derivadas de su pertenencia a rooms; o
|
||||||
|
- Subjects impredecibles (no derivables del nombre) + verificación de pertenencia
|
||||||
|
server-side; o
|
||||||
|
- Prohibir `ModeNATS` en despliegue público (forzar siempre E2E) como mínimo defensivo.
|
||||||
|
|
||||||
|
**DoD:** portar `TestAudit_NoSubjectACL` → eve (no invitada) ya NO recibe el mensaje de la
|
||||||
|
room ajena. Documentar la estrategia elegida y su límite.
|
||||||
|
|
||||||
|
## 0004e — H5 (Alto, público): TLS en el control plane
|
||||||
|
|
||||||
|
**Problema:** HTTP `:8470` firmado pero **sin TLS** → metadata (subjects, endpoints,
|
||||||
|
pubkeys, sealed keys, hashes de blobs, grafo social) legible por un MITM en la red pública.
|
||||||
|
|
||||||
|
**Fix:**
|
||||||
|
- Servir el control plane sobre TLS con la misma CA propia (o documentar un reverse-proxy
|
||||||
|
TLS delante).
|
||||||
|
- El cliente exige `https` cuando se le pasa una CA (`client.Connect(caPath)` ⇒ control
|
||||||
|
plane también TLS).
|
||||||
|
|
||||||
|
**DoD:** cliente contra control plane `https` con la CA → OK; contra `http` con CA esperada
|
||||||
|
→ rechaza; un observador no ve la metadata (argumentado + test de esquema).
|
||||||
|
|
||||||
|
## 0004f — medios: owner binding, nonce-cache, error leak
|
||||||
|
|
||||||
|
- **H6** `handleCreateRoom`: exigir `Owner.Endpoint == frame.EndpointID(X-Unibus-Pub)` y
|
||||||
|
`Owner.SignPub == pub`. (Portar `TestAudit_OwnerSpoof` → ahora 403.)
|
||||||
|
- **H7** mover `IsAuthorized` **antes** de tocar el `nonceCache` (no cachear nonces de
|
||||||
|
no-autorizados); poda por expiry-bucket/heap en vez de O(n) bajo mutex global; cap de
|
||||||
|
tamaño. (Portar `TestAudit_NonceCachePoisonPreAuth`.) **Nota:** este fix es prerrequisito
|
||||||
|
del cambio a nonce-cache replicado del issue 0003.
|
||||||
|
- **H12** mensajes de error genéricos al cliente; detalle solo al log (no filtrar rutas/SQL).
|
||||||
|
|
||||||
|
# Fuera de alcance de este issue (encolado en otros)
|
||||||
|
|
||||||
|
- **H9** (cuota/GC de blobs) → issue 0002 (media v2) ya lo cubre.
|
||||||
|
- **H10** (AEAD nonce 12B → XChaCha o rekey por volumen) → bajo, futuro; abrir issue propio
|
||||||
|
si se necesitan rooms de muy alto volumen.
|
||||||
|
- **H11** (firma de owner sin nonce/ts) → cubierto en la práctica por el envelope `enforce`;
|
||||||
|
documentar la dependencia. Reforzar si se relaja `enforce`.
|
||||||
|
- **H8** (custodia de la CA: generar en om, `ca.key` fuera del PC) → tarea operacional del
|
||||||
|
deploy 0001f/0003f, no de código.
|
||||||
|
- **govulncheck** sobre nats-server/nats.go/modernc → paso de CI aparte.
|
||||||
|
|
||||||
|
# Definition of Done global
|
||||||
|
|
||||||
|
- Las cuatro pruebas adversariales bloqueantes del report 0004 (DoS acotado, fail-open
|
||||||
|
cerrado, fuga horizontal 403, ACL data plane) portadas como tests de regresión y en verde.
|
||||||
|
- `CGO_ENABLED=0 go build ./...` + `go vet ./...` + `go test ./...` verdes.
|
||||||
|
- Re-evaluación: tras el hardening, el veredicto de exposición pública pasa de "NO" a
|
||||||
|
"sí-con-condiciones operacionales" (CA custodiada, Restart=always). Anotar en un report
|
||||||
|
nuevo o como addendum al 0004.
|
||||||
|
|
||||||
|
# Orden respecto a otros issues
|
||||||
|
|
||||||
|
1. **0004 (este)** — primero: hace el bus seguro para exponer.
|
||||||
|
2. **0003 (descentralización)** — después: absorbe el nonce-cache→KV replicado (apoyado en
|
||||||
|
0004f-H7), la auth de routes del cluster y el guard de fail-open ×N nodos.
|
||||||
|
3. **0002 (media v2)** — ortogonal; incluye la cuota/GC de blobs (H9).
|
||||||
@@ -0,0 +1,132 @@
|
|||||||
|
---
|
||||||
|
issue: 0005
|
||||||
|
title: Hardening 2 — CVEs, spoof por firma omitida, DoS por concurrencia, TLS forzado (re-auditoría)
|
||||||
|
status: done
|
||||||
|
created: 2026-06-07
|
||||||
|
completed: 2026-06-07
|
||||||
|
domain: security
|
||||||
|
scope: unibus (go.mod, pkg/client, pkg/membership/server.go, cmd/membershipd/config.go, pkg/embeddednats, pkg/blobstore)
|
||||||
|
depends_on: 0001, 0004 (cierra los hallazgos NUEVOS de la re-auditoría sobre lo entregado)
|
||||||
|
blocks: 0001f (deploy público) y 0003f (deploy descentralizado)
|
||||||
|
source: projects/message_bus/reports/0006-2026-06-07-unibus-security-reaudit.md
|
||||||
|
---
|
||||||
|
|
||||||
|
# Objetivo
|
||||||
|
|
||||||
|
La re-auditoría red-team (report 0006) confirmó que el hardening 0004 cerró H1–H7/H12,
|
||||||
|
pero encontró **hallazgos nuevos** que mantienen el veredicto en **"NO exponer público
|
||||||
|
aún"**. Este issue los cierra. La re-auditoría se hizo sobre el commit `618f6b6`
|
||||||
|
(pre-0003); algunos hallazgos pueden haber cambiado con 0003 — **cada fase debe primero
|
||||||
|
verificar si el hallazgo sigue vivo en el master actual** (post-0003, v0.6.0) antes de
|
||||||
|
arreglarlo.
|
||||||
|
|
||||||
|
Estado verificado al crear este issue (master post-0003):
|
||||||
|
- **N1 vivo**: `go.mod` sigue en `nats-server v2.10.22` y `go 1.25.0`.
|
||||||
|
- **N3 vivo**: `pkg/client/client.go:802` tiene `if info.Policy.SignMsgs && f.Sig != nil` (el patrón vulnerable exacto).
|
||||||
|
- **H4**: 0003 añadió `pkg/membership/acl.go` — hay que evaluar si cierra el wildcard `Subscribe(">")` o si falta la capa de NATS Permissions.
|
||||||
|
- N2, N4: presumiblemente vivos (0003 no los tocó); verificar.
|
||||||
|
|
||||||
|
# Fases (TBD, ramas `issue/0005x-*`)
|
||||||
|
|
||||||
|
## 0005a — N1 (Alto): CVEs en dependencias
|
||||||
|
|
||||||
|
**Hallazgo:** `govulncheck ./...` → 16 vulnerabilidades alcanzables: 14 en
|
||||||
|
`github.com/nats-io/nats-server/v2@v2.10.22` (servidor embebido, expuesto público en el
|
||||||
|
deploy decidido) + 2 en la stdlib de Go (`net/textproto` GO-2026-5039, `crypto/x509`
|
||||||
|
GO-2026-5037).
|
||||||
|
|
||||||
|
**Fix:**
|
||||||
|
- `go get github.com/nats-io/nats-server/v2@v2.11.15` (o superior que cubra las 14).
|
||||||
|
- Subir la toolchain a `go1.26.4` (cubre las 2 de stdlib); actualizar la directiva `go`
|
||||||
|
en `go.mod` si procede.
|
||||||
|
- Re-correr `govulncheck ./...` hasta **0 affected**.
|
||||||
|
- **Nota:** este es un cambio de `go.mod`/`go.sum` justificado por CVE; documentarlo en el
|
||||||
|
commit. Verificar que el bump de nats-server no rompe el cluster/JetStream de 0003
|
||||||
|
(correr toda la suite, incluido el e2e multi-nodo).
|
||||||
|
|
||||||
|
**DoD:** `govulncheck ./...` → "No vulnerabilities found" (o solo no-alcanzables); suite
|
||||||
|
completa verde tras el bump.
|
||||||
|
|
||||||
|
## 0005b — N3 (Alto): spoof por firma omitida en rooms firmadas
|
||||||
|
|
||||||
|
**Hallazgo:** `pkg/client/client.go::processFrame` verifica la firma **solo si el frame la
|
||||||
|
trae**: `if info.Policy.SignMsgs && f.Sig != nil { verify }`. Un atacante con acceso al
|
||||||
|
data plane publica un frame con `Sig==nil` y `Sender` forjado → el receptor lo acepta como
|
||||||
|
auténtico en una room que EXIGE firma.
|
||||||
|
|
||||||
|
**Fix:** en una room `SignMsgs`, un frame sin firma debe **dropearse**:
|
||||||
|
```go
|
||||||
|
if info.Policy.SignMsgs {
|
||||||
|
if f.Sig == nil { return } // exige firma; sin ella, descarta
|
||||||
|
if !verify(...) { return }
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
**DoD:** portar `TestReaudit_SigNilSpoof` → ahora el frame `Sig==nil` con `Sender` forjado
|
||||||
|
en una room `SignMsgs` se **descarta** (no se entrega al handler). Golden (frame firmado
|
||||||
|
válido se entrega) + edge (room sin SignMsgs no se ve afectada) + error (Sig==nil en
|
||||||
|
SignMsgs → drop).
|
||||||
|
|
||||||
|
## 0005c — N2 (Medio-Alto): DoS por concurrencia
|
||||||
|
|
||||||
|
**Hallazgo:** el límite por-request (16 MiB) + rate-limit per-IP NO acotan la memoria
|
||||||
|
agregada. 40 subidas de 16 MiB simultáneas (= el burst per-IP) → 1.42 GB RSS. Multi-IP
|
||||||
|
escala sin techo.
|
||||||
|
|
||||||
|
**Fix (elegir y documentar):**
|
||||||
|
- Límite global de conexiones concurrentes y/o de bytes-en-vuelo (semáforo con cota de
|
||||||
|
memoria total), y/o
|
||||||
|
- Stream del blob a disco en vez de `io.ReadAll` en RAM (encaja con la cuota/GC del issue
|
||||||
|
0002), y/o
|
||||||
|
- Bajar `maxBlobBytes` y separar mejor el límite de control (1 MiB) del de blobs.
|
||||||
|
|
||||||
|
**DoD:** test que lanza N subidas concurrentes al techo y verifica que el RSS agregado
|
||||||
|
queda **acotado** (mide `/proc/self/status`, cota declarada) en vez de crecer linealmente
|
||||||
|
con N. Golden (concurrencia normal pasa) + edge (en la cota) + error (exceso → 429/503 sin
|
||||||
|
OOM).
|
||||||
|
|
||||||
|
## 0005d — N4 (Medio): forzar TLS del control plane en bind público
|
||||||
|
|
||||||
|
**Hallazgo:** el guard `validateBootConfig` cierra "público sin enforce" y "TLS sin
|
||||||
|
enforce", pero **permite** público + enforce **sin** `--tls-cert` → el control plane sirve
|
||||||
|
HTTP plano públicamente (reaparece H5: metadata en claro).
|
||||||
|
|
||||||
|
**Fix:** el guard debe exigir `--tls-cert`/`--tls-key` cuando el bind no es loopback.
|
||||||
|
`public + enforce + sin TLS` → `log.Fatal`.
|
||||||
|
|
||||||
|
**DoD:** portar `TestGap_PublicEnforceNoTLS` → ahora `validateBootConfig("0.0.0.0",
|
||||||
|
enforce, "", "")` **rechaza**. Golden (público+enforce+TLS OK) + edge (loopback sin TLS
|
||||||
|
sigue OK para dev) + error (público sin TLS aborta).
|
||||||
|
|
||||||
|
## 0005e — H4 (Medio, residual): evaluar y completar la ACL por subject
|
||||||
|
|
||||||
|
**Contexto:** 0003 añadió `pkg/membership/acl.go`. Primero **evaluar** con el ataque del
|
||||||
|
report 0006 (`TestReaudit_H4_WildcardMetadataLeak`: un registrado no-miembro con
|
||||||
|
`Subscribe(">")` raw capta subjects + advisories de JetStream de rooms ajenas) si ese
|
||||||
|
acl.go ya lo cierra.
|
||||||
|
- Si lo cierra → portar el test como regresión y documentar.
|
||||||
|
- Si NO (probable: la ACL real necesita NATS `Permissions` por identidad a nivel del
|
||||||
|
authenticator/cuenta, no solo lógica de membership en el control plane) → implementar las
|
||||||
|
Permissions por identidad derivadas de pertenencia, o documentar el límite y el plan.
|
||||||
|
|
||||||
|
**DoD:** `TestReaudit_H4_WildcardMetadataLeak` → el no-miembro ya NO capta los subjects de
|
||||||
|
rooms ajenas (o, si queda residual, está documentado con su límite exacto).
|
||||||
|
|
||||||
|
# Fuera de alcance (otros issues)
|
||||||
|
|
||||||
|
- **H9** (cuota/GC de blobs) → issue 0002; se solapa con 0005c (streaming a disco).
|
||||||
|
- **H10** (AEAD nonce) / **H11** (nonce/ts en firma de owner) → bajo, futuro.
|
||||||
|
- **H8** (custodia de la CA: generar en om) → operacional del deploy.
|
||||||
|
- **Auditoría de la superficie nueva de 0003** (cluster routes auth, jetstreamStore KV
|
||||||
|
fail-closed, nonce-cache replicado, failover) → el report 0006 NO la cubrió (auditó
|
||||||
|
pre-0003). Pendiente una re-auditoría dedicada de 0003 (prompt ya preparado).
|
||||||
|
|
||||||
|
# Definition of Done global
|
||||||
|
|
||||||
|
- `govulncheck ./...` → 0 alcanzables.
|
||||||
|
- Los tests adversariales de la re-auditoría (`TestReaudit_SigNilSpoof`,
|
||||||
|
`TestGap_PublicEnforceNoTLS`, `TestReaudit_H4_WildcardMetadataLeak`, DoS-concurrencia)
|
||||||
|
portados como regresión y en verde (o el residual documentado).
|
||||||
|
- `CGO_ENABLED=0 go build ./... && go vet ./... && go test ./...` verdes (incluido el e2e
|
||||||
|
multi-nodo de 0003, para confirmar que el bump de nats-server no lo rompió).
|
||||||
|
- Re-evaluación: el veredicto de exposición pública pasa de "NO-aún" a "sí-con-condiciones".
|
||||||
@@ -0,0 +1,160 @@
|
|||||||
|
---
|
||||||
|
issue: 0006
|
||||||
|
title: Completar y endurecer el cluster — wiring del control plane KV + N1-N6 de la auditoría 0008
|
||||||
|
status: done
|
||||||
|
created: 2026-06-07
|
||||||
|
closed: 2026-06-07
|
||||||
|
closed_by: fases 0006a–0006g (ver report 0009); unibus v0.8.0
|
||||||
|
domain: security
|
||||||
|
scope: unibus (cmd/membershipd, pkg/membership, pkg/embeddednats, pkg/busauth, pkg/client)
|
||||||
|
depends_on: 0003 (completa su wiring), 0005 (hereda el bus single-node ya seguro)
|
||||||
|
blocks: 0003f (deploy del cluster descentralizado)
|
||||||
|
source: projects/message_bus/reports/0008-2026-06-07-unibus-decentralization-audit.md
|
||||||
|
---
|
||||||
|
|
||||||
|
# Objetivo
|
||||||
|
|
||||||
|
La auditoría dedicada de la superficie de 0003 (report 0008) concluyó: **el bus en
|
||||||
|
cluster NO es seguro para público** por dos bloqueantes, y además **0003 dejó el
|
||||||
|
control plane descentralizado SIN cablear** (el binario sigue usando SQLite single-store;
|
||||||
|
el flag `decentralized` existe pero ningún código Go lo lee). Como nodo único standalone
|
||||||
|
unibus YA es seguro (report 0008 lo confirma); como cluster, no.
|
||||||
|
|
||||||
|
Este issue cierra los bloqueantes de seguridad del cluster Y completa el wiring que 0003
|
||||||
|
dejó a medias, de modo que el deploy descentralizado (0003f) sea seguro. Cada fase
|
||||||
|
reproduce el ataque del report 0008 (`TestAttack0008_*`) y verifica que ahora se rechaza.
|
||||||
|
|
||||||
|
# Fases (TBD, ramas `issue/0006x-*`)
|
||||||
|
|
||||||
|
## 0006a — N3 (BLOQUEANTE): cablear el nonce replicado en el binario
|
||||||
|
|
||||||
|
**Hallazgo (ALTA):** `membershipd` **nunca llama** `Server.UseReplicatedNonces`; cada nodo
|
||||||
|
usa `memNonceCache` por-proceso. Un request firmado aceptado en el nodo A se **replaya con
|
||||||
|
éxito en el nodo B** (200+200). La API (`kvNonceStore`) y el test
|
||||||
|
(`TestReplicatedNonceRejectsCrossNodeReplay`) existen, pero el binario no los invoca.
|
||||||
|
|
||||||
|
**Fix:** en `cmd/membershipd/main.go`, cuando se arranca con `--cluster-name` (o siempre que
|
||||||
|
haya JetStream disponible), llamar `srv.UseReplicatedNonces(js, replicas)` y **fail-fast** si
|
||||||
|
el bucket `KV_UNIBUS_nonces` no se crea. Regla dura: `--cluster-name != ""` ⇒ nonce replicado
|
||||||
|
**obligatorio** (no arrancar un nodo de cluster con nonce-cache local).
|
||||||
|
|
||||||
|
**DoD:** reproducir `TestAttack0008_N3` (2 nodos con el wiring exacto del binario) → el replay
|
||||||
|
del nonce al nodo B ahora da **401**. Golden (request normal OK en cualquier nodo) + edge
|
||||||
|
(single-node sin cluster sigue usando cache local OK) + error (replay cross-node → 401).
|
||||||
|
|
||||||
|
## 0006b — N2 (BLOQUEANTE): cerrar `$JS.API.>` / aislar el control plane KV
|
||||||
|
|
||||||
|
**Hallazgo (ALTA):** el grant ACL `clientInfraSubjects = {"_INBOX.>", "$JS.API.>"}`
|
||||||
|
(`acl.go:20`) deja a cualquier peer registrado leer los buckets KV del control plane
|
||||||
|
(`KV_UNIBUS_users/rooms/members/room_keys`) directo por NATS, saltándose `requireMember` y
|
||||||
|
los chequeos del HTTP. Fuga del allowlist (handles+roles+claves), grafo de rooms y metadata
|
||||||
|
de sealed-keys. (La ESCRITURA al KV ya está denegada — verificado; la fuga es de lectura.)
|
||||||
|
|
||||||
|
**Fix (elegir y documentar):**
|
||||||
|
- Sustituir el grant amplio `$JS.API.>` por permisos JetStream **mínimos por-room** (solo la
|
||||||
|
API del stream/consumer de las rooms del peer), y **denegar explícitamente** los streams
|
||||||
|
`KV_UNIBUS_*` y `OBJ_*`; o
|
||||||
|
- (Más robusto) aislar el control plane KV en una NATS **account separada**, inaccesible
|
||||||
|
desde la account de clientes.
|
||||||
|
|
||||||
|
**DoD:** reproducir `TestAttack0008_N2` → eve (registrada, no miembro) ya **NO** puede leer
|
||||||
|
los buckets KV (`Permissions Violation` o equivalente). La JetStream API legítima de las
|
||||||
|
rooms del peer sigue funcionando.
|
||||||
|
|
||||||
|
## 0006c — wiring del control plane KV (completar 0003)
|
||||||
|
|
||||||
|
**Hallazgo (MEDIA / raíz):** el binario no activa el store descentralizado. `membership.Open`
|
||||||
|
(SQLite) está hardcodeado en `main.go:90`; `OpenJetStream` solo lo usa `migrate-to-kv`.
|
||||||
|
|
||||||
|
**Fix:** leer el flag `decentralized` (o un `--store kv|sqlite`) y **seleccionar el store** en
|
||||||
|
el arranque: SQLite (default, single-node/dev) o `jetstreamStore` (cluster). Resolver el
|
||||||
|
"ciclo bootstrap" del authenticator interno (el authenticator necesita el store para
|
||||||
|
`IsAuthorized`, y el store KV necesita el NATS arrancado). Mantener branch-by-abstraction:
|
||||||
|
con el flag off, comportamiento idéntico al actual. `IsAuthorized`/lecturas sobre KV
|
||||||
|
**fail-closed** ante pérdida de quorum/timeout (ya implementado en `jetstreamStore` —
|
||||||
|
verificar que el wiring lo preserva).
|
||||||
|
|
||||||
|
**DoD:** con `decentralized: on` + cluster, el control plane sirve desde el KV replicado y un
|
||||||
|
nodo nuevo ve las rooms creadas en otro (cierra la divergencia de estado señalada en N5).
|
||||||
|
Fail-closed: simular KV no disponible → deniega. Con flag off, suite idéntica al baseline.
|
||||||
|
|
||||||
|
## 0006d — N1 (ALTA): posture homogénea del cluster
|
||||||
|
|
||||||
|
**Hallazgo:** el cluster es tan seguro como su nodo más débil; un nodo sin authenticator o
|
||||||
|
`--bus-auth off` permite a un peer no autenticado hacer `Subscribe(">")` y cosechar el tráfico
|
||||||
|
reenviado de los nodos con ACL.
|
||||||
|
|
||||||
|
**Fix:** garantizar (en arranque/health) que todos los nodos corren `enforce`+ACL+TLS;
|
||||||
|
rechazar formar cluster con un peer en posture inferior, o como mínimo documentar y exponer
|
||||||
|
un health que lo detecte. Nunca exponer el puerto de cliente de un nodo sin enforce.
|
||||||
|
|
||||||
|
**DoD:** reproducir `TestAttack0008_N1` escenario 2 (cluster con un nodo `withACL=false`) →
|
||||||
|
el arranque/health lo rechaza o lo señala; documentar la garantía.
|
||||||
|
|
||||||
|
## 0006e — N4 (MEDIA): RefreshSession en los clientes
|
||||||
|
|
||||||
|
**Hallazgo:** la ACL congela permisos al conectar; un peer que crea/se une a una room debe
|
||||||
|
llamar `client.RefreshSession()` para pub/sub en su subject. **Ningún cliente lo llama**
|
||||||
|
(`cmd/chat`, `cmd/worker`, `mobile`, `gateway`). Es fail-closed (deniega), pero rompe la
|
||||||
|
usabilidad bajo `enforce`+ACL → empuja al operador a desactivar la ACL (regresión de
|
||||||
|
seguridad a discreción del operador).
|
||||||
|
|
||||||
|
**Fix:** llamar `RefreshSession` tras cambios de membresía en `cmd/chat`/`cmd/worker` (y
|
||||||
|
documentar el contrato para `mobile`/`gateway`), o implementar refresh transparente (rehacer
|
||||||
|
suscripciones automáticamente al unirse a una room).
|
||||||
|
|
||||||
|
**DoD:** test que crea/une room bajo enforce+ACL y publica/recibe SIN intervención manual
|
||||||
|
(el cliente refresca solo o el demo llama RefreshSession). Documentar el requisito.
|
||||||
|
|
||||||
|
## 0006f — bajos: CA de routes, secreto de cluster, migrate-to-kv, R1≠HA
|
||||||
|
|
||||||
|
- **N1 (BAJA):** CA **separada** para las routes del cluster (no reusar la CA del data plane
|
||||||
|
de clientes); pasar el secreto de cluster por **archivo/env**, no por `--routes
|
||||||
|
nats://user:pass@host` en argv (hoy visible en `ps`/`journald`).
|
||||||
|
- **N6 (BAJA):** `migrate-to-kv` solo en loopback o con TLS (hoy el allowlist viaja plaintext
|
||||||
|
si `--nats-url` es remoto y no se pasa `--ca`).
|
||||||
|
- **N3-DoS (MEDIA, doc):** documentar que el nonce/control plane en **R1 es SPOF de auth** (su
|
||||||
|
caída rechaza todos los requests autenticados); R3 (quorum 2/3) es la condición de HA real.
|
||||||
|
No vender R1 como HA.
|
||||||
|
|
||||||
|
## 0006g — preparar el material de deploy del cluster (3 nodos)
|
||||||
|
|
||||||
|
Los tres nodos del cluster están decididos: **magnus + homer + datardos** (3 VPS OVH →
|
||||||
|
quorum R3 real, tolera la caída de uno). Datos: homer `141.94.69.66`; datardos `ssh dd`
|
||||||
|
`51.91.100.142` (WG `datardos-wg` 10.21.0.x); magnus en `pass` (`MAGNUS_ovh_ssh_ROOT`).
|
||||||
|
|
||||||
|
**Preparar (NO ejecutar en los VPS — eso es 0003f, lo hace el humano):** dejar en
|
||||||
|
`deploy/cluster/` el material parametrizado por nodo:
|
||||||
|
- `generate-cluster-certs.sh` — CA propia del cluster (separada de la de clientes, ver 0006f)
|
||||||
|
+ un server cert por nodo con SAN = su IP pública + su IP WG + hostname.
|
||||||
|
- una plantilla de systemd unit por nodo (`membershipd@.service` o tres units) con
|
||||||
|
`--bind 0.0.0.0 --bus-auth enforce --tls-cert … --cluster-name unibus --routes
|
||||||
|
nats://…@<otros-2-nodos> --store kv` y `Restart=always`, secreto de cluster por archivo/env.
|
||||||
|
- `deploy-cluster.sh` (cross-build linux + rsync por nodo + plan de arranque escalonado).
|
||||||
|
- un `README.md` con el runbook: orden de arranque, seed del admin, `migrate-to-kv` (loopback/TLS),
|
||||||
|
escalado de réplicas a R3 (`nats stream edit <stream> --replicas 3`), verificación de quorum y chaos
|
||||||
|
test (matar un nodo). Marcar claramente qué pasos toca el humano.
|
||||||
|
|
||||||
|
**DoD:** el material existe y es coherente (los certs cubren los 3 nodos; las units referencian
|
||||||
|
los routes correctos); un `bash -n` de los scripts pasa; el README describe el deploy end-to-end.
|
||||||
|
NO se toca ningún VPS desde el agente.
|
||||||
|
|
||||||
|
# Fuera de alcance (otros issues / operacional)
|
||||||
|
|
||||||
|
- **H8** (CA generada/custodiada en om) → operacional del deploy 0003f.
|
||||||
|
- **H9/H10/H11** → issue 0002 / futuro.
|
||||||
|
- **Object Store (blobs) vía `$JS.API.>`**: el report 0008 lo marca como "probable misma
|
||||||
|
clase que N2, no verificado" (impacto menor: blobs son ciphertext E2E). El fix de 0006b
|
||||||
|
(denegar `OBJ_*`) lo cubre; verificar.
|
||||||
|
- **Chaos test de red real** (matar 1/3, 2/3, split-brain) → 0003f (requiere 3 VPS).
|
||||||
|
|
||||||
|
# Definition of Done global
|
||||||
|
|
||||||
|
- `TestAttack0008_N3` → replay cross-node **401**; `TestAttack0008_N2` → eve no lee buckets KV;
|
||||||
|
`TestAttack0008_N1` → nodo débil rechazado/señalado. Portados como regresión.
|
||||||
|
- Con `decentralized: on`: control plane sobre KV replicado, fail-closed verificado, estado
|
||||||
|
consistente entre nodos. Con flag off: baseline idéntico.
|
||||||
|
- Clientes operan bajo `enforce`+ACL sin intervención manual (RefreshSession resuelto).
|
||||||
|
- `CGO_ENABLED=0 go build ./... && go vet ./... && go test ./...` verdes + `govulncheck` 0.
|
||||||
|
- Veredicto re-evaluado: el bus DESCENTRALIZADO pasa de "NO" a "sí-con-condiciones" (3 nodos
|
||||||
|
R3 para HA real, posture homogénea, CA en om).
|
||||||
@@ -1,25 +1,28 @@
|
|||||||
module github.com/enmanuel/unibus
|
module github.com/enmanuel/unibus
|
||||||
|
|
||||||
go 1.25.0
|
go 1.26.4
|
||||||
|
|
||||||
replace fn-registry => ../../../../
|
replace fn-registry => ../../../../
|
||||||
|
|
||||||
require (
|
require (
|
||||||
fn-registry v0.0.0-00010101000000-000000000000
|
fn-registry v0.0.0-00010101000000-000000000000
|
||||||
github.com/nats-io/nats-server/v2 v2.10.22
|
github.com/nats-io/nats-server/v2 v2.11.15
|
||||||
github.com/nats-io/nats.go v1.37.0
|
github.com/nats-io/nats.go v1.49.0
|
||||||
|
github.com/nats-io/nkeys v0.4.15
|
||||||
github.com/oklog/ulid/v2 v2.1.0
|
github.com/oklog/ulid/v2 v2.1.0
|
||||||
|
golang.org/x/time v0.15.0
|
||||||
modernc.org/sqlite v1.47.0
|
modernc.org/sqlite v1.47.0
|
||||||
)
|
)
|
||||||
|
|
||||||
require (
|
require (
|
||||||
|
github.com/antithesishq/antithesis-sdk-go v0.6.0-default-no-op // indirect
|
||||||
github.com/dustin/go-humanize v1.0.1 // indirect
|
github.com/dustin/go-humanize v1.0.1 // indirect
|
||||||
|
github.com/google/go-tpm v0.9.8 // indirect
|
||||||
github.com/google/uuid v1.6.0 // indirect
|
github.com/google/uuid v1.6.0 // indirect
|
||||||
github.com/klauspost/compress v1.18.3 // indirect
|
github.com/klauspost/compress v1.18.4 // indirect
|
||||||
github.com/mattn/go-isatty v0.0.20 // indirect
|
github.com/mattn/go-isatty v0.0.20 // indirect
|
||||||
github.com/minio/highwayhash v1.0.3 // indirect
|
github.com/minio/highwayhash v1.0.4-0.20251030100505-070ab1a87a76 // indirect
|
||||||
github.com/nats-io/jwt/v2 v2.5.8 // indirect
|
github.com/nats-io/jwt/v2 v2.8.1 // indirect
|
||||||
github.com/nats-io/nkeys v0.4.7 // indirect
|
|
||||||
github.com/nats-io/nuid v1.0.1 // indirect
|
github.com/nats-io/nuid v1.0.1 // indirect
|
||||||
github.com/ncruces/go-strftime v1.0.0 // indirect
|
github.com/ncruces/go-strftime v1.0.0 // indirect
|
||||||
github.com/remyoudompheng/bigfft v0.0.0-20230129092748-24d4a6f8daec // indirect
|
github.com/remyoudompheng/bigfft v0.0.0-20230129092748-24d4a6f8daec // indirect
|
||||||
@@ -28,8 +31,6 @@ require (
|
|||||||
golang.org/x/mod v0.36.0 // indirect
|
golang.org/x/mod v0.36.0 // indirect
|
||||||
golang.org/x/sync v0.20.0 // indirect
|
golang.org/x/sync v0.20.0 // indirect
|
||||||
golang.org/x/sys v0.44.0 // indirect
|
golang.org/x/sys v0.44.0 // indirect
|
||||||
golang.org/x/text v0.37.0 // indirect
|
|
||||||
golang.org/x/time v0.7.0 // indirect
|
|
||||||
golang.org/x/tools v0.45.0 // indirect
|
golang.org/x/tools v0.45.0 // indirect
|
||||||
modernc.org/libc v1.70.0 // indirect
|
modernc.org/libc v1.70.0 // indirect
|
||||||
modernc.org/mathutil v1.7.1 // indirect
|
modernc.org/mathutil v1.7.1 // indirect
|
||||||
|
|||||||
@@ -1,25 +1,31 @@
|
|||||||
|
github.com/antithesishq/antithesis-sdk-go v0.6.0-default-no-op h1:kpBdlEPbRvff0mDD1gk7o9BhI16b9p5yYAXRlidpqJE=
|
||||||
|
github.com/antithesishq/antithesis-sdk-go v0.6.0-default-no-op/go.mod h1:IUpT2DPAKh6i/YhSbt6Gl3v2yvUZjmKncl7U91fup7E=
|
||||||
github.com/dustin/go-humanize v1.0.1 h1:GzkhY7T5VNhEkwH0PVJgjz+fX1rhBrR7pRT3mDkpeCY=
|
github.com/dustin/go-humanize v1.0.1 h1:GzkhY7T5VNhEkwH0PVJgjz+fX1rhBrR7pRT3mDkpeCY=
|
||||||
github.com/dustin/go-humanize v1.0.1/go.mod h1:Mu1zIs6XwVuF/gI1OepvI0qD18qycQx+mFykh5fBlto=
|
github.com/dustin/go-humanize v1.0.1/go.mod h1:Mu1zIs6XwVuF/gI1OepvI0qD18qycQx+mFykh5fBlto=
|
||||||
|
github.com/google/go-cmp v0.6.0 h1:ofyhxvXcZhMsU5ulbFiLKl/XBFqE1GSq7atu8tAmTRI=
|
||||||
|
github.com/google/go-cmp v0.6.0/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY=
|
||||||
|
github.com/google/go-tpm v0.9.8 h1:slArAR9Ft+1ybZu0lBwpSmpwhRXaa85hWtMinMyRAWo=
|
||||||
|
github.com/google/go-tpm v0.9.8/go.mod h1:h9jEsEECg7gtLis0upRBQU+GhYVH6jMjrFxI8u6bVUY=
|
||||||
github.com/google/pprof v0.0.0-20250317173921-a4b03ec1a45e h1:ijClszYn+mADRFY17kjQEVQ1XRhq2/JR1M3sGqeJoxs=
|
github.com/google/pprof v0.0.0-20250317173921-a4b03ec1a45e h1:ijClszYn+mADRFY17kjQEVQ1XRhq2/JR1M3sGqeJoxs=
|
||||||
github.com/google/pprof v0.0.0-20250317173921-a4b03ec1a45e/go.mod h1:boTsfXsheKC2y+lKOCMpSfarhxDeIzfZG1jqGcPl3cA=
|
github.com/google/pprof v0.0.0-20250317173921-a4b03ec1a45e/go.mod h1:boTsfXsheKC2y+lKOCMpSfarhxDeIzfZG1jqGcPl3cA=
|
||||||
github.com/google/uuid v1.6.0 h1:NIvaJDMOsjHA8n1jAhLSgzrAzy1Hgr+hNrb57e+94F0=
|
github.com/google/uuid v1.6.0 h1:NIvaJDMOsjHA8n1jAhLSgzrAzy1Hgr+hNrb57e+94F0=
|
||||||
github.com/google/uuid v1.6.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo=
|
github.com/google/uuid v1.6.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo=
|
||||||
github.com/hashicorp/golang-lru/v2 v2.0.7 h1:a+bsQ5rvGLjzHuww6tVxozPZFVghXaHOwFs4luLUK2k=
|
github.com/hashicorp/golang-lru/v2 v2.0.7 h1:a+bsQ5rvGLjzHuww6tVxozPZFVghXaHOwFs4luLUK2k=
|
||||||
github.com/hashicorp/golang-lru/v2 v2.0.7/go.mod h1:QeFd9opnmA6QUJc5vARoKUSoFhyfM2/ZepoAG6RGpeM=
|
github.com/hashicorp/golang-lru/v2 v2.0.7/go.mod h1:QeFd9opnmA6QUJc5vARoKUSoFhyfM2/ZepoAG6RGpeM=
|
||||||
github.com/klauspost/compress v1.18.3 h1:9PJRvfbmTabkOX8moIpXPbMMbYN60bWImDDU7L+/6zw=
|
github.com/klauspost/compress v1.18.4 h1:RPhnKRAQ4Fh8zU2FY/6ZFDwTVTxgJ/EMydqSTzE9a2c=
|
||||||
github.com/klauspost/compress v1.18.3/go.mod h1:R0h/fSBs8DE4ENlcrlib3PsXS61voFxhIs2DeRhCvJ4=
|
github.com/klauspost/compress v1.18.4/go.mod h1:R0h/fSBs8DE4ENlcrlib3PsXS61voFxhIs2DeRhCvJ4=
|
||||||
github.com/mattn/go-isatty v0.0.20 h1:xfD0iDuEKnDkl03q4limB+vH+GxLEtL/jb4xVJSWWEY=
|
github.com/mattn/go-isatty v0.0.20 h1:xfD0iDuEKnDkl03q4limB+vH+GxLEtL/jb4xVJSWWEY=
|
||||||
github.com/mattn/go-isatty v0.0.20/go.mod h1:W+V8PltTTMOvKvAeJH7IuucS94S2C6jfK/D7dTCTo3Y=
|
github.com/mattn/go-isatty v0.0.20/go.mod h1:W+V8PltTTMOvKvAeJH7IuucS94S2C6jfK/D7dTCTo3Y=
|
||||||
github.com/minio/highwayhash v1.0.3 h1:kbnuUMoHYyVl7szWjSxJnxw11k2U709jqFPPmIUyD6Q=
|
github.com/minio/highwayhash v1.0.4-0.20251030100505-070ab1a87a76 h1:KGuD/pM2JpL9FAYvBrnBBeENKZNh6eNtjqytV6TYjnk=
|
||||||
github.com/minio/highwayhash v1.0.3/go.mod h1:GGYsuwP/fPD6Y9hMiXuapVvlIUEhFhMTh0rxU3ik1LQ=
|
github.com/minio/highwayhash v1.0.4-0.20251030100505-070ab1a87a76/go.mod h1:GGYsuwP/fPD6Y9hMiXuapVvlIUEhFhMTh0rxU3ik1LQ=
|
||||||
github.com/nats-io/jwt/v2 v2.5.8 h1:uvdSzwWiEGWGXf+0Q+70qv6AQdvcvxrv9hPM0RiPamE=
|
github.com/nats-io/jwt/v2 v2.8.1 h1:V0xpGuD/N8Mi+fQNDynXohVvp7ZztevW5io8CUWlPmU=
|
||||||
github.com/nats-io/jwt/v2 v2.5.8/go.mod h1:ZdWS1nZa6WMZfFwwgpEaqBV8EPGVgOTDHN/wTbz0Y5A=
|
github.com/nats-io/jwt/v2 v2.8.1/go.mod h1:nWnOEEiVMiKHQpnAy4eXlizVEtSfzacZ1Q43LIRavZg=
|
||||||
github.com/nats-io/nats-server/v2 v2.10.22 h1:Yt63BGu2c3DdMoBZNcR6pjGQwk/asrKU7VX846ibxDA=
|
github.com/nats-io/nats-server/v2 v2.11.15 h1:StSf9TINInaZtr4oww2+kXmfwa9SkN//g/LwS19/UJ0=
|
||||||
github.com/nats-io/nats-server/v2 v2.10.22/go.mod h1:X/m1ye9NYansUXYFrbcDwUi/blHkrgHh2rgCJaakonk=
|
github.com/nats-io/nats-server/v2 v2.11.15/go.mod h1:zwhv8Y0PE3KHyKgznJc/9Xoai638SaJd83zzJ5GJn74=
|
||||||
github.com/nats-io/nats.go v1.37.0 h1:07rauXbVnnJvv1gfIyghFEo6lUcYRY0WXc3x7x0vUxE=
|
github.com/nats-io/nats.go v1.49.0 h1:yh/WvY59gXqYpgl33ZI+XoVPKyut/IcEaqtsiuTJpoE=
|
||||||
github.com/nats-io/nats.go v1.37.0/go.mod h1:Ubdu4Nh9exXdSz0RVWRFBbRfrbSxOYd26oF0wkWclB8=
|
github.com/nats-io/nats.go v1.49.0/go.mod h1:fDCn3mN5cY8HooHwE2ukiLb4p4G4ImmzvXyJt+tGwdw=
|
||||||
github.com/nats-io/nkeys v0.4.7 h1:RwNJbbIdYCoClSDNY7QVKZlyb/wfT6ugvFCiKy6vDvI=
|
github.com/nats-io/nkeys v0.4.15 h1:JACV5jRVO9V856KOapQ7x+EY8Jo3qw1vJt/9Jpwzkk4=
|
||||||
github.com/nats-io/nkeys v0.4.7/go.mod h1:kqXRgRDPlGy7nGaEDMuYzmiJCIAAWDK0IMBtDmGD0nc=
|
github.com/nats-io/nkeys v0.4.15/go.mod h1:CpMchTXC9fxA5zrMo4KpySxNjiDVvr8ANOSZdiNfUrs=
|
||||||
github.com/nats-io/nuid v1.0.1 h1:5iA8DT8V7q8WK2EScv2padNa/rTESc1KdnPw4TC2paw=
|
github.com/nats-io/nuid v1.0.1 h1:5iA8DT8V7q8WK2EScv2padNa/rTESc1KdnPw4TC2paw=
|
||||||
github.com/nats-io/nuid v1.0.1/go.mod h1:19wcPz3Ph3q0Jbyiqsd0kePYG7A95tJPxeL+1OSON2c=
|
github.com/nats-io/nuid v1.0.1/go.mod h1:19wcPz3Ph3q0Jbyiqsd0kePYG7A95tJPxeL+1OSON2c=
|
||||||
github.com/ncruces/go-strftime v1.0.0 h1:HMFp8mLCTPp341M/ZnA4qaf7ZlsbTc+miZjCLOFAw7w=
|
github.com/ncruces/go-strftime v1.0.0 h1:HMFp8mLCTPp341M/ZnA4qaf7ZlsbTc+miZjCLOFAw7w=
|
||||||
@@ -41,12 +47,14 @@ golang.org/x/sys v0.6.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
|
|||||||
golang.org/x/sys v0.21.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA=
|
golang.org/x/sys v0.21.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA=
|
||||||
golang.org/x/sys v0.44.0 h1:ildZl3J4uzeKP07r2F++Op7E9B29JRUy+a27EibtBTQ=
|
golang.org/x/sys v0.44.0 h1:ildZl3J4uzeKP07r2F++Op7E9B29JRUy+a27EibtBTQ=
|
||||||
golang.org/x/sys v0.44.0/go.mod h1:4GL1E5IUh+htKOUEOaiffhrAeqysfVGipDYzABqnCmw=
|
golang.org/x/sys v0.44.0/go.mod h1:4GL1E5IUh+htKOUEOaiffhrAeqysfVGipDYzABqnCmw=
|
||||||
golang.org/x/text v0.37.0 h1:Cqjiwd9eSg8e0QAkyCaQTNHFIIzWtidPahFWR83rTrc=
|
golang.org/x/time v0.15.0 h1:bbrp8t3bGUeFOx08pvsMYRTCVSMk89u4tKbNOZbp88U=
|
||||||
golang.org/x/text v0.37.0/go.mod h1:a5sjxXGs9hsn/AJVwuElvCAo9v8QYLzvavO5z2PiM38=
|
golang.org/x/time v0.15.0/go.mod h1:Y4YMaQmXwGQZoFaVFk4YpCt4FLQMYKZe9oeV/f4MSno=
|
||||||
golang.org/x/time v0.7.0 h1:ntUhktv3OPE6TgYxXWv9vKvUSJyIFJlyohwbkEwPrKQ=
|
|
||||||
golang.org/x/time v0.7.0/go.mod h1:3BpzKBy/shNhVucY/MWOyx10tF3SFh9QdLuxbVysPQM=
|
|
||||||
golang.org/x/tools v0.45.0 h1:18qN3FAooORvApf5XjCXgsuayZOEtXf6JK18I3+ONa8=
|
golang.org/x/tools v0.45.0 h1:18qN3FAooORvApf5XjCXgsuayZOEtXf6JK18I3+ONa8=
|
||||||
golang.org/x/tools v0.45.0/go.mod h1:LuUGqqaXcXMEFEruIVJVm5mgDD8vww/z/SR1gQ4uE/0=
|
golang.org/x/tools v0.45.0/go.mod h1:LuUGqqaXcXMEFEruIVJVm5mgDD8vww/z/SR1gQ4uE/0=
|
||||||
|
golang.org/x/tools/go/expect v0.1.1-deprecated h1:jpBZDwmgPhXsKZC6WhL20P4b/wmnpsEAGHaNy0n/rJM=
|
||||||
|
golang.org/x/tools/go/expect v0.1.1-deprecated/go.mod h1:eihoPOH+FgIqa3FpoTwguz/bVUSGBlGQU67vpBeOrBY=
|
||||||
|
golang.org/x/tools/go/packages/packagestest v0.1.1-deprecated h1:1h2MnaIAIXISqTFKdENegdpAgUXz6NrPEsbIeWaBRvM=
|
||||||
|
golang.org/x/tools/go/packages/packagestest v0.1.1-deprecated/go.mod h1:RVAQXBGNv1ib0J382/DPCRS/BPnsGebyM1Gj5VSDpG8=
|
||||||
modernc.org/cc/v4 v4.27.1 h1:9W30zRlYrefrDV2JE2O8VDtJ1yPGownxciz5rrbQZis=
|
modernc.org/cc/v4 v4.27.1 h1:9W30zRlYrefrDV2JE2O8VDtJ1yPGownxciz5rrbQZis=
|
||||||
modernc.org/cc/v4 v4.27.1/go.mod h1:uVtb5OGqUKpoLWhqwNQo/8LwvoiEBLvZXIQ/SmO6mL0=
|
modernc.org/cc/v4 v4.27.1/go.mod h1:uVtb5OGqUKpoLWhqwNQo/8LwvoiEBLvZXIQ/SmO6mL0=
|
||||||
modernc.org/ccgo/v4 v4.32.0 h1:hjG66bI/kqIPX1b2yT6fr/jt+QedtP2fqojG2VrFuVw=
|
modernc.org/ccgo/v4 v4.32.0 h1:hjG66bI/kqIPX1b2yT6fr/jt+QedtP2fqojG2VrFuVw=
|
||||||
|
|||||||
@@ -0,0 +1,22 @@
|
|||||||
|
-- 002_users.sql — bus-level user directory (issue 0001a).
|
||||||
|
--
|
||||||
|
-- The authoritative allowlist of identities permitted to use the bus, independent
|
||||||
|
-- of room membership. A user is identified by its Ed25519 signing public key (the
|
||||||
|
-- same key that derives the endpoint via frame.EndpointID); roles gate admin-only
|
||||||
|
-- control-plane operations; status enables revocation without deleting history.
|
||||||
|
--
|
||||||
|
-- Additive and idempotent: safe to apply repeatedly. Never modify this file;
|
||||||
|
-- further schema changes go in new numbered migrations (see
|
||||||
|
-- .claude/rules/db_migrations.md). The embedded copy under
|
||||||
|
-- pkg/membership/migrations/002_users.sql mirrors this file byte-for-byte.
|
||||||
|
|
||||||
|
CREATE TABLE IF NOT EXISTS users (
|
||||||
|
sign_pub TEXT PRIMARY KEY, -- Ed25519 public key in lowercase hex (peer identity)
|
||||||
|
handle TEXT NOT NULL, -- human-readable name (unique recommended, not enforced as PK)
|
||||||
|
role TEXT NOT NULL DEFAULT 'member', -- 'admin' | 'member'
|
||||||
|
status TEXT NOT NULL DEFAULT 'active', -- 'active' | 'revoked'
|
||||||
|
created_at TEXT NOT NULL,
|
||||||
|
revoked_at TEXT
|
||||||
|
);
|
||||||
|
|
||||||
|
CREATE INDEX IF NOT EXISTS idx_users_status ON users(status);
|
||||||
Executable
+37
@@ -0,0 +1,37 @@
|
|||||||
|
#!/usr/bin/env bash
|
||||||
|
# Regenera el binding gomobile (unibus.aar) a partir de ./mobile sobre pkg/client.
|
||||||
|
#
|
||||||
|
# El .aar (~38 MB, con libgojni.so para 4 ABIs) NO se versiona: es un artefacto
|
||||||
|
# de build reproducible. Este script lo regenera. Requisitos:
|
||||||
|
# - Go con gomobile/gobind instalados:
|
||||||
|
# go install golang.org/x/mobile/cmd/gomobile@latest
|
||||||
|
# go install golang.org/x/mobile/cmd/gobind@latest
|
||||||
|
# gomobile init
|
||||||
|
# - Android NDK (este repo usó 26.3.11579264 dentro del Android SDK).
|
||||||
|
#
|
||||||
|
# En un worktree fuera del árbol del registry, pkg/client importa
|
||||||
|
# "fn-registry/functions/cybersecurity" vía el `replace` del go.mod. Si ese
|
||||||
|
# replace relativo no resuelve (p. ej. worktree en /tmp), crea un go.work local
|
||||||
|
# (gitignored) con: replace fn-registry => /ruta/absoluta/a/fn_registry
|
||||||
|
set -euo pipefail
|
||||||
|
|
||||||
|
cd "$(dirname "$0")/.."
|
||||||
|
|
||||||
|
: "${ANDROID_HOME:=$HOME/android-sdk}"
|
||||||
|
: "${ANDROID_NDK_HOME:=$ANDROID_HOME/ndk/26.3.11579264}"
|
||||||
|
export ANDROID_HOME ANDROID_NDK_HOME
|
||||||
|
export PATH="$HOME/go/bin:$PATH"
|
||||||
|
|
||||||
|
OUT="android/app/libs/unibus.aar"
|
||||||
|
mkdir -p "$(dirname "$OUT")"
|
||||||
|
|
||||||
|
echo "==> gomobile bind -> $OUT"
|
||||||
|
gomobile bind \
|
||||||
|
-target=android \
|
||||||
|
-androidapi 21 \
|
||||||
|
-javapkg com.unibus.core \
|
||||||
|
-o "$OUT" \
|
||||||
|
./mobile
|
||||||
|
|
||||||
|
echo "==> OK: $OUT"
|
||||||
|
ls -lh "$OUT"
|
||||||
+84
-9
@@ -22,9 +22,14 @@ import (
|
|||||||
)
|
)
|
||||||
|
|
||||||
// FrameListener receives decrypted messages for a subscribed room. The Android
|
// FrameListener receives decrypted messages for a subscribed room. The Android
|
||||||
// side implements this interface. Its methods are invoked from a NATS delivery
|
// side implements this interface.
|
||||||
// goroutine, so implementations must hop back to the UI thread (for example via
|
//
|
||||||
// a coroutine on the main dispatcher) before touching Android views.
|
// IMPORTANT (threading): OnFrame is invoked from a NATS delivery goroutine, NOT
|
||||||
|
// the Android main thread. A Kotlin implementation MUST hop back to the UI
|
||||||
|
// thread before touching any Compose state or Android view — for example with
|
||||||
|
// `withContext(Dispatchers.Main)` from a coroutine, or by posting to a
|
||||||
|
// MutableStateFlow that the UI collects. Touching views directly from here
|
||||||
|
// crashes with CalledFromWrongThreadException.
|
||||||
type FrameListener interface {
|
type FrameListener interface {
|
||||||
OnFrame(roomID string, sender string, msgID string, text string)
|
OnFrame(roomID string, sender string, msgID string, text string)
|
||||||
}
|
}
|
||||||
@@ -37,21 +42,26 @@ type Session struct {
|
|||||||
|
|
||||||
// GenerateIdentity creates (or loads) the long-term keypair stored at path.
|
// GenerateIdentity creates (or loads) the long-term keypair stored at path.
|
||||||
// Call it once on first launch. The resulting file holds the peer's private
|
// Call it once on first launch. The resulting file holds the peer's private
|
||||||
// Ed25519 and X25519 keys and must be kept private to the app sandbox.
|
// Ed25519 and X25519 keys and must be kept private to the app sandbox
|
||||||
|
// (use Context.getFilesDir() on Android).
|
||||||
func GenerateIdentity(path string) error {
|
func GenerateIdentity(path string) error {
|
||||||
_, err := client.LoadOrCreateIdentity(path)
|
_, err := client.LoadOrCreateIdentity(path)
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
|
|
||||||
// NewSession loads the identity at idPath and connects to the bus. natsURL is
|
// NewSession loads the identity at idPath and connects to the bus. natsURL is
|
||||||
// the data plane (for example nats://host:4250) and ctrlURL is the control
|
// the data plane (for example tls://host:4250) and ctrlURL is the control plane
|
||||||
// plane HTTP endpoint (for example http://host:8470).
|
// HTTP endpoint (for example https://host:8470). caPath is the path to the bus
|
||||||
func NewSession(idPath, natsURL, ctrlURL string) (*Session, error) {
|
// CA certificate (ca.crt) bundled with the app: when set, the session connects
|
||||||
|
// securely (TLS pinned to that CA + nkey authentication on the data plane),
|
||||||
|
// matching a bus running with auth + TLS. Pass an empty caPath to connect in
|
||||||
|
// plaintext to an unsecured (dev) bus.
|
||||||
|
func NewSession(idPath, natsURL, ctrlURL, caPath string) (*Session, error) {
|
||||||
id, err := client.LoadOrCreateIdentity(idPath)
|
id, err := client.LoadOrCreateIdentity(idPath)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, err
|
return nil, err
|
||||||
}
|
}
|
||||||
c, err := client.New(natsURL, ctrlURL, id)
|
c, err := client.Connect(natsURL, ctrlURL, id, caPath)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, err
|
return nil, err
|
||||||
}
|
}
|
||||||
@@ -64,9 +74,24 @@ func (s *Session) EndpointID() string {
|
|||||||
return s.c.Endpoint().ID
|
return s.c.Endpoint().ID
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// ConnectedServer returns the NATS URL the session is currently connected to,
|
||||||
|
// useful for surfacing a "connected to" hint in the UI.
|
||||||
|
func (s *Session) ConnectedServer() string {
|
||||||
|
return s.c.ConnectedServer()
|
||||||
|
}
|
||||||
|
|
||||||
|
// IsConnected reports whether the underlying NATS connection is live.
|
||||||
|
func (s *Session) IsConnected() bool {
|
||||||
|
return s.c.IsConnected()
|
||||||
|
}
|
||||||
|
|
||||||
// CreateRoom opens a room on the given subject. mode is "matrix" for the
|
// CreateRoom opens a room on the given subject. mode is "matrix" for the
|
||||||
// encrypted, persisted and signed policy, or "nats" for plain cleartext. It
|
// encrypted, persisted and signed policy, or "nats" for plain cleartext. It
|
||||||
// returns the room id used by Join, Publish and Subscribe.
|
// returns the room id used by Join, Publish and Subscribe.
|
||||||
|
//
|
||||||
|
// On a secured bus, call RefreshSession after CreateRoom and before
|
||||||
|
// Subscribe/Publish so the bus re-derives this peer's per-subject permissions
|
||||||
|
// from its new membership (issue 0006e).
|
||||||
func (s *Session) CreateRoom(subject, mode string) (string, error) {
|
func (s *Session) CreateRoom(subject, mode string) (string, error) {
|
||||||
p := room.ModeNATS
|
p := room.ModeNATS
|
||||||
if mode == "matrix" {
|
if mode == "matrix" {
|
||||||
@@ -81,13 +106,27 @@ func (s *Session) Join(roomID string) error {
|
|||||||
return s.c.Join(roomID)
|
return s.c.Join(roomID)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// RefreshSession reconnects the data plane so the bus re-derives this peer's
|
||||||
|
// per-subject permissions from its current room membership.
|
||||||
|
//
|
||||||
|
// Membership-change contract (issue 0006e): a secured bus (--bus-auth enforce)
|
||||||
|
// freezes a connection's permissions at connect time. After ANY membership change
|
||||||
|
// — a room you just created, were invited to, or joined — call RefreshSession
|
||||||
|
// BEFORE Publish/Subscribe on that room, or the bus denies the new room's subject.
|
||||||
|
// It also drops active subscriptions, so re-Subscribe afterwards. On an unsecured
|
||||||
|
// bus it is a harmless reconnect. A mobile/gateway caller wires this exactly like
|
||||||
|
// cmd/chat and cmd/worker do: CreateRoom -> RefreshSession -> Subscribe/Publish.
|
||||||
|
func (s *Session) RefreshSession() error {
|
||||||
|
return s.c.RefreshSession()
|
||||||
|
}
|
||||||
|
|
||||||
// Publish sends a UTF-8 text message to the room.
|
// Publish sends a UTF-8 text message to the room.
|
||||||
func (s *Session) Publish(roomID, text string) error {
|
func (s *Session) Publish(roomID, text string) error {
|
||||||
return s.c.Publish(roomID, []byte(text))
|
return s.c.Publish(roomID, []byte(text))
|
||||||
}
|
}
|
||||||
|
|
||||||
// Subscribe streams decrypted messages of the room to the listener until the
|
// Subscribe streams decrypted messages of the room to the listener until the
|
||||||
// session is closed.
|
// session is closed. See FrameListener for the threading contract.
|
||||||
func (s *Session) Subscribe(roomID string, l FrameListener) error {
|
func (s *Session) Subscribe(roomID string, l FrameListener) error {
|
||||||
_, err := s.c.Subscribe(roomID, func(f frame.Frame, plaintext []byte) {
|
_, err := s.c.Subscribe(roomID, func(f frame.Frame, plaintext []byte) {
|
||||||
l.OnFrame(roomID, f.Sender, f.MsgID, string(plaintext))
|
l.OnFrame(roomID, f.Sender, f.MsgID, string(plaintext))
|
||||||
@@ -95,6 +134,42 @@ func (s *Session) Subscribe(roomID string, l FrameListener) error {
|
|||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// roomJSON is the flat shape returned by ListRoomsJSON for each room the peer
|
||||||
|
// belongs to. It mirrors the fields the UI needs to render a room list item.
|
||||||
|
type roomJSON struct {
|
||||||
|
RoomID string `json:"room_id"`
|
||||||
|
Subject string `json:"subject"`
|
||||||
|
Epoch int `json:"epoch"`
|
||||||
|
Encrypted bool `json:"encrypted"`
|
||||||
|
Role string `json:"role"`
|
||||||
|
}
|
||||||
|
|
||||||
|
// ListRoomsJSON returns the peer's rooms as a JSON array string. gomobile does
|
||||||
|
// not bind slices of structs cleanly across the boundary, so the list is
|
||||||
|
// marshalled to JSON and the Kotlin side decodes it (kotlinx.serialization).
|
||||||
|
// Each element is a roomJSON object.
|
||||||
|
func (s *Session) ListRoomsJSON() (string, error) {
|
||||||
|
refs, err := s.c.ListMyRooms()
|
||||||
|
if err != nil {
|
||||||
|
return "", err
|
||||||
|
}
|
||||||
|
out := make([]roomJSON, 0, len(refs))
|
||||||
|
for _, r := range refs {
|
||||||
|
out = append(out, roomJSON{
|
||||||
|
RoomID: r.RoomID,
|
||||||
|
Subject: r.Subject,
|
||||||
|
Epoch: r.Epoch,
|
||||||
|
Encrypted: r.Policy.Encrypt,
|
||||||
|
Role: r.Role,
|
||||||
|
})
|
||||||
|
}
|
||||||
|
b, err := json.Marshal(out)
|
||||||
|
if err != nil {
|
||||||
|
return "", err
|
||||||
|
}
|
||||||
|
return string(b), nil
|
||||||
|
}
|
||||||
|
|
||||||
// cardJSON is the portable, copy-pasteable public identity a peer shares so a
|
// cardJSON is the portable, copy-pasteable public identity a peer shares so a
|
||||||
// room owner can invite it to an encrypted room. It carries no secret: only the
|
// room owner can invite it to an encrypted room. It carries no secret: only the
|
||||||
// endpoint id and the two public keys (signing + key-exchange), base64-encoded
|
// endpoint id and the two public keys (signing + key-exchange), base64-encoded
|
||||||
|
|||||||
+27
-10
@@ -1,9 +1,15 @@
|
|||||||
// Package blobstore is a content-addressed object store on local disk.
|
// Package blobstore is a content-addressed object store for media ciphertext.
|
||||||
//
|
//
|
||||||
// The bus transports messages, not blobs. Media (images, files, large payloads)
|
// The bus transports messages, not blobs. Media (images, files, large payloads)
|
||||||
// is encrypted by the client BEFORE being stored here, so the store only ever
|
// is encrypted by the client BEFORE being stored here, so the store only ever
|
||||||
// sees ciphertext. Objects are addressed by the sha256 hex of their (encrypted)
|
// sees ciphertext. Objects are addressed by the sha256 hex of their (encrypted)
|
||||||
// bytes, which makes Put idempotent and deduplicating.
|
// bytes, which makes Put idempotent and deduplicating.
|
||||||
|
//
|
||||||
|
// Store is an interface (branch-by-abstraction, issue 0003d) with two backends:
|
||||||
|
// diskStore (the default, local filesystem) and objectStore (NATS Object Store
|
||||||
|
// on JetStream, replicated across the cluster so blobs survive a node loss and
|
||||||
|
// are reachable from any node). The wire contract (sha256-hex addressing) is
|
||||||
|
// identical, so a client cannot tell which backend a membershipd uses.
|
||||||
package blobstore
|
package blobstore
|
||||||
|
|
||||||
import (
|
import (
|
||||||
@@ -14,27 +20,38 @@ import (
|
|||||||
"path/filepath"
|
"path/filepath"
|
||||||
)
|
)
|
||||||
|
|
||||||
// Store is a directory-backed content-addressed blob store.
|
// Store is a content-addressed blob store: Put returns the sha256-hex address of
|
||||||
type Store struct {
|
// the stored bytes, Get fetches by that address, Has reports presence.
|
||||||
|
type Store interface {
|
||||||
|
Put(data []byte) (string, error)
|
||||||
|
Get(hash string) ([]byte, error)
|
||||||
|
Has(hash string) bool
|
||||||
|
}
|
||||||
|
|
||||||
|
// diskStore is a directory-backed content-addressed blob store (the default,
|
||||||
|
// single-node backend).
|
||||||
|
type diskStore struct {
|
||||||
dir string
|
dir string
|
||||||
}
|
}
|
||||||
|
|
||||||
// New creates a Store rooted at dir, creating the directory if needed.
|
// New creates a disk-backed Store rooted at dir, creating the directory if
|
||||||
func New(dir string) (*Store, error) {
|
// needed. It remains the default backend; the replicated NATS Object Store is
|
||||||
|
// constructed separately (NewObjectStore) when decentralization is enabled.
|
||||||
|
func New(dir string) (Store, error) {
|
||||||
if err := os.MkdirAll(dir, 0o755); err != nil {
|
if err := os.MkdirAll(dir, 0o755); err != nil {
|
||||||
return nil, fmt.Errorf("blobstore: mkdir %q: %w", dir, err)
|
return nil, fmt.Errorf("blobstore: mkdir %q: %w", dir, err)
|
||||||
}
|
}
|
||||||
return &Store{dir: dir}, nil
|
return &diskStore{dir: dir}, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
// path returns the on-disk path for a given content hash.
|
// path returns the on-disk path for a given content hash.
|
||||||
func (s *Store) path(hash string) string {
|
func (s *diskStore) path(hash string) string {
|
||||||
return filepath.Join(s.dir, hash)
|
return filepath.Join(s.dir, hash)
|
||||||
}
|
}
|
||||||
|
|
||||||
// Put writes data to the store and returns its sha256 hex hash. If an object
|
// Put writes data to the store and returns its sha256 hex hash. If an object
|
||||||
// with the same content already exists, Put is a no-op and returns the hash.
|
// with the same content already exists, Put is a no-op and returns the hash.
|
||||||
func (s *Store) Put(data []byte) (string, error) {
|
func (s *diskStore) Put(data []byte) (string, error) {
|
||||||
sum := sha256.Sum256(data)
|
sum := sha256.Sum256(data)
|
||||||
hash := hex.EncodeToString(sum[:])
|
hash := hex.EncodeToString(sum[:])
|
||||||
p := s.path(hash)
|
p := s.path(hash)
|
||||||
@@ -66,7 +83,7 @@ func (s *Store) Put(data []byte) (string, error) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Get reads the object with the given hash.
|
// Get reads the object with the given hash.
|
||||||
func (s *Store) Get(hash string) ([]byte, error) {
|
func (s *diskStore) Get(hash string) ([]byte, error) {
|
||||||
data, err := os.ReadFile(s.path(hash))
|
data, err := os.ReadFile(s.path(hash))
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, fmt.Errorf("blobstore: get %q: %w", hash, err)
|
return nil, fmt.Errorf("blobstore: get %q: %w", hash, err)
|
||||||
@@ -75,7 +92,7 @@ func (s *Store) Get(hash string) ([]byte, error) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Has reports whether an object with the given hash exists.
|
// Has reports whether an object with the given hash exists.
|
||||||
func (s *Store) Has(hash string) bool {
|
func (s *diskStore) Has(hash string) bool {
|
||||||
_, err := os.Stat(s.path(hash))
|
_, err := os.Stat(s.path(hash))
|
||||||
return err == nil
|
return err == nil
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -0,0 +1,102 @@
|
|||||||
|
package blobstore
|
||||||
|
|
||||||
|
// objectStore is the NATS Object Store implementation of Store (issue 0003d):
|
||||||
|
// media ciphertext lives in a JetStream Object Store bucket replicated across
|
||||||
|
// the cluster, so a blob uploaded to one node is durable against the loss of a
|
||||||
|
// node and readable from any node. It is selected when decentralization is on;
|
||||||
|
// diskStore stays the single-node default. The content-addressing (sha256-hex)
|
||||||
|
// is identical to the disk backend, so the wire contract does not change.
|
||||||
|
|
||||||
|
import (
|
||||||
|
"context"
|
||||||
|
"crypto/sha256"
|
||||||
|
"encoding/hex"
|
||||||
|
"fmt"
|
||||||
|
"time"
|
||||||
|
|
||||||
|
"github.com/nats-io/nats.go/jetstream"
|
||||||
|
)
|
||||||
|
|
||||||
|
const (
|
||||||
|
defaultObjectBucket = "UNIBUS_blobs"
|
||||||
|
defaultObjOpTime = 10 * time.Second
|
||||||
|
)
|
||||||
|
|
||||||
|
// ObjectStoreConfig configures the replicated Object Store backend.
|
||||||
|
type ObjectStoreConfig struct {
|
||||||
|
// Bucket is the object store bucket name; empty uses UNIBUS_blobs.
|
||||||
|
Bucket string
|
||||||
|
// Replicas is the replication factor (R1..R5), matching the KV store's
|
||||||
|
// R1->R3 rollout.
|
||||||
|
Replicas int
|
||||||
|
// OpTimeout bounds each object operation; zero uses defaultObjOpTime.
|
||||||
|
OpTimeout time.Duration
|
||||||
|
}
|
||||||
|
|
||||||
|
type objectStore struct {
|
||||||
|
os jetstream.ObjectStore
|
||||||
|
opTimeout time.Duration
|
||||||
|
}
|
||||||
|
|
||||||
|
// NewObjectStore creates (or opens) the replicated Object Store bucket on js and
|
||||||
|
// returns it as a Store. The JetStream context belongs to the caller.
|
||||||
|
func NewObjectStore(js jetstream.JetStream, cfg ObjectStoreConfig) (Store, error) {
|
||||||
|
if cfg.Bucket == "" {
|
||||||
|
cfg.Bucket = defaultObjectBucket
|
||||||
|
}
|
||||||
|
if cfg.Replicas <= 0 {
|
||||||
|
cfg.Replicas = 1
|
||||||
|
}
|
||||||
|
opTimeout := cfg.OpTimeout
|
||||||
|
if opTimeout <= 0 {
|
||||||
|
opTimeout = defaultObjOpTime
|
||||||
|
}
|
||||||
|
ctx, cancel := context.WithTimeout(context.Background(), 15*time.Second)
|
||||||
|
defer cancel()
|
||||||
|
obj, err := js.CreateOrUpdateObjectStore(ctx, jetstream.ObjectStoreConfig{
|
||||||
|
Bucket: cfg.Bucket,
|
||||||
|
Replicas: cfg.Replicas,
|
||||||
|
Storage: jetstream.FileStorage,
|
||||||
|
})
|
||||||
|
if err != nil {
|
||||||
|
return nil, fmt.Errorf("blobstore: open object store %q (replicas=%d): %w", cfg.Bucket, cfg.Replicas, err)
|
||||||
|
}
|
||||||
|
return &objectStore{os: obj, opTimeout: opTimeout}, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func (s *objectStore) ctx() (context.Context, context.CancelFunc) {
|
||||||
|
return context.WithTimeout(context.Background(), s.opTimeout)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Put stores data under its sha256-hex address. Re-putting identical bytes is a
|
||||||
|
// harmless overwrite (same address, same content), preserving the idempotent,
|
||||||
|
// deduplicating semantics of the disk backend.
|
||||||
|
func (s *objectStore) Put(data []byte) (string, error) {
|
||||||
|
sum := sha256.Sum256(data)
|
||||||
|
hash := hex.EncodeToString(sum[:])
|
||||||
|
ctx, cancel := s.ctx()
|
||||||
|
defer cancel()
|
||||||
|
if _, err := s.os.PutBytes(ctx, hash, data); err != nil {
|
||||||
|
return "", fmt.Errorf("blobstore: put object %q: %w", hash, err)
|
||||||
|
}
|
||||||
|
return hash, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// Get fetches the object by its hash address.
|
||||||
|
func (s *objectStore) Get(hash string) ([]byte, error) {
|
||||||
|
ctx, cancel := s.ctx()
|
||||||
|
defer cancel()
|
||||||
|
data, err := s.os.GetBytes(ctx, hash)
|
||||||
|
if err != nil {
|
||||||
|
return nil, fmt.Errorf("blobstore: get object %q: %w", hash, err)
|
||||||
|
}
|
||||||
|
return data, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// Has reports whether an object with the given hash exists.
|
||||||
|
func (s *objectStore) Has(hash string) bool {
|
||||||
|
ctx, cancel := s.ctx()
|
||||||
|
defer cancel()
|
||||||
|
_, err := s.os.GetInfo(ctx, hash)
|
||||||
|
return err == nil
|
||||||
|
}
|
||||||
@@ -0,0 +1,132 @@
|
|||||||
|
package blobstore_test
|
||||||
|
|
||||||
|
import (
|
||||||
|
"bytes"
|
||||||
|
"crypto/sha256"
|
||||||
|
"encoding/hex"
|
||||||
|
"net"
|
||||||
|
"testing"
|
||||||
|
"time"
|
||||||
|
|
||||||
|
"github.com/enmanuel/unibus/pkg/blobstore"
|
||||||
|
"github.com/enmanuel/unibus/pkg/embeddednats"
|
||||||
|
"github.com/nats-io/nats.go"
|
||||||
|
"github.com/nats-io/nats.go/jetstream"
|
||||||
|
)
|
||||||
|
|
||||||
|
func objFreePort(t *testing.T) int {
|
||||||
|
t.Helper()
|
||||||
|
l, err := net.Listen("tcp", "127.0.0.1:0")
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("free port: %v", err)
|
||||||
|
}
|
||||||
|
defer l.Close()
|
||||||
|
return l.Addr().(*net.TCPAddr).Port
|
||||||
|
}
|
||||||
|
|
||||||
|
// newObjectStore boots a single-node embedded NATS with JetStream and returns a
|
||||||
|
// replicated (R1) Object Store backend over it.
|
||||||
|
func newObjectStore(t *testing.T) blobstore.Store {
|
||||||
|
t.Helper()
|
||||||
|
ns, err := embeddednats.StartServer(embeddednats.ServerConfig{
|
||||||
|
StoreDir: t.TempDir(),
|
||||||
|
Host: "127.0.0.1",
|
||||||
|
Port: objFreePort(t),
|
||||||
|
})
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("embedded nats: %v", err)
|
||||||
|
}
|
||||||
|
nc, err := nats.Connect(ns.ClientURL())
|
||||||
|
if err != nil {
|
||||||
|
ns.Shutdown()
|
||||||
|
t.Fatalf("nats connect: %v", err)
|
||||||
|
}
|
||||||
|
js, err := jetstream.New(nc)
|
||||||
|
if err != nil {
|
||||||
|
nc.Close()
|
||||||
|
ns.Shutdown()
|
||||||
|
t.Fatalf("jetstream: %v", err)
|
||||||
|
}
|
||||||
|
st, err := blobstore.NewObjectStore(js, blobstore.ObjectStoreConfig{Replicas: 1, OpTimeout: 5 * time.Second})
|
||||||
|
if err != nil {
|
||||||
|
nc.Close()
|
||||||
|
ns.Shutdown()
|
||||||
|
t.Fatalf("new object store: %v", err)
|
||||||
|
}
|
||||||
|
t.Cleanup(func() { nc.Close(); ns.Shutdown(); ns.WaitForShutdown() })
|
||||||
|
return st
|
||||||
|
}
|
||||||
|
|
||||||
|
// TestObjectStoreRoundTrip is the golden path: put ciphertext, get it back by
|
||||||
|
// its hash, Has reports presence, and re-putting identical bytes returns the
|
||||||
|
// same address (content-addressed dedup).
|
||||||
|
func TestObjectStoreRoundTrip(t *testing.T) {
|
||||||
|
s := newObjectStore(t)
|
||||||
|
data := []byte("encrypted-media-ciphertext-payload")
|
||||||
|
|
||||||
|
hash, err := s.Put(data)
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("Put: %v", err)
|
||||||
|
}
|
||||||
|
want := hex.EncodeToString(sha256Sum(data))
|
||||||
|
if hash != want {
|
||||||
|
t.Fatalf("hash = %q, want sha256 hex %q", hash, want)
|
||||||
|
}
|
||||||
|
got, err := s.Get(hash)
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("Get: %v", err)
|
||||||
|
}
|
||||||
|
if !bytes.Equal(got, data) {
|
||||||
|
t.Fatalf("Get returned %q, want %q", got, data)
|
||||||
|
}
|
||||||
|
if !s.Has(hash) {
|
||||||
|
t.Fatalf("Has should be true for a stored blob")
|
||||||
|
}
|
||||||
|
// Re-put identical bytes: same address, no error.
|
||||||
|
hash2, err := s.Put(data)
|
||||||
|
if err != nil || hash2 != hash {
|
||||||
|
t.Fatalf("re-Put: hash2=%q err=%v (want %q)", hash2, err, hash)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// TestObjectStoreMissing is the edge/error path: a hash that was never stored
|
||||||
|
// is absent and unreadable.
|
||||||
|
func TestObjectStoreMissing(t *testing.T) {
|
||||||
|
s := newObjectStore(t)
|
||||||
|
missing := hex.EncodeToString(sha256Sum([]byte("never stored")))
|
||||||
|
if s.Has(missing) {
|
||||||
|
t.Fatalf("Has should be false for an unknown hash")
|
||||||
|
}
|
||||||
|
if _, err := s.Get(missing); err == nil {
|
||||||
|
t.Fatalf("Get of an unknown hash should error")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// TestObjectStoreAddressMatchesDisk is the contract test: the Object Store and
|
||||||
|
// the disk backend address identical bytes to the IDENTICAL hash, so a client
|
||||||
|
// cannot tell which backend a node uses and a blob ref is portable across them.
|
||||||
|
func TestObjectStoreAddressMatchesDisk(t *testing.T) {
|
||||||
|
obj := newObjectStore(t)
|
||||||
|
disk, err := blobstore.New(t.TempDir())
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("disk store: %v", err)
|
||||||
|
}
|
||||||
|
for _, payload := range [][]byte{[]byte("a"), []byte("longer ciphertext blob \x00\x01\x02"), {}} {
|
||||||
|
oh, err := obj.Put(payload)
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("object Put: %v", err)
|
||||||
|
}
|
||||||
|
dh, err := disk.Put(payload)
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("disk Put: %v", err)
|
||||||
|
}
|
||||||
|
if oh != dh {
|
||||||
|
t.Fatalf("address mismatch for %q: object=%q disk=%q", payload, oh, dh)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func sha256Sum(b []byte) []byte {
|
||||||
|
sum := sha256.Sum256(b)
|
||||||
|
return sum[:]
|
||||||
|
}
|
||||||
@@ -0,0 +1,154 @@
|
|||||||
|
package busauth
|
||||||
|
|
||||||
|
import (
|
||||||
|
"encoding/base64"
|
||||||
|
|
||||||
|
server "github.com/nats-io/nats-server/v2/server"
|
||||||
|
"github.com/nats-io/nkeys"
|
||||||
|
)
|
||||||
|
|
||||||
|
// nkeyAuthenticator is a NATS server.Authentication that authorizes a client by
|
||||||
|
// verifying the nkey signature over the server-presented nonce and then
|
||||||
|
// consulting the bus user allowlist. Authorization is checked on every new
|
||||||
|
// connection via the injected predicate (not a static Options.Nkeys map), so
|
||||||
|
// revoking a user denies its next connection without restarting the server.
|
||||||
|
type nkeyAuthenticator struct {
|
||||||
|
// isAuthorized reports whether the lowercase-hex Ed25519 public key behind an
|
||||||
|
// nkey belongs to an active bus user. Injected (membership.Store.IsAuthorized)
|
||||||
|
// so this package stays free of the store dependency.
|
||||||
|
isAuthorized func(signPubHex string) bool
|
||||||
|
}
|
||||||
|
|
||||||
|
// NewNkeyAuthenticator builds a NATS custom authenticator backed by isAuthorized.
|
||||||
|
// Pass it to embeddednats so the data plane only accepts registered identities.
|
||||||
|
func NewNkeyAuthenticator(isAuthorized func(signPubHex string) bool) server.Authentication {
|
||||||
|
return &nkeyAuthenticator{isAuthorized: isAuthorized}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Check verifies the client's nkey signature against the nonce the server
|
||||||
|
// presented, then maps the nkey to its allowlist key and checks authorization.
|
||||||
|
// Any malformed input or failed verification yields false (fail closed).
|
||||||
|
func (a *nkeyAuthenticator) Check(c server.ClientAuthentication) bool {
|
||||||
|
signPubHex, ok := verifyNkey(c)
|
||||||
|
if !ok {
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
return a.isAuthorized(signPubHex)
|
||||||
|
}
|
||||||
|
|
||||||
|
// verifyNkey performs the shared nkey verification: it checks the client's
|
||||||
|
// signature against the server-presented nonce and returns the lowercase-hex
|
||||||
|
// Ed25519 public key behind the nkey. ok is false on any malformed input or
|
||||||
|
// failed verification (fail closed). The signature decoding mirrors
|
||||||
|
// nats-server's own (raw-url base64, then std base64 fallback) so genuine
|
||||||
|
// clients using nats.Nkey are accepted unchanged.
|
||||||
|
func verifyNkey(c server.ClientAuthentication) (signPubHex string, ok bool) {
|
||||||
|
opts := c.GetOpts()
|
||||||
|
if opts.Nkey == "" {
|
||||||
|
return "", false
|
||||||
|
}
|
||||||
|
sig, err := base64.RawURLEncoding.DecodeString(opts.Sig)
|
||||||
|
if err != nil {
|
||||||
|
sig, err = base64.StdEncoding.DecodeString(opts.Sig)
|
||||||
|
if err != nil {
|
||||||
|
return "", false
|
||||||
|
}
|
||||||
|
}
|
||||||
|
pub, err := nkeys.FromPublicKey(opts.Nkey)
|
||||||
|
if err != nil {
|
||||||
|
return "", false
|
||||||
|
}
|
||||||
|
if err := pub.Verify(c.GetNonce(), sig); err != nil {
|
||||||
|
return "", false
|
||||||
|
}
|
||||||
|
signPubHex, err = SignPubHexFromNkey(opts.Nkey)
|
||||||
|
if err != nil {
|
||||||
|
return "", false
|
||||||
|
}
|
||||||
|
return signPubHex, true
|
||||||
|
}
|
||||||
|
|
||||||
|
// PermissionsFunc maps a connecting identity (lowercase-hex Ed25519 signing key)
|
||||||
|
// to the NATS permissions it should be granted for this connection. Returning an
|
||||||
|
// error denies the connection (fail closed). It is how the data plane enforces
|
||||||
|
// per-subject access from room membership (issue 0003e, audit H4 residual).
|
||||||
|
type PermissionsFunc func(signPubHex string) (*server.Permissions, error)
|
||||||
|
|
||||||
|
// nkeyAuthenticatorACL is the nkey authenticator that ALSO scopes the connection
|
||||||
|
// to per-subject permissions derived from room membership. NATS evaluates
|
||||||
|
// permissions once, at connect time, so a peer that joins a room after
|
||||||
|
// connecting must reconnect (client.RefreshSession) to gain that room's subject
|
||||||
|
// — the dynamic-membership reconnection model the audit deferred to this issue.
|
||||||
|
type nkeyAuthenticatorACL struct {
|
||||||
|
isAuthorized func(signPubHex string) bool
|
||||||
|
perms PermissionsFunc
|
||||||
|
// internalPubHex is the lowercase-hex Ed25519 public key of membershipd's own
|
||||||
|
// ephemeral internal service identity. A connection that proves that key is
|
||||||
|
// granted full permissions WITHOUT consulting the allowlist, so the service can
|
||||||
|
// bootstrap and manage JetStream (the replicated nonce bucket and, when
|
||||||
|
// decentralized, the control-plane KV buckets) against its own embedded server
|
||||||
|
// even while the data plane confines every client to its rooms. Empty disables
|
||||||
|
// the internal-identity path entirely (behavior identical to a plain ACL
|
||||||
|
// authenticator).
|
||||||
|
internalPubHex string
|
||||||
|
}
|
||||||
|
|
||||||
|
// NewNkeyAuthenticatorACL builds an authenticator that authorizes by the bus
|
||||||
|
// allowlist AND registers per-subject permissions from perms. A registered but
|
||||||
|
// permission-less peer can no longer subscribe to or publish on arbitrary
|
||||||
|
// subjects: it is confined to the subjects of the rooms it belongs to (plus the
|
||||||
|
// client infrastructure subjects perms includes). This is the per-subject ACL
|
||||||
|
// the 0004 hardening left as a residual.
|
||||||
|
func NewNkeyAuthenticatorACL(isAuthorized func(signPubHex string) bool, perms PermissionsFunc) server.Authentication {
|
||||||
|
return &nkeyAuthenticatorACL{isAuthorized: isAuthorized, perms: perms}
|
||||||
|
}
|
||||||
|
|
||||||
|
// NewNkeyAuthenticatorACLInternal is NewNkeyAuthenticatorACL that also recognizes
|
||||||
|
// membershipd's internal service identity (internalPubHex, the lowercase hex of
|
||||||
|
// its ephemeral Ed25519 public key): a connection proving that key is granted
|
||||||
|
// full permissions without an allowlist lookup, so the service can create and
|
||||||
|
// manage JetStream against its own embedded server under enforce (issue 0006a/c —
|
||||||
|
// the replicated nonce bucket and the control-plane KV). Every other identity
|
||||||
|
// goes through the allowlist + per-subject ACL unchanged. An empty internalPubHex
|
||||||
|
// is identical to NewNkeyAuthenticatorACL, so this is a superset and safe to use
|
||||||
|
// everywhere the plain constructor was used.
|
||||||
|
func NewNkeyAuthenticatorACLInternal(isAuthorized func(signPubHex string) bool, perms PermissionsFunc, internalPubHex string) server.Authentication {
|
||||||
|
return &nkeyAuthenticatorACL{isAuthorized: isAuthorized, perms: perms, internalPubHex: internalPubHex}
|
||||||
|
}
|
||||||
|
|
||||||
|
// fullPermissions grants publish and subscribe on every subject (">"). It is the
|
||||||
|
// permission set for membershipd's own internal service connection, which must
|
||||||
|
// manage the JetStream control plane (nonce bucket + KV buckets) over NATS. It is
|
||||||
|
// NEVER granted to a bus user — only to the process's own ephemeral internal
|
||||||
|
// identity, recognized by exact public-key match in Check.
|
||||||
|
func fullPermissions() *server.Permissions {
|
||||||
|
sp := &server.SubjectPermission{Allow: []string{">"}}
|
||||||
|
return &server.Permissions{Publish: sp, Subscribe: sp}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Check verifies the nkey, authorizes against the allowlist, then derives and
|
||||||
|
// registers the connection's subject permissions. A permissions-derivation
|
||||||
|
// error denies the connection (fail closed) rather than granting open access.
|
||||||
|
func (a *nkeyAuthenticatorACL) Check(c server.ClientAuthentication) bool {
|
||||||
|
signPubHex, ok := verifyNkey(c)
|
||||||
|
if !ok {
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
// membershipd's own internal service identity bypasses the allowlist and is
|
||||||
|
// granted full permissions so the service can bootstrap JetStream under
|
||||||
|
// enforce. The key is matched exactly against the cryptographically verified
|
||||||
|
// connecting key, so no other identity can claim these permissions.
|
||||||
|
if a.internalPubHex != "" && signPubHex == a.internalPubHex {
|
||||||
|
c.RegisterUser(&server.User{Permissions: fullPermissions()})
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
if !a.isAuthorized(signPubHex) {
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
perms, err := a.perms(signPubHex)
|
||||||
|
if err != nil {
|
||||||
|
return false // fail closed: never grant open access on a derivation error
|
||||||
|
}
|
||||||
|
c.RegisterUser(&server.User{Permissions: perms})
|
||||||
|
return true
|
||||||
|
}
|
||||||
@@ -0,0 +1,76 @@
|
|||||||
|
// Package busauth bridges a unibus peer's Ed25519 identity to NATS nkey
|
||||||
|
// authentication. A NATS nkey IS an Ed25519 keypair, so the bus reuses the
|
||||||
|
// peer's existing signing identity for the data plane instead of minting new
|
||||||
|
// key material — one identity authenticates both planes (HTTP request signatures
|
||||||
|
// and NATS connections), keyed in the user allowlist by the same Ed25519 public
|
||||||
|
// key.
|
||||||
|
//
|
||||||
|
// This is transport glue specific to NATS + unibus, not a general-purpose
|
||||||
|
// registry primitive: it deliberately lives in the app to avoid pulling
|
||||||
|
// github.com/nats-io/nkeys into the multi-domain registry module. The Ed25519
|
||||||
|
// signing/verification it relies on comes from the registry cybersecurity
|
||||||
|
// package; this package never reimplements a primitive.
|
||||||
|
package busauth
|
||||||
|
|
||||||
|
import (
|
||||||
|
"crypto/ed25519"
|
||||||
|
"encoding/hex"
|
||||||
|
"fmt"
|
||||||
|
|
||||||
|
"github.com/nats-io/nkeys"
|
||||||
|
)
|
||||||
|
|
||||||
|
// ClientNkey derives, from a peer's Ed25519 private key, the NATS user nkey
|
||||||
|
// public string ("U...") and a signature callback suitable for
|
||||||
|
// nats.Nkey(pub, sign). The callback signs the server-presented nonce with the
|
||||||
|
// same Ed25519 key, so the server can verify it and map it back to the bus user.
|
||||||
|
//
|
||||||
|
// signPriv must be a 64-byte Ed25519 private key (as produced by the registry's
|
||||||
|
// GenerateIdentity). Its first 32 bytes are the seed nkeys needs.
|
||||||
|
func ClientNkey(signPriv []byte) (pub string, sign func([]byte) ([]byte, error), err error) {
|
||||||
|
if len(signPriv) != ed25519.PrivateKeySize {
|
||||||
|
return "", nil, fmt.Errorf("busauth: signPriv must be %d bytes, got %d", ed25519.PrivateKeySize, len(signPriv))
|
||||||
|
}
|
||||||
|
seed := ed25519.PrivateKey(signPriv).Seed() // 32-byte Ed25519 seed
|
||||||
|
kp, err := nkeys.FromRawSeed(nkeys.PrefixByteUser, seed)
|
||||||
|
if err != nil {
|
||||||
|
return "", nil, fmt.Errorf("busauth: derive nkey from seed: %w", err)
|
||||||
|
}
|
||||||
|
pub, err = kp.PublicKey()
|
||||||
|
if err != nil {
|
||||||
|
return "", nil, fmt.Errorf("busauth: nkey public key: %w", err)
|
||||||
|
}
|
||||||
|
sign = func(nonce []byte) ([]byte, error) {
|
||||||
|
return kp.Sign(nonce)
|
||||||
|
}
|
||||||
|
return pub, sign, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// NkeyPublicFromSignPub derives the NATS user nkey public string from a 32-byte
|
||||||
|
// Ed25519 public key. It is the inverse view of the identity used by callers
|
||||||
|
// that have only the public key (e.g. to display or pre-register an nkey).
|
||||||
|
func NkeyPublicFromSignPub(signPub []byte) (string, error) {
|
||||||
|
if len(signPub) != ed25519.PublicKeySize {
|
||||||
|
return "", fmt.Errorf("busauth: signPub must be %d bytes, got %d", ed25519.PublicKeySize, len(signPub))
|
||||||
|
}
|
||||||
|
pub, err := nkeys.Encode(nkeys.PrefixByteUser, signPub)
|
||||||
|
if err != nil {
|
||||||
|
return "", fmt.Errorf("busauth: encode nkey public: %w", err)
|
||||||
|
}
|
||||||
|
return string(pub), nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// SignPubHexFromNkey decodes a NATS user nkey public string ("U...") back to the
|
||||||
|
// lowercase hex of its 32-byte Ed25519 public key — the identity key used to
|
||||||
|
// look a peer up in the bus user allowlist. The server calls this to map the
|
||||||
|
// nkey a client presented to the users table.
|
||||||
|
func SignPubHexFromNkey(nkeyPub string) (string, error) {
|
||||||
|
raw, err := nkeys.Decode(nkeys.PrefixByteUser, []byte(nkeyPub))
|
||||||
|
if err != nil {
|
||||||
|
return "", fmt.Errorf("busauth: decode nkey %q: %w", nkeyPub, err)
|
||||||
|
}
|
||||||
|
if len(raw) != ed25519.PublicKeySize {
|
||||||
|
return "", fmt.Errorf("busauth: decoded nkey is %d bytes, want %d", len(raw), ed25519.PublicKeySize)
|
||||||
|
}
|
||||||
|
return hex.EncodeToString(raw), nil
|
||||||
|
}
|
||||||
@@ -0,0 +1,85 @@
|
|||||||
|
package busauth
|
||||||
|
|
||||||
|
import (
|
||||||
|
"bytes"
|
||||||
|
"crypto/ed25519"
|
||||||
|
"encoding/hex"
|
||||||
|
"testing"
|
||||||
|
|
||||||
|
cs "fn-registry/functions/cybersecurity"
|
||||||
|
|
||||||
|
"github.com/nats-io/nkeys"
|
||||||
|
)
|
||||||
|
|
||||||
|
// TestNkeyRoundTrip is the dedicated sign/verify round-trip the spec requires
|
||||||
|
// BEFORE the NATS server depends on this conversion. It proves three things end
|
||||||
|
// to end: (1) ClientNkey produces a signature callback whose output verifies
|
||||||
|
// under the derived nkey public key; (2) that signature is exactly the Ed25519
|
||||||
|
// signature of the same identity (the nkey is the same key, not a new one);
|
||||||
|
// (3) the nkey public string maps back to the identity's Ed25519 hex, which is
|
||||||
|
// the key the allowlist is indexed by.
|
||||||
|
func TestNkeyRoundTrip(t *testing.T) {
|
||||||
|
id, err := cs.GenerateIdentity()
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("identity: %v", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
pub, sign, err := ClientNkey(id.SignPriv)
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("ClientNkey: %v", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
// (1) The callback's signature over a server-style nonce verifies under the
|
||||||
|
// public nkey, exactly as the NATS server will verify it.
|
||||||
|
nonce := []byte("server-presented-nonce-1234567890")
|
||||||
|
sig, err := sign(nonce)
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("sign: %v", err)
|
||||||
|
}
|
||||||
|
kpPub, err := nkeys.FromPublicKey(pub)
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("FromPublicKey: %v", err)
|
||||||
|
}
|
||||||
|
if err := kpPub.Verify(nonce, sig); err != nil {
|
||||||
|
t.Fatalf("nkey verify failed: %v", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
// (2) The signature is the very same bytes as a raw Ed25519 sign with the
|
||||||
|
// identity's private key — confirming no separate key material was minted.
|
||||||
|
want := ed25519.Sign(ed25519.PrivateKey(id.SignPriv), nonce)
|
||||||
|
if !bytes.Equal(sig, want) {
|
||||||
|
t.Fatalf("nkey signature differs from Ed25519 signature of the same identity")
|
||||||
|
}
|
||||||
|
|
||||||
|
// (3) The nkey public maps back to the identity's Ed25519 hex (allowlist key).
|
||||||
|
gotHex, err := SignPubHexFromNkey(pub)
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("SignPubHexFromNkey: %v", err)
|
||||||
|
}
|
||||||
|
if gotHex != hex.EncodeToString(id.SignPub) {
|
||||||
|
t.Fatalf("nkey->hex mismatch: got %s want %s", gotHex, hex.EncodeToString(id.SignPub))
|
||||||
|
}
|
||||||
|
|
||||||
|
// And NkeyPublicFromSignPub is consistent with ClientNkey's public.
|
||||||
|
pub2, err := NkeyPublicFromSignPub(id.SignPub)
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("NkeyPublicFromSignPub: %v", err)
|
||||||
|
}
|
||||||
|
if pub2 != pub {
|
||||||
|
t.Fatalf("public nkey mismatch between derivations: %s vs %s", pub2, pub)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Error path: a wrong-length private key is rejected, not silently misused.
|
||||||
|
func TestClientNkeyBadKey(t *testing.T) {
|
||||||
|
if _, _, err := ClientNkey([]byte("too-short")); err == nil {
|
||||||
|
t.Fatalf("expected error for short private key")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Error path: a non-nkey string does not decode to an allowlist key.
|
||||||
|
func TestSignPubHexFromNkeyBad(t *testing.T) {
|
||||||
|
if _, err := SignPubHexFromNkey("not-a-real-nkey"); err == nil {
|
||||||
|
t.Fatalf("expected error decoding a bogus nkey")
|
||||||
|
}
|
||||||
|
}
|
||||||
@@ -0,0 +1,26 @@
|
|||||||
|
package busauth
|
||||||
|
|
||||||
|
import server "github.com/nats-io/nats-server/v2/server"
|
||||||
|
|
||||||
|
// PermissionsFromSubjects adapts a subject-deriving function (e.g.
|
||||||
|
// membership.SubjectACLFor, which maps an identity to the subjects of the rooms
|
||||||
|
// it belongs to plus the client infrastructure subjects) into the PermissionsFunc
|
||||||
|
// the ACL authenticator expects. The derived subjects are granted as BOTH the
|
||||||
|
// publish and subscribe allow set, so a connection can only pub/sub on the
|
||||||
|
// subjects it is entitled to. A derivation error is propagated so the caller
|
||||||
|
// fails closed (denies the connection) rather than granting open access.
|
||||||
|
//
|
||||||
|
// This is the production wiring for the per-subject data-plane ACL (issue 0003e,
|
||||||
|
// audit H4): membershipd passes PermissionsFromSubjects(membership.SubjectACLFor(
|
||||||
|
// store)) to NewNkeyAuthenticatorACL. It lives in busauth (not membership) so the
|
||||||
|
// membership package stays free of the nats-server dependency.
|
||||||
|
func PermissionsFromSubjects(derive func(signPubHex string) ([]string, error)) PermissionsFunc {
|
||||||
|
return func(signPubHex string) (*server.Permissions, error) {
|
||||||
|
subjects, err := derive(signPubHex)
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
sp := &server.SubjectPermission{Allow: subjects}
|
||||||
|
return &server.Permissions{Publish: sp, Subscribe: sp}, nil
|
||||||
|
}
|
||||||
|
}
|
||||||
@@ -0,0 +1,75 @@
|
|||||||
|
package busauth
|
||||||
|
|
||||||
|
import (
|
||||||
|
"crypto/tls"
|
||||||
|
"crypto/x509"
|
||||||
|
"fmt"
|
||||||
|
"os"
|
||||||
|
)
|
||||||
|
|
||||||
|
// LoadCATLSConfig reads the PEM file at caPEMPath and returns a *tls.Config
// whose RootCAs pool contains ONLY the certificates found in it, with TLS 1.2
// as the minimum version. It is meant for a bus client pinning the project's
// self-signed CA: because the bus uses a private CA rather than a public one,
// clients must pin it explicitly; trusting the system roots would reject the
// server cert. This is the single helper every client (Go peers, the mobile
// binding, the gateway) uses to turn a ca.crt path into a connection config.
//
// It returns an error when the file cannot be read or holds no valid PEM
// certificate.
func LoadCATLSConfig(caPEMPath string) (*tls.Config, error) {
	caBytes, readErr := os.ReadFile(caPEMPath)
	if readErr != nil {
		return nil, fmt.Errorf("busauth: read CA %q: %w", caPEMPath, readErr)
	}

	roots := x509.NewCertPool()
	if added := roots.AppendCertsFromPEM(caBytes); !added {
		return nil, fmt.Errorf("busauth: CA %q contains no valid PEM certificate", caPEMPath)
	}

	cfg := new(tls.Config)
	cfg.RootCAs = roots
	cfg.MinVersion = tls.VersionTLS12
	return cfg, nil
}
|
||||||
|
|
||||||
|
// ServerTLSConfig loads the bus NATS server's certificate and private key from
// their PEM files and returns a *tls.Config that presents that single
// certificate, with TLS 1.2 as the minimum version. The private key never
// leaves the host; only the CA cert travels to clients. A key pair that cannot
// be loaded is reported as an error.
func ServerTLSConfig(certPEMPath, keyPEMPath string) (*tls.Config, error) {
	keyPair, loadErr := tls.LoadX509KeyPair(certPEMPath, keyPEMPath)
	if loadErr != nil {
		return nil, fmt.Errorf("busauth: load server keypair: %w", loadErr)
	}

	cfg := new(tls.Config)
	cfg.Certificates = []tls.Certificate{keyPair}
	cfg.MinVersion = tls.VersionTLS12
	return cfg, nil
}
|
||||||
|
|
||||||
|
// RouteTLSConfig builds the mutual-TLS config for the NATS CLUSTER route layer
// (issue 0003a). On the client data plane the server presents a cert and only
// the client verifies it; routes are server-to-server, so each node both
// presents its own node certificate AND verifies the connecting node's
// certificate against the bus CA. The returned config therefore carries:
//
//   - Certificates: this node's CA-signed certificate (presented in both the
//     server and the client role of a route handshake),
//   - RootCAs: the bus CA, to verify the certificate of a node we dial out to,
//   - ClientCAs + ClientAuth=RequireAndVerifyClientCert: the bus CA, to verify
//     the certificate of a node dialing in,
//   - MinVersion: TLS 1.2.
//
// The effect: a node that lacks a certificate signed by the bus CA cannot
// establish a route in either direction, even if it knows the cluster password.
// Reuse the same CA as the client data plane (deploy/tls) but a per-node cert
// whose SAN covers that node's route address.
//
// An error is returned when the key pair cannot be loaded, the CA file cannot
// be read, or the CA file holds no valid PEM certificate.
func RouteTLSConfig(certPEMPath, keyPEMPath, caPEMPath string) (*tls.Config, error) {
	nodeCert, loadErr := tls.LoadX509KeyPair(certPEMPath, keyPEMPath)
	if loadErr != nil {
		return nil, fmt.Errorf("busauth: load route keypair: %w", loadErr)
	}

	caBytes, readErr := os.ReadFile(caPEMPath)
	if readErr != nil {
		return nil, fmt.Errorf("busauth: read route CA %q: %w", caPEMPath, readErr)
	}

	busCA := x509.NewCertPool()
	if added := busCA.AppendCertsFromPEM(caBytes); !added {
		return nil, fmt.Errorf("busauth: route CA %q contains no valid PEM certificate", caPEMPath)
	}

	// The same pool verifies peers in both directions of a route.
	cfg := new(tls.Config)
	cfg.Certificates = []tls.Certificate{nodeCert}
	cfg.RootCAs = busCA
	cfg.ClientCAs = busCA
	cfg.ClientAuth = tls.RequireAndVerifyClientCert
	cfg.MinVersion = tls.VersionTLS12
	return cfg, nil
}
|
||||||
@@ -0,0 +1,95 @@
|
|||||||
|
package busauth
|
||||||
|
|
||||||
|
import (
|
||||||
|
"crypto/ecdsa"
|
||||||
|
"crypto/elliptic"
|
||||||
|
"crypto/rand"
|
||||||
|
"crypto/x509"
|
||||||
|
"crypto/x509/pkix"
|
||||||
|
"encoding/pem"
|
||||||
|
"math/big"
|
||||||
|
"os"
|
||||||
|
"path/filepath"
|
||||||
|
"testing"
|
||||||
|
"time"
|
||||||
|
)
|
||||||
|
|
||||||
|
// writeSelfSigned generates a P-256 ECDSA key and a self-signed CA certificate
// valid from one hour ago to one hour from now, writes them to dir as
// "cert.pem" and "key.pem", and returns both paths. That pair is enough to
// exercise both LoadCATLSConfig (reads the cert as a CA) and ServerTLSConfig
// (reads the cert+key as a server keypair). Any failure aborts the test.
func writeSelfSigned(t *testing.T, dir string) (certPath, keyPath string) {
	t.Helper()

	priv, err := ecdsa.GenerateKey(elliptic.P256(), rand.Reader)
	if err != nil {
		t.Fatalf("key: %v", err)
	}

	now := time.Now()
	template := &x509.Certificate{
		SerialNumber:          big.NewInt(1),
		Subject:               pkix.Name{CommonName: "unibus-tls-test"},
		NotBefore:             now.Add(-time.Hour),
		NotAfter:              now.Add(time.Hour),
		IsCA:                  true,
		KeyUsage:              x509.KeyUsageCertSign | x509.KeyUsageDigitalSignature,
		BasicConstraintsValid: true,
	}
	// Self-signed: the template serves as its own parent.
	certDER, err := x509.CreateCertificate(rand.Reader, template, template, &priv.PublicKey, priv)
	if err != nil {
		t.Fatalf("cert: %v", err)
	}

	certPath, keyPath = filepath.Join(dir, "cert.pem"), filepath.Join(dir, "key.pem")

	certBlock := &pem.Block{Type: "CERTIFICATE", Bytes: certDER}
	if writeErr := os.WriteFile(certPath, pem.EncodeToMemory(certBlock), 0o644); writeErr != nil {
		t.Fatalf("write cert: %v", writeErr)
	}

	privDER, err := x509.MarshalECPrivateKey(priv)
	if err != nil {
		t.Fatalf("marshal key: %v", err)
	}
	keyBlock := &pem.Block{Type: "EC PRIVATE KEY", Bytes: privDER}
	// The key file is owner-readable only.
	if writeErr := os.WriteFile(keyPath, pem.EncodeToMemory(keyBlock), 0o600); writeErr != nil {
		t.Fatalf("write key: %v", writeErr)
	}
	return certPath, keyPath
}
|
||||||
|
|
||||||
|
// Golden: a valid CA PEM loads into a config with a non-empty RootCAs pool, and
|
||||||
|
// a valid keypair loads into a config presenting one certificate.
|
||||||
|
func TestLoadTLSConfigsGolden(t *testing.T) {
|
||||||
|
dir := t.TempDir()
|
||||||
|
certPath, keyPath := writeSelfSigned(t, dir)
|
||||||
|
|
||||||
|
caCfg, err := LoadCATLSConfig(certPath)
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("LoadCATLSConfig: %v", err)
|
||||||
|
}
|
||||||
|
if caCfg.RootCAs == nil {
|
||||||
|
t.Fatalf("expected a populated RootCAs pool")
|
||||||
|
}
|
||||||
|
|
||||||
|
srvCfg, err := ServerTLSConfig(certPath, keyPath)
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("ServerTLSConfig: %v", err)
|
||||||
|
}
|
||||||
|
if len(srvCfg.Certificates) != 1 {
|
||||||
|
t.Fatalf("expected exactly one server certificate, got %d", len(srvCfg.Certificates))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Error path: missing file, and a file that is not valid PEM.
|
||||||
|
func TestLoadTLSConfigsErrors(t *testing.T) {
|
||||||
|
if _, err := LoadCATLSConfig("/no/such/ca.crt"); err == nil {
|
||||||
|
t.Fatalf("expected error for missing CA file")
|
||||||
|
}
|
||||||
|
dir := t.TempDir()
|
||||||
|
junk := filepath.Join(dir, "junk.crt")
|
||||||
|
if err := os.WriteFile(junk, []byte("not a pem"), 0o644); err != nil {
|
||||||
|
t.Fatalf("write junk: %v", err)
|
||||||
|
}
|
||||||
|
if _, err := LoadCATLSConfig(junk); err == nil {
|
||||||
|
t.Fatalf("expected error for non-PEM CA file")
|
||||||
|
}
|
||||||
|
if _, err := ServerTLSConfig("/no/such/server.crt", "/no/such/server.key"); err == nil {
|
||||||
|
t.Fatalf("expected error for missing server keypair")
|
||||||
|
}
|
||||||
|
}
|
||||||
+311
-70
@@ -16,16 +16,23 @@ import (
|
|||||||
"bytes"
|
"bytes"
|
||||||
"context"
|
"context"
|
||||||
"crypto/rand"
|
"crypto/rand"
|
||||||
|
"crypto/tls"
|
||||||
|
"encoding/base64"
|
||||||
|
"encoding/hex"
|
||||||
"encoding/json"
|
"encoding/json"
|
||||||
"fmt"
|
"fmt"
|
||||||
"io"
|
"io"
|
||||||
"net/http"
|
"net/http"
|
||||||
|
"strconv"
|
||||||
|
"strings"
|
||||||
"sync"
|
"sync"
|
||||||
"time"
|
"time"
|
||||||
|
|
||||||
cs "fn-registry/functions/cybersecurity"
|
cs "fn-registry/functions/cybersecurity"
|
||||||
|
|
||||||
|
"github.com/enmanuel/unibus/pkg/busauth"
|
||||||
"github.com/enmanuel/unibus/pkg/frame"
|
"github.com/enmanuel/unibus/pkg/frame"
|
||||||
|
"github.com/enmanuel/unibus/pkg/membership"
|
||||||
"github.com/enmanuel/unibus/pkg/room"
|
"github.com/enmanuel/unibus/pkg/room"
|
||||||
"github.com/nats-io/nats.go"
|
"github.com/nats-io/nats.go"
|
||||||
"github.com/nats-io/nats.go/jetstream"
|
"github.com/nats-io/nats.go/jetstream"
|
||||||
@@ -44,20 +51,130 @@ type Client struct {
|
|||||||
endpoint string
|
endpoint string
|
||||||
nc *nats.Conn
|
nc *nats.Conn
|
||||||
js jetstream.JetStream // durable plane for rooms with Policy.Persist
|
js jetstream.JetStream // durable plane for rooms with Policy.Persist
|
||||||
ctrlURL string
|
ctrlURLs []string // control-plane HTTP endpoints, tried in order (failover)
|
||||||
http *http.Client
|
http *http.Client
|
||||||
|
|
||||||
|
// natsServers + natsOpts are retained so RefreshSession can rebuild the
|
||||||
|
// data-plane connection (re-triggering the server's subject-ACL evaluation).
|
||||||
|
natsServers []string
|
||||||
|
natsOpts []nats.Option
|
||||||
|
|
||||||
mu sync.RWMutex
|
mu sync.RWMutex
|
||||||
keyCache map[string]map[int][]byte // roomID -> epoch -> K
|
keyCache map[string]map[int][]byte // roomID -> epoch -> K
|
||||||
signCache map[string][]byte // sender endpoint -> sign pub (for verification)
|
signCache map[string][]byte // sender endpoint -> sign pub (for verification)
|
||||||
}
|
}
|
||||||
|
|
||||||
// New connects to NATS and records the control-plane URL. The identity holds
|
// Options configures how a client connects to the bus. The zero value is the
|
||||||
// the peer's long-term keypairs.
|
// legacy behavior: a plain NATS connection with no nkey and no TLS — what dev
|
||||||
|
// stacks and a not-yet-secured server expect. Secured deployments set these.
|
||||||
|
type Options struct {
|
||||||
|
// UseNkey authenticates the NATS connection with the peer's Ed25519 identity
|
||||||
|
// reused as a NATS nkey. It MUST match the server: nats.go refuses to connect
|
||||||
|
// with an nkey to a server that does not advertise nkey auth ("nkeys not
|
||||||
|
// supported by the server"), so this is opt-in rather than always-on.
|
||||||
|
UseNkey bool
|
||||||
|
// TLS, when non-nil, secures the NATS (data plane) connection and pins the
|
||||||
|
// server to this config's RootCAs (the bus's self-signed CA). Build it with
|
||||||
|
// busauth.LoadCATLSConfig(caPath). Nil keeps the data plane plaintext.
|
||||||
|
TLS *tls.Config
|
||||||
|
// CtrlTLS, when non-nil, secures the HTTP control-plane connection and pins it
|
||||||
|
// to this config's RootCAs. It is separate from TLS so the two planes can be
|
||||||
|
// secured independently (a test may TLS one and not the other); production
|
||||||
|
// sets both to the same CA via Connect. Nil keeps the control plane plaintext.
|
||||||
|
CtrlTLS *tls.Config
|
||||||
|
// NatsServers are ADDITIONAL NATS seed URLs for cluster failover (issue
|
||||||
|
// 0003e), beyond the primary natsURL passed to the constructor. With more
|
||||||
|
// than one server nats.go reconnects to a surviving node automatically when
|
||||||
|
// the one a client is attached to dies, so a node loss is transparent.
|
||||||
|
NatsServers []string
|
||||||
|
// CtrlURLs are ADDITIONAL control-plane HTTP endpoints (one per node) beyond
|
||||||
|
// the primary ctrlURL. Each request is tried against them in order until one
|
||||||
|
// answers, so the control plane survives a node loss too. With the
|
||||||
|
// decentralized KV store every node serves the same state, so any of them
|
||||||
|
// can answer any request.
|
||||||
|
CtrlURLs []string
|
||||||
|
}
|
||||||
|
|
||||||
|
// dedupNonEmpty filters in down to its distinct non-empty strings, keeping the
// first occurrence of each and the original relative order. It is used to build
// the NATS seed list and the control-plane list from a primary URL plus
// optional extras without a redundant entry. The result is nil when nothing
// survives the filter.
func dedupNonEmpty(in []string) []string {
	var kept []string
	visited := make(map[string]bool)
	for i := range in {
		candidate := in[i]
		if candidate != "" && !visited[candidate] {
			visited[candidate] = true
			kept = append(kept, candidate)
		}
	}
	return kept
}
|
||||||
|
|
||||||
|
// New connects to NATS and records the control-plane URL with default Options
|
||||||
|
// (no nkey, no TLS). The identity holds the peer's long-term keypairs.
|
||||||
func New(natsURL, ctrlURL string, id cs.Identity) (*Client, error) {
|
func New(natsURL, ctrlURL string, id cs.Identity) (*Client, error) {
|
||||||
nc, err := nats.Connect(natsURL, nats.Name("unibus-client"))
|
return NewWithOptions(natsURL, ctrlURL, id, Options{})
|
||||||
|
}
|
||||||
|
|
||||||
|
// Connect is the single migration seam every peer (worker, chat, mobile,
|
||||||
|
// gateway) uses to pick its security posture from one input: the CA path. With
|
||||||
|
// a non-empty caPath it connects securely — TLS pinned to that CA plus nkey
|
||||||
|
// authentication on the data plane — matching a bus running with bus-auth
|
||||||
|
// enforce + bus-tls. With an empty caPath it falls back to the legacy plaintext,
|
||||||
|
// no-nkey connection for local dev against an unsecured bus. The control-plane
|
||||||
|
// HTTP requests are signed in both cases (that signing is unconditional).
|
||||||
|
func Connect(natsURL, ctrlURL string, id cs.Identity, caPath string) (*Client, error) {
|
||||||
|
if caPath == "" {
|
||||||
|
return New(natsURL, ctrlURL, id)
|
||||||
|
}
|
||||||
|
// A CA implies the bus is TLS on BOTH planes. Refuse a plaintext control-plane
|
||||||
|
// URL: signing gives integrity, not confidentiality, so sending metadata over
|
||||||
|
// http:// when the operator provisioned a CA would silently leak it to a MITM
|
||||||
|
// (audit H5). Force https rather than silently downgrade.
|
||||||
|
if !strings.HasPrefix(ctrlURL, "https://") {
|
||||||
|
return nil, fmt.Errorf("client: control-plane URL %q must be https:// when a CA is provided", ctrlURL)
|
||||||
|
}
|
||||||
|
tlsCfg, err := busauth.LoadCATLSConfig(caPath)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, fmt.Errorf("client: connect nats %q: %w", natsURL, err)
|
return nil, fmt.Errorf("client: load CA %q: %w", caPath, err)
|
||||||
|
}
|
||||||
|
// Pin the same CA on both planes: nkey+TLS on NATS, TLS on the HTTP control plane.
|
||||||
|
return NewWithOptions(natsURL, ctrlURL, id, Options{UseNkey: true, TLS: tlsCfg, CtrlTLS: tlsCfg})
|
||||||
|
}
|
||||||
|
|
||||||
|
// NewWithOptions is New with explicit connection options (nkey auth, and, from
|
||||||
|
// phase 0001d, TLS). It is the single place the data-plane connection is built,
|
||||||
|
// so every peer (worker, chat, mobile, gateway) gets identical behavior by
|
||||||
|
// passing the same Options.
|
||||||
|
func NewWithOptions(natsURL, ctrlURL string, id cs.Identity, opts Options) (*Client, error) {
|
||||||
|
// Seed list = primary + extras. With more than one seed, nats.go fails over
|
||||||
|
// to a surviving node on disconnect; MaxReconnects(-1) keeps it retrying
|
||||||
|
// indefinitely so a node coming back is rejoined rather than given up on.
|
||||||
|
natsServers := dedupNonEmpty(append([]string{natsURL}, opts.NatsServers...))
|
||||||
|
natsOpts := []nats.Option{
|
||||||
|
nats.Name("unibus-client"),
|
||||||
|
nats.MaxReconnects(-1),
|
||||||
|
nats.ReconnectWait(250 * time.Millisecond),
|
||||||
|
}
|
||||||
|
if len(natsServers) > 1 {
|
||||||
|
// Try every seed on the initial connect too, so startup tolerates one
|
||||||
|
// seed being down.
|
||||||
|
natsOpts = append(natsOpts, nats.RetryOnFailedConnect(true))
|
||||||
|
}
|
||||||
|
if opts.UseNkey {
|
||||||
|
nkeyPub, nkeySign, err := busauth.ClientNkey(id.SignPriv)
|
||||||
|
if err != nil {
|
||||||
|
return nil, fmt.Errorf("client: derive nkey: %w", err)
|
||||||
|
}
|
||||||
|
natsOpts = append(natsOpts, nats.Nkey(nkeyPub, nkeySign))
|
||||||
|
}
|
||||||
|
if opts.TLS != nil {
|
||||||
|
natsOpts = append(natsOpts, nats.Secure(opts.TLS))
|
||||||
|
}
|
||||||
|
nc, err := nats.Connect(strings.Join(natsServers, ","), natsOpts...)
|
||||||
|
if err != nil {
|
||||||
|
return nil, fmt.Errorf("client: connect nats %v: %w", natsServers, err)
|
||||||
}
|
}
|
||||||
// JetStream context for the durable plane. Obtaining it does not require any
|
// JetStream context for the durable plane. Obtaining it does not require any
|
||||||
// stream to exist yet and has no effect on cleartext/ephemeral rooms — those
|
// stream to exist yet and has no effect on cleartext/ephemeral rooms — those
|
||||||
@@ -67,18 +184,58 @@ func New(natsURL, ctrlURL string, id cs.Identity) (*Client, error) {
|
|||||||
nc.Close()
|
nc.Close()
|
||||||
return nil, fmt.Errorf("client: init jetstream: %w", err)
|
return nil, fmt.Errorf("client: init jetstream: %w", err)
|
||||||
}
|
}
|
||||||
|
// The control-plane HTTP client pins the bus CA when CtrlTLS is set, so an
|
||||||
|
// https:// control plane is verified against the bus's own CA rather than the
|
||||||
|
// system roots (audit H5). Without it the client stays plaintext for dev.
|
||||||
|
httpClient := &http.Client{Timeout: 10 * time.Second}
|
||||||
|
if opts.CtrlTLS != nil {
|
||||||
|
httpClient.Transport = &http.Transport{TLSClientConfig: opts.CtrlTLS.Clone()}
|
||||||
|
}
|
||||||
return &Client{
|
return &Client{
|
||||||
id: id,
|
id: id,
|
||||||
endpoint: frame.EndpointID(id.SignPub),
|
endpoint: frame.EndpointID(id.SignPub),
|
||||||
nc: nc,
|
nc: nc,
|
||||||
js: js,
|
js: js,
|
||||||
ctrlURL: ctrlURL,
|
ctrlURLs: dedupNonEmpty(append([]string{ctrlURL}, opts.CtrlURLs...)),
|
||||||
http: &http.Client{Timeout: 10 * time.Second},
|
http: httpClient,
|
||||||
keyCache: map[string]map[int][]byte{},
|
natsServers: natsServers,
|
||||||
signCache: map[string][]byte{},
|
natsOpts: natsOpts,
|
||||||
|
keyCache: map[string]map[int][]byte{},
|
||||||
|
signCache: map[string][]byte{},
|
||||||
}, nil
|
}, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// RefreshSession rebuilds the data-plane NATS connection so the server's
|
||||||
|
// subject-ACL authenticator re-evaluates this peer's room membership (issue
|
||||||
|
// 0003e, audit H4 residual). Call it after a membership change — a room you
|
||||||
|
// created, were invited to, or joined — when the bus enforces per-subject
|
||||||
|
// permissions, so the new room's subject becomes publishable and subscribable
|
||||||
|
// (NATS freezes permissions at connect time, so the prior connection cannot see
|
||||||
|
// the new room).
|
||||||
|
//
|
||||||
|
// It opens a fresh connection with the same seeds/options and swaps it in.
|
||||||
|
// IMPORTANT: active subscriptions from the previous connection are dropped —
|
||||||
|
// re-subscribe (client.Subscribe) to your rooms after calling this. The key and
|
||||||
|
// signer caches are preserved. On a non-ACL bus this is a no-op-safe reconnect.
|
||||||
|
func (c *Client) RefreshSession() error {
|
||||||
|
nc, err := nats.Connect(strings.Join(c.natsServers, ","), c.natsOpts...)
|
||||||
|
if err != nil {
|
||||||
|
return fmt.Errorf("client: refresh session: reconnect nats: %w", err)
|
||||||
|
}
|
||||||
|
js, err := jetstream.New(nc)
|
||||||
|
if err != nil {
|
||||||
|
nc.Close()
|
||||||
|
return fmt.Errorf("client: refresh session: init jetstream: %w", err)
|
||||||
|
}
|
||||||
|
old := c.nc
|
||||||
|
c.mu.Lock()
|
||||||
|
c.nc = nc
|
||||||
|
c.js = js
|
||||||
|
c.mu.Unlock()
|
||||||
|
old.Close()
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
// Endpoint returns this client's public identity.
|
// Endpoint returns this client's public identity.
|
||||||
func (c *Client) Endpoint() Endpoint {
|
func (c *Client) Endpoint() Endpoint {
|
||||||
return Endpoint{ID: c.endpoint, SignPub: c.id.SignPub, KexPub: c.id.KexPub}
|
return Endpoint{ID: c.endpoint, SignPub: c.id.SignPub, KexPub: c.id.KexPub}
|
||||||
@@ -90,6 +247,15 @@ func (c *Client) Close() error {
|
|||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// ConnectedServer returns the URL of the NATS node this client is currently
|
||||||
|
// attached to (empty when disconnected). It is observability for cluster
|
||||||
|
// failover: after a node dies, this reports the surviving node nats.go
|
||||||
|
// reconnected to. IsConnected reports whether the data-plane link is up.
|
||||||
|
func (c *Client) ConnectedServer() string { return c.nc.ConnectedUrl() }
|
||||||
|
|
||||||
|
// IsConnected reports whether the NATS data-plane connection is currently up.
|
||||||
|
func (c *Client) IsConnected() bool { return c.nc.IsConnected() }
|
||||||
|
|
||||||
// ---- key cache ------------------------------------------------------------
|
// ---- key cache ------------------------------------------------------------
|
||||||
|
|
||||||
func (c *Client) cacheKey(roomID string, epoch int, k []byte) {
|
func (c *Client) cacheKey(roomID string, epoch int, k []byte) {
|
||||||
@@ -116,54 +282,105 @@ func (c *Client) getCachedKey(roomID string, epoch int) ([]byte, bool) {
|
|||||||
// ---- control-plane HTTP helpers ------------------------------------------
|
// ---- control-plane HTTP helpers ------------------------------------------
|
||||||
|
|
||||||
func (c *Client) doJSON(method, path string, body, out any) error {
|
func (c *Client) doJSON(method, path string, body, out any) error {
|
||||||
var rdr io.Reader
|
var bodyBytes []byte
|
||||||
if body != nil {
|
if body != nil {
|
||||||
b, err := json.Marshal(body)
|
b, err := json.Marshal(body)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return fmt.Errorf("client: marshal request: %w", err)
|
return fmt.Errorf("client: marshal request: %w", err)
|
||||||
}
|
}
|
||||||
rdr = bytes.NewReader(b)
|
bodyBytes = b
|
||||||
}
|
}
|
||||||
req, err := http.NewRequest(method, c.ctrlURL+path, rdr)
|
// Try each control-plane endpoint in order. A transport error (a dead node)
|
||||||
if err != nil {
|
// falls over to the next; an HTTP response (any status) is authoritative and
|
||||||
return fmt.Errorf("client: new request: %w", err)
|
// returned, since every node serves the same state. Each attempt is freshly
|
||||||
}
|
// signed (new nonce), so a failed-over retry is never seen as a replay.
|
||||||
if body != nil {
|
var lastErr error
|
||||||
req.Header.Set("Content-Type", "application/json")
|
for _, base := range c.ctrlURLs {
|
||||||
}
|
req, err := c.newSignedRequestTo(base, method, path, bodyBytes)
|
||||||
resp, err := c.http.Do(req)
|
if err != nil {
|
||||||
if err != nil {
|
return err
|
||||||
return fmt.Errorf("client: do %s %s: %w", method, path, err)
|
|
||||||
}
|
|
||||||
defer resp.Body.Close()
|
|
||||||
respBody, _ := io.ReadAll(resp.Body)
|
|
||||||
if resp.StatusCode >= 300 {
|
|
||||||
// Surface the server's structured {"error": "..."} message when present,
|
|
||||||
// instead of leaking the raw HTTP envelope (method, path, status, JSON body).
|
|
||||||
var er struct {
|
|
||||||
Error string `json:"error"`
|
|
||||||
}
|
}
|
||||||
if json.Unmarshal(respBody, &er) == nil && er.Error != "" {
|
if body != nil {
|
||||||
return fmt.Errorf("%s (HTTP %d)", er.Error, resp.StatusCode)
|
req.Header.Set("Content-Type", "application/json")
|
||||||
}
|
}
|
||||||
return fmt.Errorf("client: %s %s -> %d: %s", method, path, resp.StatusCode, string(respBody))
|
resp, err := c.http.Do(req)
|
||||||
}
|
if err != nil {
|
||||||
if out != nil {
|
lastErr = err
|
||||||
if err := json.Unmarshal(respBody, out); err != nil {
|
continue // dead node: try the next control plane
|
||||||
return fmt.Errorf("client: decode response: %w", err)
|
|
||||||
}
|
}
|
||||||
|
defer resp.Body.Close()
|
||||||
|
respBody, _ := io.ReadAll(resp.Body)
|
||||||
|
if resp.StatusCode >= 300 {
|
||||||
|
// Surface the server's structured {"error": "..."} message when present,
|
||||||
|
// instead of leaking the raw HTTP envelope (method, path, status, body).
|
||||||
|
var er struct {
|
||||||
|
Error string `json:"error"`
|
||||||
|
}
|
||||||
|
if json.Unmarshal(respBody, &er) == nil && er.Error != "" {
|
||||||
|
return fmt.Errorf("%s (HTTP %d)", er.Error, resp.StatusCode)
|
||||||
|
}
|
||||||
|
return fmt.Errorf("client: %s %s -> %d: %s", method, path, resp.StatusCode, string(respBody))
|
||||||
|
}
|
||||||
|
if out != nil {
|
||||||
|
if err := json.Unmarshal(respBody, out); err != nil {
|
||||||
|
return fmt.Errorf("client: decode response: %w", err)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return nil
|
||||||
}
|
}
|
||||||
return nil
|
return fmt.Errorf("client: %s %s: all control planes failed: %w", method, path, lastErr)
|
||||||
}
|
}
|
||||||
|
|
||||||
// signRequest signs the canonical bytes of req (req must already have its Sig
|
// signRequest signs the canonical bytes of req (req must already have its Sig
|
||||||
// field cleared) with the client's Ed25519 key. It is symmetric with the
|
// field cleared) with the client's Ed25519 key. It is symmetric with the
|
||||||
// server's verifyOwnerSig.
|
// server's verifyOwnerSig. This is the PAYLOAD-level owner signature that
|
||||||
|
// authorizes room operations (invite/rekey) by ownership — distinct from the
|
||||||
|
// transport-level request signature applied by newSignedRequest below, which
|
||||||
|
// authenticates the caller's identity on every request.
|
||||||
func (c *Client) signRequest(req any) []byte {
|
func (c *Client) signRequest(req any) []byte {
|
||||||
b, _ := json.Marshal(req)
|
b, _ := json.Marshal(req)
|
||||||
return cs.SignEd25519(c.id.SignPriv, b)
|
return cs.SignEd25519(c.id.SignPriv, b)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// newSignedRequestTo builds an *http.Request to the control-plane endpoint
|
||||||
|
// `base` and attaches the transport authentication headers
|
||||||
|
// (X-Unibus-Pub/Ts/Nonce/Sig) signing the canonical request bytes with this
|
||||||
|
// peer's Ed25519 key. path is the request URI (path plus any query); body is the
|
||||||
|
// raw request body (nil for GET). The server (membership.authenticate) verifies
|
||||||
|
// these headers under the bus-auth flag. The signature covers method+path+ts+
|
||||||
|
// nonce+sha256(body), NOT the host, so the same request can be addressed to any
|
||||||
|
// node — and each failover attempt mints a fresh nonce so it is never a replay.
|
||||||
|
//
|
||||||
|
// Signing happens on every request — including GETs — so that under enforce the
|
||||||
|
// server can authenticate the caller and reject unregistered or revoked
|
||||||
|
// identities uniformly. The canonical construction is the single source of truth
|
||||||
|
// in membership.CanonicalRequest, shared by both sides.
|
||||||
|
func (c *Client) newSignedRequestTo(base, method, path string, body []byte) (*http.Request, error) {
|
||||||
|
var rdr io.Reader
|
||||||
|
if body != nil {
|
||||||
|
rdr = bytes.NewReader(body)
|
||||||
|
}
|
||||||
|
req, err := http.NewRequest(method, base+path, rdr)
|
||||||
|
if err != nil {
|
||||||
|
return nil, fmt.Errorf("client: new request: %w", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
ts := strconv.FormatInt(time.Now().Unix(), 10)
|
||||||
|
nonceRaw := make([]byte, 16)
|
||||||
|
if _, err := rand.Read(nonceRaw); err != nil {
|
||||||
|
return nil, fmt.Errorf("client: generate nonce: %w", err)
|
||||||
|
}
|
||||||
|
nonce := base64.StdEncoding.EncodeToString(nonceRaw)
|
||||||
|
canonical := membership.CanonicalRequest(method, path, ts, nonce, body)
|
||||||
|
sig := cs.SignEd25519(c.id.SignPriv, canonical)
|
||||||
|
|
||||||
|
req.Header.Set("X-Unibus-Pub", hex.EncodeToString(c.id.SignPub))
|
||||||
|
req.Header.Set("X-Unibus-Ts", ts)
|
||||||
|
req.Header.Set("X-Unibus-Nonce", nonce)
|
||||||
|
req.Header.Set("X-Unibus-Sig", base64.StdEncoding.EncodeToString(sig))
|
||||||
|
return req, nil
|
||||||
|
}
|
||||||
|
|
||||||
// ---- mirror of server wire types (control plane) -------------------------
|
// ---- mirror of server wire types (control plane) -------------------------
|
||||||
|
|
||||||
type policyJSON struct {
|
type policyJSON struct {
|
||||||
@@ -582,7 +799,17 @@ func (c *Client) processFrame(roomID string, info roomView, data []byte, handler
|
|||||||
if err != nil {
|
if err != nil {
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
if info.Policy.SignMsgs && f.Sig != nil {
|
// A room with SignMsgs REQUIRES a signature, so an unsigned frame is
|
||||||
|
// unauthenticated and must be dropped — not silently accepted. The previous
|
||||||
|
// `&& f.Sig != nil` guard verified the signature only when one was present, so
|
||||||
|
// an attacker with data-plane access could publish a frame with Sig==nil and a
|
||||||
|
// forged Sender and have the receiver accept it as authentic in a room that
|
||||||
|
// demands signatures (audit N3, report 0006). Requiring the signature first
|
||||||
|
// closes that spoof.
|
||||||
|
if info.Policy.SignMsgs {
|
||||||
|
if f.Sig == nil {
|
||||||
|
return // signature required by room policy but absent: drop
|
||||||
|
}
|
||||||
pub, err := c.signerPub(roomID, f.Sender)
|
pub, err := c.signerPub(roomID, f.Sender)
|
||||||
if err != nil || !cs.VerifyEd25519(pub, f.SigningBytes(), f.Sig) {
|
if err != nil || !cs.VerifyEd25519(pub, f.SigningBytes(), f.Sig) {
|
||||||
return // unauthenticated frame: drop
|
return // unauthenticated frame: drop
|
||||||
@@ -769,36 +996,50 @@ func (c *Client) FetchMedia(roomID string, f frame.Frame) ([]byte, error) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
func (c *Client) putBlob(ciphertext []byte) (string, error) {
|
func (c *Client) putBlob(ciphertext []byte) (string, error) {
|
||||||
req, err := http.NewRequest("POST", c.ctrlURL+"/blobs", bytes.NewReader(ciphertext))
|
var lastErr error
|
||||||
if err != nil {
|
for _, base := range c.ctrlURLs {
|
||||||
return "", fmt.Errorf("client: new blob request: %w", err)
|
req, err := c.newSignedRequestTo(base, "POST", "/blobs", ciphertext)
|
||||||
|
if err != nil {
|
||||||
|
return "", err
|
||||||
|
}
|
||||||
|
req.Header.Set("Content-Type", "application/octet-stream")
|
||||||
|
resp, err := c.http.Do(req)
|
||||||
|
if err != nil {
|
||||||
|
lastErr = err
|
||||||
|
continue // dead node: try the next control plane
|
||||||
|
}
|
||||||
|
defer resp.Body.Close()
|
||||||
|
body, _ := io.ReadAll(resp.Body)
|
||||||
|
if resp.StatusCode >= 300 {
|
||||||
|
return "", fmt.Errorf("client: put blob -> %d: %s", resp.StatusCode, string(body))
|
||||||
|
}
|
||||||
|
var r blobResp
|
||||||
|
if err := json.Unmarshal(body, &r); err != nil {
|
||||||
|
return "", fmt.Errorf("client: decode blob resp: %w", err)
|
||||||
|
}
|
||||||
|
return r.Hash, nil
|
||||||
}
|
}
|
||||||
req.Header.Set("Content-Type", "application/octet-stream")
|
return "", fmt.Errorf("client: put blob: all control planes failed: %w", lastErr)
|
||||||
resp, err := c.http.Do(req)
|
|
||||||
if err != nil {
|
|
||||||
return "", fmt.Errorf("client: put blob: %w", err)
|
|
||||||
}
|
|
||||||
defer resp.Body.Close()
|
|
||||||
body, _ := io.ReadAll(resp.Body)
|
|
||||||
if resp.StatusCode >= 300 {
|
|
||||||
return "", fmt.Errorf("client: put blob -> %d: %s", resp.StatusCode, string(body))
|
|
||||||
}
|
|
||||||
var r blobResp
|
|
||||||
if err := json.Unmarshal(body, &r); err != nil {
|
|
||||||
return "", fmt.Errorf("client: decode blob resp: %w", err)
|
|
||||||
}
|
|
||||||
return r.Hash, nil
|
|
||||||
}
|
}
|
||||||
|
|
||||||
func (c *Client) getBlob(hash string) ([]byte, error) {
|
func (c *Client) getBlob(hash string) ([]byte, error) {
|
||||||
resp, err := c.http.Get(c.ctrlURL + "/blobs/" + hash)
|
var lastErr error
|
||||||
if err != nil {
|
for _, base := range c.ctrlURLs {
|
||||||
return nil, fmt.Errorf("client: get blob: %w", err)
|
req, err := c.newSignedRequestTo(base, "GET", "/blobs/"+hash, nil)
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
resp, err := c.http.Do(req)
|
||||||
|
if err != nil {
|
||||||
|
lastErr = err
|
||||||
|
continue // dead node: try the next control plane
|
||||||
|
}
|
||||||
|
defer resp.Body.Close()
|
||||||
|
if resp.StatusCode >= 300 {
|
||||||
|
body, _ := io.ReadAll(resp.Body)
|
||||||
|
return nil, fmt.Errorf("client: get blob -> %d: %s", resp.StatusCode, string(body))
|
||||||
|
}
|
||||||
|
return io.ReadAll(resp.Body)
|
||||||
}
|
}
|
||||||
defer resp.Body.Close()
|
return nil, fmt.Errorf("client: get blob: all control planes failed: %w", lastErr)
|
||||||
if resp.StatusCode >= 300 {
|
|
||||||
body, _ := io.ReadAll(resp.Body)
|
|
||||||
return nil, fmt.Errorf("client: get blob -> %d: %s", resp.StatusCode, string(body))
|
|
||||||
}
|
|
||||||
return io.ReadAll(resp.Body)
|
|
||||||
}
|
}
|
||||||
|
|||||||
+145
-9
@@ -1,10 +1,13 @@
|
|||||||
package client_test
|
package client_test
|
||||||
|
|
||||||
import (
|
import (
|
||||||
|
"crypto/tls"
|
||||||
|
"encoding/hex"
|
||||||
"net"
|
"net"
|
||||||
"net/http"
|
"net/http"
|
||||||
"net/http/httptest"
|
"net/http/httptest"
|
||||||
"path/filepath"
|
"path/filepath"
|
||||||
|
"strings"
|
||||||
"sync"
|
"sync"
|
||||||
"testing"
|
"testing"
|
||||||
"time"
|
"time"
|
||||||
@@ -12,6 +15,7 @@ import (
|
|||||||
cs "fn-registry/functions/cybersecurity"
|
cs "fn-registry/functions/cybersecurity"
|
||||||
|
|
||||||
"github.com/enmanuel/unibus/pkg/blobstore"
|
"github.com/enmanuel/unibus/pkg/blobstore"
|
||||||
|
"github.com/enmanuel/unibus/pkg/busauth"
|
||||||
"github.com/enmanuel/unibus/pkg/client"
|
"github.com/enmanuel/unibus/pkg/client"
|
||||||
"github.com/enmanuel/unibus/pkg/embeddednats"
|
"github.com/enmanuel/unibus/pkg/embeddednats"
|
||||||
"github.com/enmanuel/unibus/pkg/frame"
|
"github.com/enmanuel/unibus/pkg/frame"
|
||||||
@@ -27,6 +31,8 @@ type testHarness struct {
|
|||||||
ctrlURL string
|
ctrlURL string
|
||||||
ns *server.Server
|
ns *server.Server
|
||||||
httpts *httptest.Server
|
httpts *httptest.Server
|
||||||
|
store membership.Store
|
||||||
|
srv *membership.Server
|
||||||
}
|
}
|
||||||
|
|
||||||
func freePort(t *testing.T) int {
|
func freePort(t *testing.T) int {
|
||||||
@@ -39,29 +45,61 @@ func freePort(t *testing.T) int {
|
|||||||
return l.Addr().(*net.TCPAddr).Port
|
return l.Addr().(*net.TCPAddr).Port
|
||||||
}
|
}
|
||||||
|
|
||||||
func newHarness(t *testing.T) *testHarness {
|
func newHarness(t *testing.T) *testHarness { return newHarnessFull(t, membership.AuthOff, false) }
|
||||||
|
|
||||||
|
// newHarnessMode is newHarness with an explicit control-plane auth mode and the
|
||||||
|
// NATS data plane left open (no nkey auth), so HTTP-auth tests can use a plain
|
||||||
|
// client.New that does not present an nkey.
|
||||||
|
func newHarnessMode(t *testing.T, mode membership.AuthMode) *testHarness {
|
||||||
|
return newHarnessFull(t, mode, false)
|
||||||
|
}
|
||||||
|
|
||||||
|
// newHarnessFull boots the embedded NATS (optionally with the nkey authenticator
|
||||||
|
// backed by the user allowlist) and the membershipd HTTP server in ctrlMode.
|
||||||
|
// natsAuth and ctrlMode are independent on purpose: an HTTP-enforce test can
|
||||||
|
// keep NATS open, and an nkey test can keep HTTP off, mirroring how the rollout
|
||||||
|
// flags compose. The store is created before NATS so the authenticator can
|
||||||
|
// consult IsAuthorized for live revocation.
|
||||||
|
func newHarnessFull(t *testing.T, ctrlMode membership.AuthMode, natsAuth bool) *testHarness {
|
||||||
|
return bootHarness(t, ctrlMode, natsAuth, nil)
|
||||||
|
}
|
||||||
|
|
||||||
|
// bootHarness is the shared body: a store, an embedded NATS (optionally with the
|
||||||
|
// nkey authenticator and/or TLS), and the membershipd HTTP server in ctrlMode.
|
||||||
|
func bootHarness(t *testing.T, ctrlMode membership.AuthMode, natsAuth bool, natsTLS *tls.Config) *testHarness {
|
||||||
t.Helper()
|
t.Helper()
|
||||||
dir := t.TempDir()
|
dir := t.TempDir()
|
||||||
|
|
||||||
ns, err := embeddednats.Start(filepath.Join(dir, "js"), freePort(t))
|
store, err := membership.Open(filepath.Join(dir, "unibus.db"))
|
||||||
if err != nil {
|
if err != nil {
|
||||||
|
t.Fatalf("membership store: %v", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
cfg := embeddednats.ServerConfig{
|
||||||
|
StoreDir: filepath.Join(dir, "js"),
|
||||||
|
Host: "127.0.0.1",
|
||||||
|
Port: freePort(t),
|
||||||
|
TLS: natsTLS,
|
||||||
|
}
|
||||||
|
if natsAuth {
|
||||||
|
cfg.Auth = busauth.NewNkeyAuthenticator(store.IsAuthorized)
|
||||||
|
}
|
||||||
|
ns, err := embeddednats.StartServer(cfg)
|
||||||
|
if err != nil {
|
||||||
|
store.Close()
|
||||||
t.Fatalf("embedded nats: %v", err)
|
t.Fatalf("embedded nats: %v", err)
|
||||||
}
|
}
|
||||||
|
|
||||||
store, err := membership.Open(filepath.Join(dir, "unibus.db"))
|
|
||||||
if err != nil {
|
|
||||||
ns.Shutdown()
|
|
||||||
t.Fatalf("membership store: %v", err)
|
|
||||||
}
|
|
||||||
blobs, err := blobstore.New(filepath.Join(dir, "blobs"))
|
blobs, err := blobstore.New(filepath.Join(dir, "blobs"))
|
||||||
if err != nil {
|
if err != nil {
|
||||||
ns.Shutdown()
|
ns.Shutdown()
|
||||||
|
store.Close()
|
||||||
t.Fatalf("blob store: %v", err)
|
t.Fatalf("blob store: %v", err)
|
||||||
}
|
}
|
||||||
srv := membership.NewServer(store, blobs)
|
srv := membership.NewServer(store, blobs, ctrlMode)
|
||||||
httpts := httptest.NewServer(srv)
|
httpts := httptest.NewServer(srv)
|
||||||
|
|
||||||
h := &testHarness{natsURL: embeddednats.ClientURL(ns), ctrlURL: httpts.URL, ns: ns, httpts: httpts}
|
h := &testHarness{natsURL: embeddednats.ClientURL(ns), ctrlURL: httpts.URL, ns: ns, httpts: httpts, store: store, srv: srv}
|
||||||
t.Cleanup(func() {
|
t.Cleanup(func() {
|
||||||
httpts.Close()
|
httpts.Close()
|
||||||
store.Close()
|
store.Close()
|
||||||
@@ -71,6 +109,15 @@ func newHarness(t *testing.T) *testHarness {
|
|||||||
return h
|
return h
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// registerClient adds a peer's signing identity to the bus allowlist so its
|
||||||
|
// signed control-plane requests pass under enforce.
|
||||||
|
func registerClient(t *testing.T, h *testHarness, c *client.Client, handle, role string) {
|
||||||
|
t.Helper()
|
||||||
|
if err := h.store.AddUser(hex.EncodeToString(c.Endpoint().SignPub), handle, role); err != nil {
|
||||||
|
t.Fatalf("register %s: %v", handle, err)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
func waitHealth(t *testing.T, ctrlURL string) {
|
func waitHealth(t *testing.T, ctrlURL string) {
|
||||||
t.Helper()
|
t.Helper()
|
||||||
deadline := time.Now().Add(3 * time.Second)
|
deadline := time.Now().Add(3 * time.Second)
|
||||||
@@ -455,6 +502,95 @@ func TestListMyRoomsDiscovery(t *testing.T) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// TestControlPlaneAuthEnforceE2E closes the loop end to end with the production
|
||||||
|
// client against a server in enforce mode: a registered peer's signed requests
|
||||||
|
// are accepted (golden), and an unregistered peer is rejected with 401 on its
|
||||||
|
// first control-plane call (error path). This proves the client's real
|
||||||
|
// signature construction matches the server's verification.
|
||||||
|
func TestControlPlaneAuthEnforceE2E(t *testing.T) {
|
||||||
|
h := newHarnessMode(t, membership.AuthEnforce)
|
||||||
|
waitHealth(t, h.ctrlURL)
|
||||||
|
|
||||||
|
a, err := client.New(h.natsURL, h.ctrlURL, mustIdentity(t))
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("connect A: %v", err)
|
||||||
|
}
|
||||||
|
defer a.Close()
|
||||||
|
registerClient(t, h, a, "alice", membership.RoleAdmin)
|
||||||
|
|
||||||
|
// Golden: registered peer's signed request is accepted.
|
||||||
|
if _, err := a.CreateRoom("room.enforced", room.ModeNATS); err != nil {
|
||||||
|
t.Fatalf("registered peer should create a room under enforce: %v", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Error path: an unregistered peer is rejected on its first control-plane call.
|
||||||
|
b, err := client.New(h.natsURL, h.ctrlURL, mustIdentity(t))
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("connect B: %v", err)
|
||||||
|
}
|
||||||
|
defer b.Close()
|
||||||
|
_, err = b.CreateRoom("room.denied", room.ModeNATS)
|
||||||
|
if err == nil {
|
||||||
|
t.Fatalf("unregistered peer must be rejected under enforce")
|
||||||
|
}
|
||||||
|
if !strings.Contains(err.Error(), "401") && !strings.Contains(strings.ToLower(err.Error()), "unauthorized") {
|
||||||
|
t.Fatalf("expected a 401/unauthorized error, got %v", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Revocation takes effect without restart: revoke A, its next request fails.
|
||||||
|
if err := h.store.RevokeUser(hex.EncodeToString(a.Endpoint().SignPub)); err != nil {
|
||||||
|
t.Fatalf("revoke A: %v", err)
|
||||||
|
}
|
||||||
|
if _, err := a.CreateRoom("room.after-revoke", room.ModeNATS); err == nil {
|
||||||
|
t.Fatalf("revoked peer must be rejected without a server restart")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// TestNatsNkeyAuth exercises the data-plane authenticator: with NATS nkey auth
|
||||||
|
// on, a registered peer connecting with its nkey is accepted and can publish
|
||||||
|
// (golden); an unregistered peer is refused at connect time (error path); and a
|
||||||
|
// peer revoked while the server runs is refused on its NEXT connection, proving
|
||||||
|
// revocation without a restart (edge).
|
||||||
|
func TestNatsNkeyAuth(t *testing.T) {
|
||||||
|
h := newHarnessFull(t, membership.AuthOff, true) // NATS auth on; HTTP off to isolate the data plane
|
||||||
|
waitHealth(t, h.ctrlURL)
|
||||||
|
|
||||||
|
idA := mustIdentity(t)
|
||||||
|
if err := h.store.AddUser(hex.EncodeToString(idA.SignPub), "alice", membership.RoleMember); err != nil {
|
||||||
|
t.Fatalf("register A: %v", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Golden: registered peer connects with its nkey and uses the bus.
|
||||||
|
a, err := client.NewWithOptions(h.natsURL, h.ctrlURL, idA, client.Options{UseNkey: true})
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("registered peer should connect with nkey: %v", err)
|
||||||
|
}
|
||||||
|
defer a.Close()
|
||||||
|
if _, err := a.CreateRoom("room.nkey", room.ModeNATS); err != nil {
|
||||||
|
t.Fatalf("registered peer should operate: %v", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Error path: an unregistered identity is refused at connect time.
|
||||||
|
idB := mustIdentity(t)
|
||||||
|
if _, err := client.NewWithOptions(h.natsURL, h.ctrlURL, idB, client.Options{UseNkey: true}); err == nil {
|
||||||
|
t.Fatalf("unregistered peer must be refused by the NATS authenticator")
|
||||||
|
}
|
||||||
|
|
||||||
|
// Error path: presenting no nkey to an auth-required server is refused.
|
||||||
|
if _, err := client.NewWithOptions(h.natsURL, h.ctrlURL, idB, client.Options{UseNkey: false}); err == nil {
|
||||||
|
t.Fatalf("a client without an nkey must be refused when the server requires auth")
|
||||||
|
}
|
||||||
|
|
||||||
|
// Edge: revoke A while the server runs; A's NEXT connection is refused even
|
||||||
|
// though an already-open connection (a) is unaffected. No server restart.
|
||||||
|
if err := h.store.RevokeUser(hex.EncodeToString(idA.SignPub)); err != nil {
|
||||||
|
t.Fatalf("revoke A: %v", err)
|
||||||
|
}
|
||||||
|
if _, err := client.NewWithOptions(h.natsURL, h.ctrlURL, idA, client.Options{UseNkey: true}); err == nil {
|
||||||
|
t.Fatalf("revoked peer must be refused on a new connection without a restart")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// ---- test helpers ---------------------------------------------------------
|
// ---- test helpers ---------------------------------------------------------
|
||||||
|
|
||||||
type collector struct {
|
type collector struct {
|
||||||
|
|||||||
@@ -0,0 +1,87 @@
|
|||||||
|
package client_test
|
||||||
|
|
||||||
|
import (
|
||||||
|
"crypto/tls"
|
||||||
|
"crypto/x509"
|
||||||
|
"net/http/httptest"
|
||||||
|
"path/filepath"
|
||||||
|
"strings"
|
||||||
|
"testing"
|
||||||
|
|
||||||
|
"github.com/enmanuel/unibus/pkg/blobstore"
|
||||||
|
"github.com/enmanuel/unibus/pkg/client"
|
||||||
|
"github.com/enmanuel/unibus/pkg/embeddednats"
|
||||||
|
"github.com/enmanuel/unibus/pkg/membership"
|
||||||
|
"github.com/enmanuel/unibus/pkg/room"
|
||||||
|
)
|
||||||
|
|
||||||
|
// TestConnectRequiresHTTPSWithCA covers audit H5's client contract: when a CA is
|
||||||
|
// provided the control-plane URL must be https://. A signed request gives
|
||||||
|
// integrity but not confidentiality, so silently talking http:// to a bus the
|
||||||
|
// operator secured with a CA would leak all metadata to a MITM. Connect refuses
|
||||||
|
// the plaintext URL outright (error path; the scheme is checked before any
|
||||||
|
// network use, so a bogus CA path is irrelevant).
|
||||||
|
func TestConnectRequiresHTTPSWithCA(t *testing.T) {
|
||||||
|
_, err := client.Connect("nats://127.0.0.1:4222", "http://127.0.0.1:8470", mustIdentity(t), "/nonexistent/ca.crt")
|
||||||
|
if err == nil {
|
||||||
|
t.Fatalf("Connect with a CA and an http:// control plane must be refused")
|
||||||
|
}
|
||||||
|
if !strings.Contains(err.Error(), "https") {
|
||||||
|
t.Fatalf("error should point the caller at https, got: %v", err)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// TestControlPlaneOverTLS proves the control plane works over TLS pinned to the
|
||||||
|
// bus CA (golden) and that a client lacking the CA cannot complete the handshake
|
||||||
|
// (error path) — so a network observer can neither read nor inject control-plane
|
||||||
|
// traffic. The data plane is left plaintext here to isolate the HTTP-TLS wiring.
|
||||||
|
func TestControlPlaneOverTLS(t *testing.T) {
|
||||||
|
dir := t.TempDir()
|
||||||
|
store, err := membership.Open(filepath.Join(dir, "unibus.db"))
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("store: %v", err)
|
||||||
|
}
|
||||||
|
t.Cleanup(func() { store.Close() })
|
||||||
|
blobs, err := blobstore.New(filepath.Join(dir, "blobs"))
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("blobs: %v", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
ns, err := embeddednats.StartServer(embeddednats.ServerConfig{
|
||||||
|
StoreDir: filepath.Join(dir, "js"), Host: "127.0.0.1", Port: freePort(t),
|
||||||
|
})
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("nats: %v", err)
|
||||||
|
}
|
||||||
|
t.Cleanup(func() { ns.Shutdown(); ns.WaitForShutdown() })
|
||||||
|
natsURL := embeddednats.ClientURL(ns)
|
||||||
|
|
||||||
|
// An https control plane wrapping the real membership server.
|
||||||
|
ts := httptest.NewTLSServer(membership.NewServer(store, blobs, membership.AuthOff))
|
||||||
|
t.Cleanup(ts.Close)
|
||||||
|
|
||||||
|
pool := x509.NewCertPool()
|
||||||
|
pool.AddCert(ts.Certificate())
|
||||||
|
|
||||||
|
// Golden: trusting the control-plane CA, an https control-plane request works.
|
||||||
|
good, err := client.NewWithOptions(natsURL, ts.URL, mustIdentity(t),
|
||||||
|
client.Options{CtrlTLS: &tls.Config{RootCAs: pool, MinVersion: tls.VersionTLS12}})
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("connect with the pinned CA: %v", err)
|
||||||
|
}
|
||||||
|
defer good.Close()
|
||||||
|
if _, err := good.CreateRoom("room.tls.ctrl", room.ModeNATS); err != nil {
|
||||||
|
t.Fatalf("control plane over TLS should succeed with the pinned CA: %v", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Error path: without the CA the https handshake fails, so the request errors.
|
||||||
|
bad, err := client.NewWithOptions(natsURL, ts.URL, mustIdentity(t),
|
||||||
|
client.Options{CtrlTLS: &tls.Config{RootCAs: x509.NewCertPool(), MinVersion: tls.VersionTLS12}})
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("nats connect (bad CA case): %v", err)
|
||||||
|
}
|
||||||
|
defer bad.Close()
|
||||||
|
if _, err := bad.CreateRoom("room.tls.fail", room.ModeNATS); err == nil {
|
||||||
|
t.Fatalf("a control-plane request without the CA must fail the TLS handshake")
|
||||||
|
}
|
||||||
|
}
|
||||||
@@ -0,0 +1,124 @@
|
|||||||
|
package client_test
|
||||||
|
|
||||||
|
import (
|
||||||
|
"bytes"
|
||||||
|
"sync"
|
||||||
|
"testing"
|
||||||
|
"time"
|
||||||
|
|
||||||
|
"github.com/enmanuel/unibus/pkg/client"
|
||||||
|
"github.com/enmanuel/unibus/pkg/frame"
|
||||||
|
"github.com/enmanuel/unibus/pkg/room"
|
||||||
|
"github.com/nats-io/nats.go"
|
||||||
|
)
|
||||||
|
|
||||||
|
// TestAudit_NoSubjectACL ports the auditor's H4 (Alto) finding under the minimum
|
||||||
|
// defense chosen for this issue (forbid cleartext rooms in public; see
|
||||||
|
// dev/0004d-dataplane-acl.md). The NATS data plane still has no per-subject ACL,
|
||||||
|
// so the guarantee we make is CONTENT confidentiality, proven three ways:
|
||||||
|
//
|
||||||
|
// error : a cleartext (ModeNATS) room cannot be created under the public posture;
|
||||||
|
// golden: a legitimate member (bob) decrypts the secret;
|
||||||
|
// edge : eve, sniffing the raw subject off the data plane, receives only
|
||||||
|
// ciphertext — she never recovers the plaintext the auditor's eve did.
|
||||||
|
func TestAudit_NoSubjectACL(t *testing.T) {
|
||||||
|
h := newHarness(t)
|
||||||
|
h.srv.RequireEncryptedRooms = true // the public posture
|
||||||
|
waitHealth(t, h.ctrlURL)
|
||||||
|
|
||||||
|
alice, err := client.New(h.natsURL, h.ctrlURL, mustIdentity(t))
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("connect alice: %v", err)
|
||||||
|
}
|
||||||
|
defer alice.Close()
|
||||||
|
|
||||||
|
// Error path: a cleartext room is refused, so no payload ever rides a subject
|
||||||
|
// in the clear for a sniffer to read (the exact vector the auditor exploited).
|
||||||
|
if _, err := alice.CreateRoom("secret.subject.payroll", room.ModeNATS); err == nil {
|
||||||
|
t.Fatalf("cleartext room must be refused on a public deployment")
|
||||||
|
}
|
||||||
|
|
||||||
|
// alice creates an encrypted room and invites bob (the legitimate reader).
|
||||||
|
const subject = "secret.subject.payroll.e2e"
|
||||||
|
const secret = "internal: salary numbers"
|
||||||
|
roomID, err := alice.CreateRoom(subject, room.ModeMatrix)
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("alice create encrypted room: %v", err)
|
||||||
|
}
|
||||||
|
bob, err := client.New(h.natsURL, h.ctrlURL, mustIdentity(t))
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("connect bob: %v", err)
|
||||||
|
}
|
||||||
|
defer bob.Close()
|
||||||
|
if err := alice.Invite(roomID, bob.Endpoint()); err != nil {
|
||||||
|
t.Fatalf("alice invite bob: %v", err)
|
||||||
|
}
|
||||||
|
if err := bob.Join(roomID); err != nil {
|
||||||
|
t.Fatalf("bob join: %v", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Golden: bob (a member) subscribes and decrypts the secret.
|
||||||
|
var bmu sync.Mutex
|
||||||
|
var bobGot []string
|
||||||
|
bobSub, err := bob.Subscribe(roomID, func(_ frame.Frame, plaintext []byte) {
|
||||||
|
bmu.Lock()
|
||||||
|
bobGot = append(bobGot, string(plaintext))
|
||||||
|
bmu.Unlock()
|
||||||
|
})
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("bob subscribe: %v", err)
|
||||||
|
}
|
||||||
|
defer bobSub.Unsubscribe()
|
||||||
|
|
||||||
|
// Edge: eve sniffs the raw subject directly off NATS (no membership, no key).
|
||||||
|
rawEve, err := nats.Connect(h.natsURL)
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("eve raw connect: %v", err)
|
||||||
|
}
|
||||||
|
defer rawEve.Close()
|
||||||
|
eveGot := make(chan []byte, 8)
|
||||||
|
if _, err := rawEve.Subscribe(subject, func(m *nats.Msg) { eveGot <- m.Data }); err != nil {
|
||||||
|
t.Fatalf("eve raw subscribe: %v", err)
|
||||||
|
}
|
||||||
|
if err := rawEve.Flush(); err != nil {
|
||||||
|
t.Fatalf("eve flush: %v", err)
|
||||||
|
}
|
||||||
|
time.Sleep(200 * time.Millisecond) // let both subscriptions settle
|
||||||
|
|
||||||
|
if err := alice.Publish(roomID, []byte(secret)); err != nil {
|
||||||
|
t.Fatalf("alice publish: %v", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
// bob must decrypt the secret.
|
||||||
|
if !waitFor(&bmu, &bobGot, func(rs []string) bool {
|
||||||
|
for _, r := range rs {
|
||||||
|
if r == secret {
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return false
|
||||||
|
}, 2*time.Second) {
|
||||||
|
t.Fatalf("bob (member) should decrypt the secret; got %v", snapshot(&bmu, &bobGot))
|
||||||
|
}
|
||||||
|
|
||||||
|
// eve must receive only ciphertext — never the plaintext.
|
||||||
|
select {
|
||||||
|
case data := <-eveGot:
|
||||||
|
if bytes.Contains(data, []byte(secret)) {
|
||||||
|
t.Fatalf("eve sniffed the plaintext off the data plane: %q", data)
|
||||||
|
}
|
||||||
|
f, err := frame.Unmarshal(data)
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("eve received an undecodable frame: %v", err)
|
||||||
|
}
|
||||||
|
if string(f.Payload) == secret {
|
||||||
|
t.Fatalf("eve read the secret from the frame payload")
|
||||||
|
}
|
||||||
|
if len(f.Nonce) == 0 {
|
||||||
|
t.Fatalf("expected an AEAD-encrypted payload (non-empty nonce), got cleartext frame")
|
||||||
|
}
|
||||||
|
case <-time.After(2 * time.Second):
|
||||||
|
// eve receiving nothing is also a safe outcome; the assertion is only that
|
||||||
|
// she never gets the plaintext, which holds vacuously here.
|
||||||
|
}
|
||||||
|
}
|
||||||
@@ -0,0 +1,185 @@
|
|||||||
|
package client_test
|
||||||
|
|
||||||
|
import (
|
||||||
|
"fmt"
|
||||||
|
"net/http/httptest"
|
||||||
|
"path/filepath"
|
||||||
|
"strconv"
|
||||||
|
"strings"
|
||||||
|
"sync"
|
||||||
|
"testing"
|
||||||
|
"time"
|
||||||
|
|
||||||
|
"github.com/enmanuel/unibus/pkg/blobstore"
|
||||||
|
"github.com/enmanuel/unibus/pkg/client"
|
||||||
|
"github.com/enmanuel/unibus/pkg/embeddednats"
|
||||||
|
"github.com/enmanuel/unibus/pkg/frame"
|
||||||
|
"github.com/enmanuel/unibus/pkg/membership"
|
||||||
|
"github.com/enmanuel/unibus/pkg/room"
|
||||||
|
server "github.com/nats-io/nats-server/v2/server"
|
||||||
|
)
|
||||||
|
|
||||||
|
// startClusterNode boots a clustered embedded NATS node (auth off, no route TLS:
|
||||||
|
// this test exercises client failover, not route security — that is covered in
|
||||||
|
// pkg/embeddednats).
|
||||||
|
func startClusterNode(t *testing.T, name string, clientPort, routePort int, peerRoutePorts []int) *server.Server {
|
||||||
|
t.Helper()
|
||||||
|
routes := make([]string, 0, len(peerRoutePorts))
|
||||||
|
for _, p := range peerRoutePorts {
|
||||||
|
routes = append(routes, fmt.Sprintf("nats://127.0.0.1:%d", p))
|
||||||
|
}
|
||||||
|
ns, err := embeddednats.StartServer(embeddednats.ServerConfig{
|
||||||
|
StoreDir: t.TempDir(),
|
||||||
|
Host: "127.0.0.1",
|
||||||
|
Port: clientPort,
|
||||||
|
ServerName: name,
|
||||||
|
Cluster: &embeddednats.ClusterConfig{Name: "unibus-failover", Host: "127.0.0.1", Port: routePort, Routes: routes},
|
||||||
|
})
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("start node %s: %v", name, err)
|
||||||
|
}
|
||||||
|
t.Cleanup(func() { ns.Shutdown(); ns.WaitForShutdown() })
|
||||||
|
return ns
|
||||||
|
}
|
||||||
|
|
||||||
|
func waitClusterRoutes(t *testing.T, ns *server.Server) {
|
||||||
|
t.Helper()
|
||||||
|
deadline := time.Now().Add(8 * time.Second)
|
||||||
|
for time.Now().Before(deadline) {
|
||||||
|
if ns.NumRoutes() >= 1 {
|
||||||
|
return
|
||||||
|
}
|
||||||
|
time.Sleep(50 * time.Millisecond)
|
||||||
|
}
|
||||||
|
t.Fatalf("node %q never formed a route", ns.Name())
|
||||||
|
}
|
||||||
|
|
||||||
|
// portOf extracts the port of a NATS URL so that ConnectedServer() can be
// matched against ClientURL() even when the two spell the host differently.
//
// It returns the text after the last ':' (ignoring one trailing '/'), but only
// when that text is a valid decimal TCP port. Inputs without an explicit port
// (for example "nats://localhost", "nats://[::1]" or the empty string reported
// by a disconnected client) yield "" instead of a non-port fragment such as
// "//localhost".
func portOf(natsURL string) string {
	trimmed := strings.TrimSuffix(natsURL, "/")
	i := strings.LastIndex(trimmed, ":")
	if i < 0 {
		return ""
	}
	port := trimmed[i+1:]
	// Reject anything that is not a 16-bit unsigned decimal number: the last
	// ':' may belong to the scheme, to userinfo or to an IPv6 literal.
	if _, err := strconv.ParseUint(port, 10, 16); err != nil {
		return ""
	}
	return port
}
|
||||||
|
|
||||||
|
// TestClientFailoverAcrossNodes is the issue's edge case: a client connected to
|
||||||
|
// node A keeps its session when A is killed — nats.go reconnects it to node B
|
||||||
|
// and it keeps receiving messages published on the surviving node.
|
||||||
|
func TestClientFailoverAcrossNodes(t *testing.T) {
|
||||||
|
rp0, rp1 := freePort(t), freePort(t)
|
||||||
|
p0, p1 := freePort(t), freePort(t)
|
||||||
|
n0 := startClusterNode(t, "n0", p0, rp0, []int{rp1})
|
||||||
|
n1 := startClusterNode(t, "n1", p1, rp1, []int{rp0})
|
||||||
|
waitClusterRoutes(t, n0)
|
||||||
|
waitClusterRoutes(t, n1)
|
||||||
|
nodes := map[string]*server.Server{strconv.Itoa(p0): n0, strconv.Itoa(p1): n1}
|
||||||
|
|
||||||
|
// Control plane: one in-process membershipd (metadata only; the data plane is
|
||||||
|
// the NATS cluster). Auth off keeps the test focused on data-plane failover.
|
||||||
|
dir := t.TempDir()
|
||||||
|
store, err := membership.Open(filepath.Join(dir, "unibus.db"))
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("store: %v", err)
|
||||||
|
}
|
||||||
|
t.Cleanup(func() { store.Close() })
|
||||||
|
blobs, err := blobstore.New(filepath.Join(dir, "blobs"))
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("blobs: %v", err)
|
||||||
|
}
|
||||||
|
ctrl := httptest.NewServer(membership.NewServer(store, blobs, membership.AuthOff))
|
||||||
|
t.Cleanup(ctrl.Close)
|
||||||
|
|
||||||
|
url0 := n0.ClientURL()
|
||||||
|
url1 := n1.ClientURL()
|
||||||
|
|
||||||
|
// A seeds BOTH nodes (failover list); B connects directly to n1.
|
||||||
|
a, err := client.NewWithOptions(url0, ctrl.URL, mustIdentity(t), client.Options{NatsServers: []string{url1}})
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("connect A: %v", err)
|
||||||
|
}
|
||||||
|
defer a.Close()
|
||||||
|
b, err := client.NewWithOptions(url1, ctrl.URL, mustIdentity(t), client.Options{NatsServers: []string{url0}})
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("connect B: %v", err)
|
||||||
|
}
|
||||||
|
defer b.Close()
|
||||||
|
|
||||||
|
roomID, err := a.CreateRoom("room.failover", room.ModeNATS)
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("A create room: %v", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
var mu sync.Mutex
|
||||||
|
var got []string
|
||||||
|
sub, err := a.Subscribe(roomID, func(_ frame.Frame, plaintext []byte) {
|
||||||
|
mu.Lock()
|
||||||
|
got = append(got, string(plaintext))
|
||||||
|
mu.Unlock()
|
||||||
|
})
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("A subscribe: %v", err)
|
||||||
|
}
|
||||||
|
defer sub.Unsubscribe()
|
||||||
|
time.Sleep(200 * time.Millisecond)
|
||||||
|
|
||||||
|
// Pre-kill sanity: B publishes, A receives across the cluster.
|
||||||
|
if err := b.Publish(roomID, []byte("before-kill")); err != nil {
|
||||||
|
t.Fatalf("B publish 1: %v", err)
|
||||||
|
}
|
||||||
|
if !waitFor(&mu, &got, func(rs []string) bool { return contains(rs, "before-kill") }, 3*time.Second) {
|
||||||
|
t.Fatalf("A did not receive the pre-kill message; got %v", snapshot(&mu, &got))
|
||||||
|
}
|
||||||
|
|
||||||
|
// Identify and KILL the node A is attached to, forcing a reconnect.
|
||||||
|
attached := a.ConnectedServer()
|
||||||
|
killPort := portOf(attached)
|
||||||
|
victim, ok := nodes[killPort]
|
||||||
|
if !ok {
|
||||||
|
t.Fatalf("A is attached to an unknown node %q (port %q)", attached, killPort)
|
||||||
|
}
|
||||||
|
survivorURL := url1
|
||||||
|
if killPort == strconv.Itoa(p1) {
|
||||||
|
survivorURL = url0
|
||||||
|
}
|
||||||
|
victim.Shutdown()
|
||||||
|
victim.WaitForShutdown()
|
||||||
|
|
||||||
|
// A must reconnect to the surviving node.
|
||||||
|
deadline := time.Now().Add(8 * time.Second)
|
||||||
|
for time.Now().Before(deadline) {
|
||||||
|
if a.IsConnected() && portOf(a.ConnectedServer()) == portOf(survivorURL) {
|
||||||
|
break
|
||||||
|
}
|
||||||
|
time.Sleep(100 * time.Millisecond)
|
||||||
|
}
|
||||||
|
if !a.IsConnected() || portOf(a.ConnectedServer()) != portOf(survivorURL) {
|
||||||
|
t.Fatalf("A did not fail over to the surviving node (now on %q, want port %s)", a.ConnectedServer(), portOf(survivorURL))
|
||||||
|
}
|
||||||
|
|
||||||
|
// Make B publish from the surviving node and confirm A still receives —
|
||||||
|
// the session (its subscription) survived the failover.
|
||||||
|
if survivorURL == url0 {
|
||||||
|
// B's primary was n1 (killed); ensure B is on the survivor too.
|
||||||
|
deadline := time.Now().Add(8 * time.Second)
|
||||||
|
for time.Now().Before(deadline) && portOf(b.ConnectedServer()) != portOf(survivorURL) {
|
||||||
|
time.Sleep(100 * time.Millisecond)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if err := b.Publish(roomID, []byte("after-kill")); err != nil {
|
||||||
|
t.Fatalf("B publish 2: %v", err)
|
||||||
|
}
|
||||||
|
if !waitFor(&mu, &got, func(rs []string) bool { return contains(rs, "after-kill") }, 6*time.Second) {
|
||||||
|
t.Fatalf("A did not receive a message after failover; got %v", snapshot(&mu, &got))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// contains reports whether want is equal to at least one element of rs.
// A nil or empty rs yields false.
func contains(rs []string, want string) bool {
	for i := range rs {
		if rs[i] == want {
			return true
		}
	}
	return false
}
|
||||||
@@ -0,0 +1,154 @@
|
|||||||
|
package client_test
|
||||||
|
|
||||||
|
import (
|
||||||
|
"sync"
|
||||||
|
"testing"
|
||||||
|
"time"
|
||||||
|
|
||||||
|
"github.com/enmanuel/unibus/pkg/client"
|
||||||
|
"github.com/enmanuel/unibus/pkg/frame"
|
||||||
|
"github.com/enmanuel/unibus/pkg/room"
|
||||||
|
"github.com/nats-io/nats.go"
|
||||||
|
)
|
||||||
|
|
||||||
|
// TestReaudit_SigNilSpoof ports the re-auditor's N3 (Alto) finding: in a room
|
||||||
|
// that REQUIRES per-message signatures, an attacker with data-plane access
|
||||||
|
// publishes a raw frame with Sig==nil and a forged Sender. Before the fix
|
||||||
|
// processFrame verified the signature only when one was present
|
||||||
|
// (`SignMsgs && f.Sig != nil`), so the receiver accepted the unsigned, forged
|
||||||
|
// frame as authentic. The fix drops any unsigned frame in a SignMsgs room.
|
||||||
|
//
|
||||||
|
// Coverage:
|
||||||
|
// - golden: a properly signed frame from a real member IS delivered;
|
||||||
|
// - error : an unsigned frame with a forged Sender in a SignMsgs room is DROPPED;
|
||||||
|
// - edge : a room WITHOUT SignMsgs still delivers an unsigned frame (the drop
|
||||||
|
// is specific to signed rooms, not a blanket reject of unsigned frames).
|
||||||
|
func TestReaudit_SigNilSpoof(t *testing.T) {
|
||||||
|
h := newHarness(t)
|
||||||
|
waitHealth(t, h.ctrlURL)
|
||||||
|
|
||||||
|
alice, err := client.New(h.natsURL, h.ctrlURL, mustIdentity(t))
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("connect alice: %v", err)
|
||||||
|
}
|
||||||
|
defer alice.Close()
|
||||||
|
bob, err := client.New(h.natsURL, h.ctrlURL, mustIdentity(t))
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("connect bob: %v", err)
|
||||||
|
}
|
||||||
|
defer bob.Close()
|
||||||
|
|
||||||
|
// A signed-but-NOT-encrypted room: SignMsgs enforces authorship, and the lack
|
||||||
|
// of encryption is exactly the case the auditor flagged as Alto (any peer with
|
||||||
|
// the subject can forge a sender if signatures are not strictly required).
|
||||||
|
const subject = "room.signed.spoof"
|
||||||
|
signedPolicy := room.Policy{Encrypt: false, Persist: false, SignMsgs: true}
|
||||||
|
roomID, err := alice.CreateRoom(subject, signedPolicy)
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("alice create signed room: %v", err)
|
||||||
|
}
|
||||||
|
if err := alice.Invite(roomID, bob.Endpoint()); err != nil {
|
||||||
|
t.Fatalf("alice invite bob: %v", err)
|
||||||
|
}
|
||||||
|
if err := bob.Join(roomID); err != nil {
|
||||||
|
t.Fatalf("bob join: %v", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
var mu sync.Mutex
|
||||||
|
var got []string
|
||||||
|
sub, err := bob.Subscribe(roomID, func(_ frame.Frame, plaintext []byte) {
|
||||||
|
mu.Lock()
|
||||||
|
got = append(got, string(plaintext))
|
||||||
|
mu.Unlock()
|
||||||
|
})
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("bob subscribe: %v", err)
|
||||||
|
}
|
||||||
|
defer sub.Unsubscribe()
|
||||||
|
time.Sleep(150 * time.Millisecond)
|
||||||
|
|
||||||
|
// Attacker: a raw NATS connection (the dev harness leaves the data plane open),
|
||||||
|
// no identity, forged Sender, NO signature.
|
||||||
|
const spoofMsg = "I am totally the victim"
|
||||||
|
rawAtk, err := nats.Connect(h.natsURL)
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("attacker raw connect: %v", err)
|
||||||
|
}
|
||||||
|
defer rawAtk.Close()
|
||||||
|
spoof := frame.Frame{
|
||||||
|
Type: frame.PUB,
|
||||||
|
Subject: subject,
|
||||||
|
Sender: "victim-forged-endpoint",
|
||||||
|
MsgID: "spoof-1",
|
||||||
|
Epoch: 1,
|
||||||
|
Payload: []byte(spoofMsg),
|
||||||
|
// Sig intentionally nil — this is the attack.
|
||||||
|
}
|
||||||
|
sb, err := spoof.Marshal()
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("marshal spoof: %v", err)
|
||||||
|
}
|
||||||
|
if err := rawAtk.Publish(subject, sb); err != nil {
|
||||||
|
t.Fatalf("attacker publish: %v", err)
|
||||||
|
}
|
||||||
|
_ = rawAtk.Flush()
|
||||||
|
|
||||||
|
// Golden: alice's properly signed frame must be delivered.
|
||||||
|
const goodMsg = "authentic from alice"
|
||||||
|
if err := alice.Publish(roomID, []byte(goodMsg)); err != nil {
|
||||||
|
t.Fatalf("alice publish: %v", err)
|
||||||
|
}
|
||||||
|
if !waitFor(&mu, &got, func(rs []string) bool {
|
||||||
|
for _, r := range rs {
|
||||||
|
if r == goodMsg {
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return false
|
||||||
|
}, 2*time.Second) {
|
||||||
|
t.Fatalf("a properly signed frame should be delivered; got %v", snapshot(&mu, &got))
|
||||||
|
}
|
||||||
|
|
||||||
|
// Error path: the unsigned, forged frame must NEVER reach the handler.
|
||||||
|
for _, r := range snapshot(&mu, &got) {
|
||||||
|
if r == spoofMsg {
|
||||||
|
t.Fatalf("SIG-NIL SPOOF: receiver accepted an unsigned frame with a forged Sender in a SignMsgs room")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Edge: a room WITHOUT SignMsgs still delivers an unsigned raw frame, proving
|
||||||
|
// the drop is scoped to signed rooms and did not break the plain-NATS path.
|
||||||
|
const subjectOpen = "room.open.nosig"
|
||||||
|
openRoom, err := alice.CreateRoom(subjectOpen, room.ModeNATS)
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("alice create open room: %v", err)
|
||||||
|
}
|
||||||
|
openCol := subscribeCollect(t, alice, openRoom)
|
||||||
|
defer openCol.sub.Unsubscribe()
|
||||||
|
time.Sleep(150 * time.Millisecond)
|
||||||
|
|
||||||
|
const openMsg = "unsigned but allowed here"
|
||||||
|
openFrame := frame.Frame{
|
||||||
|
Type: frame.PUB,
|
||||||
|
Subject: subjectOpen,
|
||||||
|
Sender: "anyone",
|
||||||
|
MsgID: "open-1",
|
||||||
|
Payload: []byte(openMsg),
|
||||||
|
// no Sig — fine in a non-signed room
|
||||||
|
}
|
||||||
|
ob, _ := openFrame.Marshal()
|
||||||
|
if err := rawAtk.Publish(subjectOpen, ob); err != nil {
|
||||||
|
t.Fatalf("publish open frame: %v", err)
|
||||||
|
}
|
||||||
|
_ = rawAtk.Flush()
|
||||||
|
if !waitFor(&openCol.mu, &openCol.msgs, func(rs []string) bool {
|
||||||
|
for _, r := range rs {
|
||||||
|
if r == openMsg {
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return false
|
||||||
|
}, 2*time.Second) {
|
||||||
|
t.Fatalf("an unsigned frame in a non-signed room should be delivered; got %v", snapshot(&openCol.mu, &openCol.msgs))
|
||||||
|
}
|
||||||
|
}
|
||||||
@@ -0,0 +1,185 @@
|
|||||||
|
package client_test
|
||||||
|
|
||||||
|
import (
|
||||||
|
"crypto/ecdsa"
|
||||||
|
"crypto/elliptic"
|
||||||
|
"crypto/rand"
|
||||||
|
"crypto/tls"
|
||||||
|
"crypto/x509"
|
||||||
|
"crypto/x509/pkix"
|
||||||
|
"encoding/hex"
|
||||||
|
"encoding/pem"
|
||||||
|
"math/big"
|
||||||
|
"net"
|
||||||
|
"sync"
|
||||||
|
"testing"
|
||||||
|
"time"
|
||||||
|
|
||||||
|
"github.com/enmanuel/unibus/pkg/client"
|
||||||
|
"github.com/enmanuel/unibus/pkg/frame"
|
||||||
|
"github.com/enmanuel/unibus/pkg/membership"
|
||||||
|
"github.com/enmanuel/unibus/pkg/room"
|
||||||
|
)
|
||||||
|
|
||||||
|
// genTestCA builds, entirely in memory, a throwaway self-signed ECDSA P-256 CA
// and a server certificate signed by it (SANs: DNS "localhost", IP 127.0.0.1),
// mirroring deploy/tls/generate-certs.sh without shelling out to openssl.
//
// It returns the *tls.Config a server should present (one certificate,
// minimum TLS 1.2) and a pool containing only the CA certificate, which a
// client must trust to complete the handshake. Both certificates are valid
// from one hour ago until 24 hours from now. Any failure aborts the test.
func genTestCA(t *testing.T) (server *tls.Config, caPool *x509.CertPool) {
	t.Helper()

	// --- certificate authority (self-signed) ---
	rootKey, err := ecdsa.GenerateKey(elliptic.P256(), rand.Reader)
	if err != nil {
		t.Fatalf("ca key: %v", err)
	}
	rootTemplate := x509.Certificate{
		SerialNumber:          big.NewInt(1),
		Subject:               pkix.Name{CommonName: "unibus-test-ca"},
		NotBefore:             time.Now().Add(-time.Hour),
		NotAfter:              time.Now().Add(24 * time.Hour),
		IsCA:                  true,
		KeyUsage:              x509.KeyUsageCertSign | x509.KeyUsageDigitalSignature,
		BasicConstraintsValid: true,
	}
	// Template and parent are the same certificate: that is what makes it self-signed.
	rootDER, err := x509.CreateCertificate(rand.Reader, &rootTemplate, &rootTemplate, &rootKey.PublicKey, rootKey)
	if err != nil {
		t.Fatalf("ca cert: %v", err)
	}
	root, err := x509.ParseCertificate(rootDER)
	if err != nil {
		t.Fatalf("parse ca: %v", err)
	}

	// --- leaf certificate for the server, issued by the CA above ---
	leafKey, err := ecdsa.GenerateKey(elliptic.P256(), rand.Reader)
	if err != nil {
		t.Fatalf("server key: %v", err)
	}
	leafTemplate := x509.Certificate{
		SerialNumber: big.NewInt(2),
		Subject:      pkix.Name{CommonName: "unibus-test-server"},
		NotBefore:    time.Now().Add(-time.Hour),
		NotAfter:     time.Now().Add(24 * time.Hour),
		KeyUsage:     x509.KeyUsageDigitalSignature,
		ExtKeyUsage:  []x509.ExtKeyUsage{x509.ExtKeyUsageServerAuth},
		DNSNames:     []string{"localhost"},
		IPAddresses:  []net.IP{net.IPv4(127, 0, 0, 1)},
	}
	leafDER, err := x509.CreateCertificate(rand.Reader, &leafTemplate, root, &leafKey.PublicKey, rootKey)
	if err != nil {
		t.Fatalf("server cert: %v", err)
	}

	// Round-trip certificate and key through PEM so tls.X509KeyPair can pair them.
	certBlock := pem.Block{Type: "CERTIFICATE", Bytes: leafDER}
	certPEM := pem.EncodeToMemory(&certBlock)
	keyDER, err := x509.MarshalECPrivateKey(leafKey)
	if err != nil {
		t.Fatalf("marshal server key: %v", err)
	}
	keyBlock := pem.Block{Type: "EC PRIVATE KEY", Bytes: keyDER}
	keyPEM := pem.EncodeToMemory(&keyBlock)
	keyPair, err := tls.X509KeyPair(certPEM, keyPEM)
	if err != nil {
		t.Fatalf("server keypair: %v", err)
	}

	trust := x509.NewCertPool()
	trust.AddCert(root)
	serverCfg := &tls.Config{
		Certificates: []tls.Certificate{keyPair},
		MinVersion:   tls.VersionTLS12,
	}
	return serverCfg, trust
}
|
||||||
|
|
||||||
|
// TestNatsTLS validates the TLS data plane: a client trusting the bus CA
|
||||||
|
// completes the handshake and uses the bus (golden); a client that does NOT
|
||||||
|
// trust the CA fails the handshake (error path).
|
||||||
|
func TestNatsTLS(t *testing.T) {
|
||||||
|
serverTLS, caPool := genTestCA(t)
|
||||||
|
h := bootHarness(t, membership.AuthOff, false, serverTLS)
|
||||||
|
waitHealth(t, h.ctrlURL)
|
||||||
|
|
||||||
|
// Golden: client pinning the CA connects over TLS and operates.
|
||||||
|
clientTLS := &tls.Config{RootCAs: caPool, MinVersion: tls.VersionTLS12}
|
||||||
|
a, err := client.NewWithOptions(h.natsURL, h.ctrlURL, mustIdentity(t), client.Options{TLS: clientTLS})
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("client trusting the CA should complete the TLS handshake: %v", err)
|
||||||
|
}
|
||||||
|
defer a.Close()
|
||||||
|
if _, err := a.CreateRoom("room.tls", room.ModeNATS); err != nil {
|
||||||
|
t.Fatalf("TLS client should operate on the bus: %v", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Error path: a client that does not trust the CA fails the handshake. Use an
|
||||||
|
// empty pool (system roots would also reject this private CA, but an empty
|
||||||
|
// pool makes the intent explicit and avoids depending on the host's roots).
|
||||||
|
badTLS := &tls.Config{RootCAs: x509.NewCertPool(), MinVersion: tls.VersionTLS12}
|
||||||
|
if _, err := client.NewWithOptions(h.natsURL, h.ctrlURL, mustIdentity(t), client.Options{TLS: badTLS}); err == nil {
|
||||||
|
t.Fatalf("client without the CA must fail the TLS handshake")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// TestSecureBusEndToEnd is the headline golden of issue 0001: with ALL three
|
||||||
|
// layers active at once — control-plane request signing (enforce), NATS nkey
|
||||||
|
// auth, and TLS — two registered peers run an encrypted room end to end. A
|
||||||
|
// creates a Matrix-policy room, invites B, A publishes and B decrypts. This
|
||||||
|
// proves the layers compose: signed HTTP control plane + authenticated,
|
||||||
|
// encrypted data plane + E2E room content.
|
||||||
|
func TestSecureBusEndToEnd(t *testing.T) {
|
||||||
|
serverTLS, caPool := genTestCA(t)
|
||||||
|
h := bootHarness(t, membership.AuthEnforce, true, serverTLS)
|
||||||
|
waitHealth(t, h.ctrlURL)
|
||||||
|
|
||||||
|
clientTLS := &tls.Config{RootCAs: caPool, MinVersion: tls.VersionTLS12}
|
||||||
|
secure := func(t *testing.T, handle string) (*client.Client, membership.AuthMode) {
|
||||||
|
id := mustIdentity(t)
|
||||||
|
if err := h.store.AddUser(hex.EncodeToString(id.SignPub), handle, membership.RoleMember); err != nil {
|
||||||
|
t.Fatalf("register %s: %v", handle, err)
|
||||||
|
}
|
||||||
|
c, err := client.NewWithOptions(h.natsURL, h.ctrlURL, id, client.Options{UseNkey: true, TLS: clientTLS})
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("connect %s securely: %v", handle, err)
|
||||||
|
}
|
||||||
|
return c, 0
|
||||||
|
}
|
||||||
|
|
||||||
|
a, _ := secure(t, "alice")
|
||||||
|
defer a.Close()
|
||||||
|
b, _ := secure(t, "bob")
|
||||||
|
defer b.Close()
|
||||||
|
|
||||||
|
roomID, err := a.CreateRoom("room.secure", room.ModeMatrix)
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("A create encrypted room over secure bus: %v", err)
|
||||||
|
}
|
||||||
|
if err := a.Invite(roomID, b.Endpoint()); err != nil {
|
||||||
|
t.Fatalf("A invite B: %v", err)
|
||||||
|
}
|
||||||
|
if err := b.Join(roomID); err != nil {
|
||||||
|
t.Fatalf("B join: %v", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
var mu sync.Mutex
|
||||||
|
var got []string
|
||||||
|
sub, err := b.Subscribe(roomID, func(_ frame.Frame, plaintext []byte) {
|
||||||
|
mu.Lock()
|
||||||
|
got = append(got, string(plaintext))
|
||||||
|
mu.Unlock()
|
||||||
|
})
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("B subscribe: %v", err)
|
||||||
|
}
|
||||||
|
defer sub.Unsubscribe()
|
||||||
|
time.Sleep(150 * time.Millisecond)
|
||||||
|
|
||||||
|
const msg = "mensaje sobre bus seguro (auth+TLS+E2E)"
|
||||||
|
if err := a.Publish(roomID, []byte(msg)); err != nil {
|
||||||
|
t.Fatalf("A publish: %v", err)
|
||||||
|
}
|
||||||
|
if !waitFor(&mu, &got, func(rs []string) bool {
|
||||||
|
for _, r := range rs {
|
||||||
|
if r == msg {
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return false
|
||||||
|
}, 2*time.Second) {
|
||||||
|
t.Fatalf("B did not receive/decrypt the message over the secured bus; got %v", snapshot(&mu, &got))
|
||||||
|
}
|
||||||
|
}
|
||||||
@@ -0,0 +1,344 @@
|
|||||||
|
package embeddednats_test
|
||||||
|
|
||||||
|
import (
|
||||||
|
"crypto/ecdsa"
|
||||||
|
"crypto/elliptic"
|
||||||
|
"crypto/rand"
|
||||||
|
"crypto/x509"
|
||||||
|
"crypto/x509/pkix"
|
||||||
|
"encoding/pem"
|
||||||
|
"fmt"
|
||||||
|
"math/big"
|
||||||
|
"net"
|
||||||
|
"os"
|
||||||
|
"path/filepath"
|
||||||
|
"testing"
|
||||||
|
"time"
|
||||||
|
|
||||||
|
"github.com/enmanuel/unibus/pkg/busauth"
|
||||||
|
"github.com/enmanuel/unibus/pkg/embeddednats"
|
||||||
|
"github.com/nats-io/nats.go"
|
||||||
|
server "github.com/nats-io/nats-server/v2/server"
|
||||||
|
)
|
||||||
|
|
||||||
|
// freePort asks the OS for an unused TCP port on 127.0.0.1 by listening on
// port 0, reads the port that was assigned and releases the listener again.
// The test fails if the listener cannot be opened.
func freePort(t *testing.T) int {
	t.Helper()

	ln, err := net.Listen("tcp", "127.0.0.1:0")
	if err != nil {
		t.Fatalf("free port: %v", err)
	}
	port := ln.Addr().(*net.TCPAddr).Port
	// Release the port for the caller; a close error is not actionable here.
	_ = ln.Close()
	return port
}
|
||||||
|
|
||||||
|
// startNode boots a clustered embedded NATS node. peerRoutePorts are the route
|
||||||
|
// ports of the OTHER nodes; user/pass gate the route layer (empty disables it);
|
||||||
|
// routeTLS, when non-nil, secures the routes with mutual TLS.
|
||||||
|
func startNode(t *testing.T, name string, clientPort, routePort int, peerRoutePorts []int, user, pass string, routeTLS *clusterTLS) *server.Server {
|
||||||
|
t.Helper()
|
||||||
|
routes := make([]string, 0, len(peerRoutePorts))
|
||||||
|
for _, p := range peerRoutePorts {
|
||||||
|
// Carry the cluster credentials in the route URL so this node
|
||||||
|
// authenticates outbound to its peers' route listeners.
|
||||||
|
if user != "" {
|
||||||
|
routes = append(routes, fmt.Sprintf("nats://%s:%s@127.0.0.1:%d", user, pass, p))
|
||||||
|
} else {
|
||||||
|
routes = append(routes, fmt.Sprintf("nats://127.0.0.1:%d", p))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
cc := &embeddednats.ClusterConfig{
|
||||||
|
Name: "unibus-test",
|
||||||
|
Host: "127.0.0.1",
|
||||||
|
Port: routePort,
|
||||||
|
Routes: routes,
|
||||||
|
Username: user,
|
||||||
|
Password: pass,
|
||||||
|
}
|
||||||
|
if routeTLS != nil {
|
||||||
|
cfg, err := busauth.RouteTLSConfig(routeTLS.cert, routeTLS.key, routeTLS.ca)
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("route TLS for %s: %v", name, err)
|
||||||
|
}
|
||||||
|
cc.TLS = cfg
|
||||||
|
}
|
||||||
|
ns, err := embeddednats.StartServer(embeddednats.ServerConfig{
|
||||||
|
StoreDir: t.TempDir(),
|
||||||
|
Host: "127.0.0.1",
|
||||||
|
Port: clientPort,
|
||||||
|
ServerName: name,
|
||||||
|
Cluster: cc,
|
||||||
|
})
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("start node %s: %v", name, err)
|
||||||
|
}
|
||||||
|
t.Cleanup(func() { ns.Shutdown(); ns.WaitForShutdown() })
|
||||||
|
return ns
|
||||||
|
}
|
||||||
|
|
||||||
|
// waitRoutes waits until ns has at least want established routes, or fails.
|
||||||
|
func waitRoutes(t *testing.T, ns *server.Server, want int) {
|
||||||
|
t.Helper()
|
||||||
|
deadline := time.Now().Add(8 * time.Second)
|
||||||
|
for time.Now().Before(deadline) {
|
||||||
|
if ns.NumRoutes() >= want {
|
||||||
|
return
|
||||||
|
}
|
||||||
|
time.Sleep(50 * time.Millisecond)
|
||||||
|
}
|
||||||
|
t.Fatalf("node %q never reached %d routes (have %d)", ns.Name(), want, ns.NumRoutes())
|
||||||
|
}
|
||||||
|
|
||||||
|
// stableRouteCount waits for ns's route count to stop changing (the NATS route
|
||||||
|
// pool opens several connections per peer asynchronously) and returns it, so a
|
||||||
|
// test can use it as a baseline that an impostor must not increase.
|
||||||
|
func stableRouteCount(t *testing.T, ns *server.Server) int {
|
||||||
|
t.Helper()
|
||||||
|
prev := -1
|
||||||
|
stableSince := time.Now()
|
||||||
|
deadline := time.Now().Add(5 * time.Second)
|
||||||
|
for time.Now().Before(deadline) {
|
||||||
|
n := ns.NumRoutes()
|
||||||
|
if n != prev {
|
||||||
|
prev = n
|
||||||
|
stableSince = time.Now()
|
||||||
|
} else if time.Since(stableSince) >= 750*time.Millisecond {
|
||||||
|
return n
|
||||||
|
}
|
||||||
|
time.Sleep(50 * time.Millisecond)
|
||||||
|
}
|
||||||
|
return prev
|
||||||
|
}
|
||||||
|
|
||||||
|
// pubSubAcrossNodes connects a subscriber to subURL and a publisher to pubURL,
|
||||||
|
// publishes one message on subject, and reports whether it arrived within 3s.
|
||||||
|
// This proves the cluster forwards client subjects between nodes.
|
||||||
|
func pubSubAcrossNodes(t *testing.T, subURL, pubURL, subject, payload string) bool {
|
||||||
|
t.Helper()
|
||||||
|
subConn, err := nats.Connect(subURL)
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("subscriber connect %s: %v", subURL, err)
|
||||||
|
}
|
||||||
|
defer subConn.Close()
|
||||||
|
got := make(chan string, 1)
|
||||||
|
if _, err := subConn.Subscribe(subject, func(m *nats.Msg) {
|
||||||
|
select {
|
||||||
|
case got <- string(m.Data):
|
||||||
|
default:
|
||||||
|
}
|
||||||
|
}); err != nil {
|
||||||
|
t.Fatalf("subscribe: %v", err)
|
||||||
|
}
|
||||||
|
if err := subConn.Flush(); err != nil {
|
||||||
|
t.Fatalf("flush sub: %v", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
pubConn, err := nats.Connect(pubURL)
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("publisher connect %s: %v", pubURL, err)
|
||||||
|
}
|
||||||
|
defer pubConn.Close()
|
||||||
|
// Retry the publish for a moment: route interest propagation across the
|
||||||
|
// cluster is asynchronous, so the very first publish can race the gossip.
|
||||||
|
deadline := time.Now().Add(3 * time.Second)
|
||||||
|
for time.Now().Before(deadline) {
|
||||||
|
if err := pubConn.Publish(subject, []byte(payload)); err != nil {
|
||||||
|
t.Fatalf("publish: %v", err)
|
||||||
|
}
|
||||||
|
_ = pubConn.Flush()
|
||||||
|
select {
|
||||||
|
case v := <-got:
|
||||||
|
return v == payload
|
||||||
|
case <-time.After(100 * time.Millisecond):
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
|
||||||
|
// --- golden: two-node cluster forwards client subjects across nodes ----------
|
||||||
|
|
||||||
|
func TestClusterForwardsAcrossNodes(t *testing.T) {
|
||||||
|
rp0, rp1 := freePort(t), freePort(t)
|
||||||
|
n0 := startNode(t, "n0", freePort(t), rp0, []int{rp1}, "clusteruser", "clusterpass", nil)
|
||||||
|
n1 := startNode(t, "n1", freePort(t), rp1, []int{rp0}, "clusteruser", "clusterpass", nil)
|
||||||
|
|
||||||
|
waitRoutes(t, n0, 1)
|
||||||
|
waitRoutes(t, n1, 1)
|
||||||
|
|
||||||
|
if !pubSubAcrossNodes(t, n0.ClientURL(), n1.ClientURL(), "test.cross", "hello-cluster") {
|
||||||
|
t.Fatalf("subject published on n1 did not reach subscriber on n0")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// --- edge: three-node cluster (HA shape) forwards between non-adjacent nodes --
|
||||||
|
|
||||||
|
func TestClusterThreeNodesForward(t *testing.T) {
|
||||||
|
rp0, rp1, rp2 := freePort(t), freePort(t), freePort(t)
|
||||||
|
n0 := startNode(t, "n0", freePort(t), rp0, []int{rp1, rp2}, "u", "p", nil)
|
||||||
|
n1 := startNode(t, "n1", freePort(t), rp1, []int{rp0, rp2}, "u", "p", nil)
|
||||||
|
n2 := startNode(t, "n2", freePort(t), rp2, []int{rp0, rp1}, "u", "p", nil)
|
||||||
|
|
||||||
|
waitRoutes(t, n0, 2)
|
||||||
|
waitRoutes(t, n1, 2)
|
||||||
|
waitRoutes(t, n2, 2)
|
||||||
|
|
||||||
|
// Publish on n2, subscribe on n0: a message must traverse the cluster.
|
||||||
|
if !pubSubAcrossNodes(t, n0.ClientURL(), n2.ClientURL(), "test.ha", "three-node") {
|
||||||
|
t.Fatalf("subject published on n2 did not reach subscriber on n0")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// --- error: a node with the wrong cluster password is rejected as a route -----
|
||||||
|
|
||||||
|
func TestClusterRejectsBadRouteAuth(t *testing.T) {
|
||||||
|
rp0, rp1 := freePort(t), freePort(t)
|
||||||
|
good := startNode(t, "good", freePort(t), rp0, []int{rp1}, "secret", "right", nil)
|
||||||
|
_ = startNode(t, "peer", freePort(t), rp1, []int{rp0}, "secret", "right", nil)
|
||||||
|
waitRoutes(t, good, 1)
|
||||||
|
// Let the route pool settle so the baseline count is stable (NATS opens a
|
||||||
|
// pool of route connections per peer, so NumRoutes counts connections, not
|
||||||
|
// distinct peers).
|
||||||
|
base := stableRouteCount(t, good)
|
||||||
|
|
||||||
|
// Impostor knows the addresses but not the cluster password. It tries to
|
||||||
|
// route to `good`; the route handshake must be rejected, so the impostor
|
||||||
|
// never establishes a route.
|
||||||
|
impostor := startNode(t, "impostor", freePort(t), freePort(t), []int{rp0}, "secret", "WRONG", nil)
|
||||||
|
|
||||||
|
// Give the route layer ample time to (fail to) connect, then assert it never
|
||||||
|
// formed: the impostor has zero routes, and `good`'s route count is unchanged
|
||||||
|
// (it did not accept a route from the impostor).
|
||||||
|
time.Sleep(2 * time.Second)
|
||||||
|
if n := impostor.NumRoutes(); n != 0 {
|
||||||
|
t.Fatalf("impostor with wrong cluster password formed %d routes, want 0", n)
|
||||||
|
}
|
||||||
|
if n := good.NumRoutes(); n != base {
|
||||||
|
t.Fatalf("legit node route count changed from %d to %d after impostor attempt (it accepted the impostor)", base, n)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// --- golden (TLS): mutual-TLS routes forward across nodes ---------------------
|
||||||
|
|
||||||
|
func TestClusterMutualTLSForwards(t *testing.T) {
|
||||||
|
ca, caKey := genCA(t)
|
||||||
|
dir := t.TempDir()
|
||||||
|
tlsA := writeNodeCert(t, dir, "a", ca, caKey)
|
||||||
|
tlsB := writeNodeCert(t, dir, "b", ca, caKey)
|
||||||
|
|
||||||
|
rp0, rp1 := freePort(t), freePort(t)
|
||||||
|
n0 := startNode(t, "n0", freePort(t), rp0, []int{rp1}, "u", "p", tlsA)
|
||||||
|
n1 := startNode(t, "n1", freePort(t), rp1, []int{rp0}, "u", "p", tlsB)
|
||||||
|
|
||||||
|
waitRoutes(t, n0, 1)
|
||||||
|
waitRoutes(t, n1, 1)
|
||||||
|
|
||||||
|
if !pubSubAcrossNodes(t, n0.ClientURL(), n1.ClientURL(), "test.tls", "mtls-ok") {
|
||||||
|
t.Fatalf("subject did not cross the mutual-TLS cluster")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// --- error (TLS): a node whose cert is not signed by the bus CA cannot join ---
|
||||||
|
|
||||||
|
func TestClusterRejectsUnsignedNode(t *testing.T) {
|
||||||
|
ca, caKey := genCA(t)
|
||||||
|
dir := t.TempDir()
|
||||||
|
tlsGood := writeNodeCert(t, dir, "good", ca, caKey)
|
||||||
|
tlsPeer := writeNodeCert(t, dir, "peer", ca, caKey)
|
||||||
|
|
||||||
|
// The impostor signs its node cert with a DIFFERENT CA, and pins only that
|
||||||
|
// CA. The legit nodes' RequireAndVerifyClientCert against the bus CA rejects
|
||||||
|
// it; the impostor likewise rejects the legit node's cert. No route forms.
|
||||||
|
otherCA, otherKey := genCA(t)
|
||||||
|
tlsImpostor := writeNodeCert(t, dir, "impostor", otherCA, otherKey)
|
||||||
|
|
||||||
|
rp0, rp1 := freePort(t), freePort(t)
|
||||||
|
good := startNode(t, "good", freePort(t), rp0, []int{rp1}, "u", "p", tlsGood)
|
||||||
|
_ = startNode(t, "peer", freePort(t), rp1, []int{rp0}, "u", "p", tlsPeer)
|
||||||
|
waitRoutes(t, good, 1)
|
||||||
|
base := stableRouteCount(t, good)
|
||||||
|
|
||||||
|
impostor := startNode(t, "impostor", freePort(t), freePort(t), []int{rp0}, "u", "p", tlsImpostor)
|
||||||
|
time.Sleep(2 * time.Second)
|
||||||
|
if n := impostor.NumRoutes(); n != 0 {
|
||||||
|
t.Fatalf("impostor with unsigned cert formed %d routes, want 0", n)
|
||||||
|
}
|
||||||
|
if n := good.NumRoutes(); n != base {
|
||||||
|
t.Fatalf("legit node route count changed from %d to %d after unsigned impostor attempt (it accepted the impostor)", base, n)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// --- cert helpers ------------------------------------------------------------
|
||||||
|
|
||||||
|
// clusterTLS groups the PEM file paths that make up one node's route-TLS
// material.
type clusterTLS struct {
	cert string // path of the node certificate
	key  string // path of the node private key
	ca   string // path of the issuing CA certificate
}
|
||||||
|
|
||||||
|
// genCA generates a P-256 ECDSA key and a self-signed CA certificate for it
// (CommonName "unibus-test-CA", serial 1, valid from one hour ago until 24
// hours from now). It returns the parsed certificate together with the key and
// fails the test on any error.
func genCA(t *testing.T) (*x509.Certificate, *ecdsa.PrivateKey) {
	t.Helper()

	caKey, err := ecdsa.GenerateKey(elliptic.P256(), rand.Reader)
	if err != nil {
		t.Fatalf("gen CA key: %v", err)
	}

	var template x509.Certificate
	template.SerialNumber = big.NewInt(1)
	template.Subject = pkix.Name{CommonName: "unibus-test-CA"}
	template.NotBefore = time.Now().Add(-time.Hour)
	template.NotAfter = time.Now().Add(24 * time.Hour)
	template.KeyUsage = x509.KeyUsageCertSign | x509.KeyUsageDigitalSignature
	template.BasicConstraintsValid = true
	template.IsCA = true

	// Template and parent are the same certificate: the CA signs itself.
	rawCert, err := x509.CreateCertificate(rand.Reader, &template, &template, &caKey.PublicKey, caKey)
	if err != nil {
		t.Fatalf("create CA cert: %v", err)
	}

	parsed, err := x509.ParseCertificate(rawCert)
	if err != nil {
		t.Fatalf("parse CA cert: %v", err)
	}
	return parsed, caKey
}
|
||||||
|
|
||||||
|
// writeNodeCert issues a node certificate signed by ca (SAN 127.0.0.1/::1,
|
||||||
|
// usable as both server and client) and writes cert/key/ca PEM files, returning
|
||||||
|
// their paths for RouteTLSConfig.
|
||||||
|
func writeNodeCert(t *testing.T, dir, name string, ca *x509.Certificate, caKey *ecdsa.PrivateKey) *clusterTLS {
|
||||||
|
t.Helper()
|
||||||
|
key, err := ecdsa.GenerateKey(elliptic.P256(), rand.Reader)
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("gen node key: %v", err)
|
||||||
|
}
|
||||||
|
tmpl := &x509.Certificate{
|
||||||
|
SerialNumber: big.NewInt(time.Now().UnixNano()),
|
||||||
|
Subject: pkix.Name{CommonName: name},
|
||||||
|
NotBefore: time.Now().Add(-time.Hour),
|
||||||
|
NotAfter: time.Now().Add(24 * time.Hour),
|
||||||
|
KeyUsage: x509.KeyUsageDigitalSignature | x509.KeyUsageKeyEncipherment,
|
||||||
|
ExtKeyUsage: []x509.ExtKeyUsage{x509.ExtKeyUsageServerAuth, x509.ExtKeyUsageClientAuth},
|
||||||
|
IPAddresses: []net.IP{net.ParseIP("127.0.0.1"), net.ParseIP("::1")},
|
||||||
|
DNSNames: []string{"localhost"},
|
||||||
|
}
|
||||||
|
der, err := x509.CreateCertificate(rand.Reader, tmpl, ca, &key.PublicKey, caKey)
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("create node cert: %v", err)
|
||||||
|
}
|
||||||
|
certPath := filepath.Join(dir, name+".crt")
|
||||||
|
keyPath := filepath.Join(dir, name+".key")
|
||||||
|
caPath := filepath.Join(dir, name+"-ca.crt")
|
||||||
|
|
||||||
|
writePEM(t, certPath, "CERTIFICATE", der)
|
||||||
|
keyDER, err := x509.MarshalECPrivateKey(key)
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("marshal node key: %v", err)
|
||||||
|
}
|
||||||
|
writePEM(t, keyPath, "EC PRIVATE KEY", keyDER)
|
||||||
|
writePEM(t, caPath, "CERTIFICATE", ca.Raw)
|
||||||
|
return &clusterTLS{cert: certPath, key: keyPath, ca: caPath}
|
||||||
|
}
|
||||||
|
|
||||||
|
// writePEM wraps der in a PEM block of the given type and writes it to path
// with mode 0600, failing the test if the write fails.
func writePEM(t *testing.T, path, blockType string, der []byte) {
	t.Helper()

	block := pem.Block{Type: blockType, Bytes: der}
	encoded := pem.EncodeToMemory(&block)

	err := os.WriteFile(path, encoded, 0o600)
	if err != nil {
		t.Fatalf("write %s: %v", path, err)
	}
}
|
||||||
@@ -6,22 +6,84 @@
|
|||||||
package embeddednats
|
package embeddednats
|
||||||
|
|
||||||
import (
|
import (
|
||||||
|
"crypto/tls"
|
||||||
"fmt"
|
"fmt"
|
||||||
|
"net/url"
|
||||||
"time"
|
"time"
|
||||||
|
|
||||||
server "github.com/nats-io/nats-server/v2/server"
|
server "github.com/nats-io/nats-server/v2/server"
|
||||||
)
|
)
|
||||||
|
|
||||||
// Start launches an embedded nats-server with JetStream enabled, listening on
|
// ClusterConfig configures the route layer that links several embedded NATS
|
||||||
// the given port and persisting JetStream state under storeDir. The listen host
|
// servers into a single cluster (issue 0003a). It is the data-plane side of
|
||||||
// is left at the nats-server default ("0.0.0.0", all interfaces). It blocks
|
// high availability: with a cluster, a client subject published on one node is
|
||||||
// until the server is ready to accept connections (up to 5s) and returns the
|
// forwarded to subscribers connected to any other node, and (with JetStream
|
||||||
// running server. The caller is responsible for calling Shutdown on it.
|
// replicas > 1) streams/KV are RAFT-replicated across nodes so the loss of one
|
||||||
|
// node does not lose the bus.
|
||||||
//
|
//
|
||||||
// Start is a thin backward-compatible wrapper over StartHost; callers that need
|
// The route layer is a SEPARATE trust boundary from the client data plane: it
|
||||||
// to control the bind interface (loopback vs LAN) should use StartHost directly.
|
// carries server-to-server traffic, so it authenticates NODES, not bus users.
|
||||||
|
// Never reuse the nkey client authenticator here. Routes are secured with their
|
||||||
|
// own shared secret (Username/Password -> NATS Cluster.Authorization) and their
|
||||||
|
// own mutual TLS (TLS, built from the bus CA with busauth.RouteTLSConfig): a
|
||||||
|
// node without the cluster secret and a CA-signed node certificate cannot join
|
||||||
|
// the cluster nor inject messages into it.
|
||||||
|
type ClusterConfig struct {
	// Name identifies the cluster. Every node must use the same value;
	// otherwise the servers refuse to gossip routes to each other.
	Name string

	// Host and Port define the server-to-server route listener, which is
	// separate from the client listener. Pick a free port that no client
	// listener uses (e.g. 6250).
	Host string
	Port int

	// Routes lists the route URLs of the other nodes, for example
	// "nats://user:pass@10.0.0.2:6250". When route auth is enabled, each URL
	// must embed the same userinfo as Username/Password below so that this
	// node can authenticate when dialing its peers.
	Routes []string

	// Username and Password protect the route listener (NATS
	// Cluster.Authorization): a connection to the route port that does not
	// present them is rejected and never becomes a route. Leaving them empty
	// turns route auth off (dev / trusted-network only).
	Username, Password string

	// TLS enables mutual TLS on route connections when non-nil. It is meant to
	// be built with busauth.RouteTLSConfig(cert, key, ca), so that the server
	// both presents its node certificate and verifies the peer's certificate
	// against the bus CA; a node without a CA-signed certificate then cannot
	// form a route even with the right password. A nil value leaves routes in
	// plaintext (dev / WireGuard-only).
	TLS *tls.Config
}
|
||||||
|
|
||||||
|
// ServerConfig is the full set of knobs for the embedded NATS server. The zero
|
||||||
|
// value (empty StoreDir aside) yields a dev-friendly server: JetStream on, bound
|
||||||
|
// to all interfaces, no client auth, no TLS, standalone (no cluster). Secured
|
||||||
|
// deployments set Auth and TLS; HA deployments set ServerName + Cluster; tests
|
||||||
|
// set Host to loopback and a free Port.
|
||||||
|
type ServerConfig struct {
|
||||||
|
StoreDir string // JetStream store directory
|
||||||
|
Host string // bind interface; "" = nats-server default ("0.0.0.0")
|
||||||
|
Port int // listen port
|
||||||
|
// ServerName is this node's unique name within the cluster. JetStream's RAFT
|
||||||
|
// layer requires a stable, unique name per node to form its meta-group; leave
|
||||||
|
// it empty for a standalone server (nats-server then auto-generates one).
|
||||||
|
ServerName string
|
||||||
|
// Auth, when non-nil, is installed as CustomClientAuthentication so the data
|
||||||
|
// plane only accepts approved clients (nkey signature + bus allowlist).
|
||||||
|
Auth server.Authentication
|
||||||
|
// TLS, when non-nil, makes the server present a certificate and require TLS
|
||||||
|
// on the data plane. Clients must trust the issuing CA (see busauth).
|
||||||
|
TLS *tls.Config
|
||||||
|
// Cluster, when non-nil, joins this server to a route cluster for high
|
||||||
|
// availability (issue 0003a). Nil keeps the server standalone (the legacy
|
||||||
|
// single-node behavior).
|
||||||
|
Cluster *ClusterConfig
|
||||||
|
}
|
||||||
|
|
||||||
|
// Start is a thin backward-compatible wrapper: embedded JetStream server on the
|
||||||
|
// default interface, no auth, no TLS.
|
||||||
func Start(storeDir string, port int) (*server.Server, error) {
|
func Start(storeDir string, port int) (*server.Server, error) {
|
||||||
return StartHost(storeDir, "", port)
|
return StartServer(ServerConfig{StoreDir: storeDir, Port: port})
|
||||||
}
|
}
|
||||||
|
|
||||||
// StartHost is Start with explicit control over the bind interface. host selects
|
// StartHost is Start with explicit control over the bind interface. host selects
|
||||||
@@ -30,16 +92,49 @@ func Start(storeDir string, port int) (*server.Server, error) {
|
|||||||
// to expose it to the LAN so remote peers (phones, other PCs) can connect. An
|
// to expose it to the LAN so remote peers (phones, other PCs) can connect. An
|
||||||
// empty host falls back to the nats-server default ("0.0.0.0", all interfaces).
|
// empty host falls back to the nats-server default ("0.0.0.0", all interfaces).
|
||||||
func StartHost(storeDir, host string, port int) (*server.Server, error) {
|
func StartHost(storeDir, host string, port int) (*server.Server, error) {
|
||||||
|
return StartServer(ServerConfig{StoreDir: storeDir, Host: host, Port: port})
|
||||||
|
}
|
||||||
|
|
||||||
|
// StartHostAuth is StartHost with an optional custom client authenticator. When
|
||||||
|
// auth is non-nil only clients the authenticator approves may connect; when nil
|
||||||
|
// the server accepts any client (legacy, network-trusted behavior).
|
||||||
|
func StartHostAuth(storeDir, host string, port int, auth server.Authentication) (*server.Server, error) {
|
||||||
|
return StartServer(ServerConfig{StoreDir: storeDir, Host: host, Port: port, Auth: auth})
|
||||||
|
}
|
||||||
|
|
||||||
|
// StartServer launches an embedded nats-server with JetStream from cfg. It
|
||||||
|
// blocks until the server is ready to accept connections (up to 5s) and returns
|
||||||
|
// the running server; the caller must Shutdown it.
|
||||||
|
func StartServer(cfg ServerConfig) (*server.Server, error) {
|
||||||
opts := &server.Options{
|
opts := &server.Options{
|
||||||
JetStream: true,
|
JetStream: true,
|
||||||
StoreDir: storeDir,
|
StoreDir: cfg.StoreDir,
|
||||||
Host: host,
|
Host: cfg.Host,
|
||||||
Port: port,
|
Port: cfg.Port,
|
||||||
|
ServerName: cfg.ServerName,
|
||||||
DontListen: false,
|
DontListen: false,
|
||||||
// Keep the embedded server quiet by default; the host app logs the URLs.
|
// Keep the embedded server quiet by default; the host app logs the URLs.
|
||||||
NoLog: true,
|
NoLog: true,
|
||||||
NoSigs: true,
|
NoSigs: true,
|
||||||
}
|
}
|
||||||
|
if cfg.Auth != nil {
|
||||||
|
opts.CustomClientAuthentication = cfg.Auth
|
||||||
|
// A CustomClientAuthentication alone does not make the server advertise a
|
||||||
|
// nonce in its INFO line, and nats.go refuses to connect with an nkey to a
|
||||||
|
// server that does not ("nkeys not supported by the server"). Forcing the
|
||||||
|
// nonce makes nkey clients sign the challenge our authenticator verifies.
|
||||||
|
opts.AlwaysEnableNonce = true
|
||||||
|
}
|
||||||
|
if cfg.TLS != nil {
|
||||||
|
opts.TLSConfig = cfg.TLS
|
||||||
|
opts.TLS = true
|
||||||
|
}
|
||||||
|
|
||||||
|
if cfg.Cluster != nil {
|
||||||
|
if err := applyClusterOpts(opts, cfg.Cluster); err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
ns, err := server.NewServer(opts)
|
ns, err := server.NewServer(opts)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
@@ -56,6 +151,34 @@ func StartHost(storeDir, host string, port int) (*server.Server, error) {
|
|||||||
return ns, nil
|
return ns, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// applyClusterOpts translates a ClusterConfig into the nats-server route options
|
||||||
|
// on opts: the cluster listener (name + host/port + shared-secret auth + mutual
|
||||||
|
// TLS) and the outbound routes to the other nodes. A malformed route URL is a
|
||||||
|
// configuration error and aborts startup rather than silently dropping a peer.
|
||||||
|
func applyClusterOpts(opts *server.Options, c *ClusterConfig) error {
|
||||||
|
opts.Cluster = server.ClusterOpts{
|
||||||
|
Name: c.Name,
|
||||||
|
Host: c.Host,
|
||||||
|
Port: c.Port,
|
||||||
|
Username: c.Username,
|
||||||
|
Password: c.Password,
|
||||||
|
}
|
||||||
|
if c.TLS != nil {
|
||||||
|
opts.Cluster.TLSConfig = c.TLS
|
||||||
|
// A generous handshake budget: route TLS does a mutual handshake and the
|
||||||
|
// peer may still be booting. The default 2s can flap on a cold cluster.
|
||||||
|
opts.Cluster.TLSTimeout = 5.0
|
||||||
|
}
|
||||||
|
for _, r := range c.Routes {
|
||||||
|
u, err := url.Parse(r)
|
||||||
|
if err != nil {
|
||||||
|
return fmt.Errorf("embeddednats: parse route %q: %w", r, err)
|
||||||
|
}
|
||||||
|
opts.Routes = append(opts.Routes, u)
|
||||||
|
}
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
// ClientURL returns a NATS connection URL for the running embedded server.
|
// ClientURL returns a NATS connection URL for the running embedded server.
|
||||||
func ClientURL(ns *server.Server) string {
|
func ClientURL(ns *server.Server) string {
|
||||||
return ns.ClientURL()
|
return ns.ClientURL()
|
||||||
|
|||||||
@@ -0,0 +1,118 @@
|
|||||||
|
package membership
|
||||||
|
|
||||||
|
// Per-subject data-plane access control derived from room membership (issue
|
||||||
|
// 0003e, audit H4 residual; tightened in issue 0006b for audit 0008 N2). The
|
||||||
|
// control plane already authorizes metadata by membership; this is the matching
|
||||||
|
// restriction on the NATS data plane so a registered peer can only
|
||||||
|
// publish/subscribe on the subjects of the rooms it actually belongs to — and can
|
||||||
|
// only reach the JetStream API of ITS OWN rooms' streams, never the control-plane
|
||||||
|
// KV buckets.
|
||||||
|
|
||||||
|
import (
|
||||||
|
"encoding/hex"
|
||||||
|
"fmt"
|
||||||
|
"strings"
|
||||||
|
|
||||||
|
"github.com/enmanuel/unibus/pkg/frame"
|
||||||
|
)
|
||||||
|
|
||||||
|
// clientInfraSubjects are the subjects every authorized peer needs regardless of
|
||||||
|
// room membership, kept deliberately MINIMAL (issue 0006b, audit 0008 N2):
|
||||||
|
//
|
||||||
|
// - "_INBOX.>" — request/reply plus the JetStream pull-consumer delivery
|
||||||
|
// and publish-ack inboxes.
|
||||||
|
// - "$JS.API.INFO" — account-level JetStream info (limits/usage counters). It
|
||||||
|
// exposes NO room/user/key contents, so granting it leaks nothing.
|
||||||
|
//
|
||||||
|
// It NO LONGER contains "$JS.API.>". That broad grant was the N2 leak: it let any
|
||||||
|
// registered peer drive the whole JetStream API and read the control-plane KV
|
||||||
|
// buckets (KV_UNIBUS_users/rooms/members/room_keys) and the object store directly
|
||||||
|
// over NATS, bypassing the HTTP authorization (requireMember and the own-endpoint
|
||||||
|
// checks). JetStream API access is now granted PER ROOM, scoped to the stream of
|
||||||
|
// each room the peer belongs to (jsSubjectsFor). Because the control-plane KV
|
||||||
|
// streams (KV_UNIBUS_*) and the object store (OBJ_UNIBUS_*) are never a room
|
||||||
|
// stream, they fall outside the closed allow set and are denied by default.
|
||||||
|
var clientInfraSubjects = []string{
	"_INBOX.>",     // request/reply and JetStream delivery / publish-ack inboxes
	"$JS.API.INFO", // account-level JetStream info only
}
|
||||||
|
|
||||||
|
// roomStreamName derives the JetStream stream name of a persisted room:
// "UNIBUS_" followed by roomID with every rune outside [A-Za-z0-9_] replaced by
// a single '_'.
//
// NOTE(review): this is meant to mirror pkg/client.streamName exactly so that
// the ACL grants the very subjects the client uses; that function is not visible
// here, so keep the two in sync when either changes.
func roomStreamName(roomID string) string {
	const prefix = "UNIBUS_"

	sanitize := func(r rune) rune {
		isLower := 'a' <= r && r <= 'z'
		isUpper := 'A' <= r && r <= 'Z'
		isDigit := '0' <= r && r <= '9'
		if isLower || isUpper || isDigit || r == '_' {
			return r
		}
		return '_'
	}

	return prefix + strings.Map(sanitize, roomID)
}
|
||||||
|
|
||||||
|
// jsSubjectsFor returns the MINIMAL JetStream API subjects a peer needs to use the
|
||||||
|
// durable stream of ONE persisted room: create/update/info the stream, manage and
|
||||||
|
// pull from its durable consumer, and ack deliveries. Every subject embeds this
|
||||||
|
// room's stream name, so the grant cannot reach another room's stream nor any
|
||||||
|
// control-plane stream (KV_UNIBUS_* / OBJ_UNIBUS_*). The wildcard layout matches
|
||||||
|
// the NATS JetStream API subject grammar (the stream name is the trailing token
|
||||||
|
// of single-verb requests and follows a two-token verb for MSG.GET / MSG.NEXT /
|
||||||
|
// DURABLE.CREATE):
|
||||||
|
//
|
||||||
|
// $JS.API.STREAM.<verb>.<stream> verb in {CREATE,UPDATE,INFO,DELETE,PURGE,...}
|
||||||
|
// $JS.API.STREAM.MSG.<op>.<stream> op in {GET,DELETE}
|
||||||
|
// $JS.API.CONSUMER.<verb>.<stream> verb in {LIST,NAMES,CREATE(ephemeral)}
|
||||||
|
// $JS.API.CONSUMER.<verb>.<stream>.<consumer>... verb in {CREATE,INFO,DELETE}
|
||||||
|
// $JS.API.CONSUMER.<v1>.<v2>.<stream>.<cons> {MSG.NEXT, DURABLE.CREATE}
|
||||||
|
// $JS.ACK.<stream>.> message acknowledgements
|
||||||
|
func jsSubjectsFor(roomID string) []string {
|
||||||
|
s := roomStreamName(roomID)
|
||||||
|
return []string{
|
||||||
|
"$JS.API.STREAM.*." + s,
|
||||||
|
"$JS.API.STREAM.*.*." + s,
|
||||||
|
"$JS.API.CONSUMER.*." + s,
|
||||||
|
"$JS.API.CONSUMER.*." + s + ".>",
|
||||||
|
"$JS.API.CONSUMER.*.*." + s + ".>",
|
||||||
|
"$JS.ACK." + s + ".>",
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// SubjectACLFor returns a function that maps a signing public key (lowercase hex)
|
||||||
|
// to the data-plane subjects that identity may publish and subscribe to: the
|
||||||
|
// subject of every room it belongs to, the per-room JetStream API subjects of
|
||||||
|
// those rooms (so persisted-room history keeps working), plus the minimal client
|
||||||
|
// infrastructure subjects. It reads the live membership store, so the permissions
|
||||||
|
// reflect the identity's rooms at the moment it connects. A decode error or a
|
||||||
|
// store failure is returned as an error so the caller can fail closed (deny the
|
||||||
|
// connection) rather than grant open access.
|
||||||
|
//
|
||||||
|
// Because NATS freezes permissions at connect time, a peer invited to a new room
|
||||||
|
// after connecting must reconnect (client.RefreshSession) to pick up the new
|
||||||
|
// room's subject. The bus is the authoritative directory of subjects, so an
|
||||||
|
// unlisted subject is simply absent from the allow set.
|
||||||
|
func SubjectACLFor(store Store) func(signPubHex string) ([]string, error) {
|
||||||
|
return func(signPubHex string) ([]string, error) {
|
||||||
|
pub, err := hex.DecodeString(signPubHex)
|
||||||
|
if err != nil || len(pub) != 32 {
|
||||||
|
return nil, fmt.Errorf("acl: malformed sign pub %q", signPubHex)
|
||||||
|
}
|
||||||
|
endpoint := frame.EndpointID(pub)
|
||||||
|
rooms, err := store.ListRoomsForEndpoint(endpoint)
|
||||||
|
if err != nil {
|
||||||
|
return nil, fmt.Errorf("acl: list rooms for %s: %w", endpoint, err)
|
||||||
|
}
|
||||||
|
// clientInfra + per room: the room subject + that room's JetStream API.
|
||||||
|
subjects := make([]string, 0, len(clientInfraSubjects)+len(rooms)*7)
|
||||||
|
subjects = append(subjects, clientInfraSubjects...)
|
||||||
|
for _, r := range rooms {
|
||||||
|
subjects = append(subjects, r.Subject)
|
||||||
|
subjects = append(subjects, jsSubjectsFor(r.RoomID)...)
|
||||||
|
}
|
||||||
|
return subjects, nil
|
||||||
|
}
|
||||||
|
}
|
||||||
@@ -0,0 +1,379 @@
|
|||||||
|
package membership_test
|
||||||
|
|
||||||
|
import (
|
||||||
|
"encoding/hex"
|
||||||
|
"net"
|
||||||
|
"net/http/httptest"
|
||||||
|
"path/filepath"
|
||||||
|
"testing"
|
||||||
|
"time"
|
||||||
|
|
||||||
|
cs "fn-registry/functions/cybersecurity"
|
||||||
|
|
||||||
|
"github.com/enmanuel/unibus/pkg/blobstore"
|
||||||
|
"github.com/enmanuel/unibus/pkg/busauth"
|
||||||
|
"github.com/enmanuel/unibus/pkg/client"
|
||||||
|
"github.com/enmanuel/unibus/pkg/embeddednats"
|
||||||
|
"github.com/enmanuel/unibus/pkg/frame"
|
||||||
|
"github.com/enmanuel/unibus/pkg/membership"
|
||||||
|
"github.com/nats-io/nats.go"
|
||||||
|
server "github.com/nats-io/nats-server/v2/server"
|
||||||
|
)
|
||||||
|
|
||||||
|
// aclFreePort asks the kernel for an unused loopback TCP port by listening on
// 127.0.0.1:0, and returns the assigned port after the listener is closed. The
// test fails if the listen call fails.
func aclFreePort(t *testing.T) int {
	t.Helper()

	listener, err := net.Listen("tcp", "127.0.0.1:0")
	if err != nil {
		t.Fatalf("free port: %v", err)
	}
	defer listener.Close()

	tcpAddr := listener.Addr().(*net.TCPAddr)
	return tcpAddr.Port
}
|
||||||
|
|
||||||
|
func mustID(t *testing.T) cs.Identity {
|
||||||
|
t.Helper()
|
||||||
|
id, err := cs.GenerateIdentity()
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("identity: %v", err)
|
||||||
|
}
|
||||||
|
return id
|
||||||
|
}
|
||||||
|
|
||||||
|
// aclPermsFunc builds the per-subject PermissionsFunc the ACL authenticator
|
||||||
|
// expects. It delegates to the SAME production wiring membershipd uses
|
||||||
|
// (busauth.PermissionsFromSubjects over membership.SubjectACLFor), so this test
|
||||||
|
// exercises the real path rather than a test-only reimplementation.
|
||||||
|
func aclPermsFunc(store membership.Store) busauth.PermissionsFunc {
|
||||||
|
return busauth.PermissionsFromSubjects(membership.SubjectACLFor(store))
|
||||||
|
}
|
||||||
|
|
||||||
|
// startACLNats boots an embedded NATS whose authenticator confines each peer to
|
||||||
|
// the subjects of the rooms it belongs to (audit H4 residual).
|
||||||
|
func startACLNats(t *testing.T, store membership.Store) *server.Server {
|
||||||
|
t.Helper()
|
||||||
|
auth := busauth.NewNkeyAuthenticatorACL(store.IsAuthorized, aclPermsFunc(store))
|
||||||
|
ns, err := embeddednats.StartServer(embeddednats.ServerConfig{
|
||||||
|
StoreDir: t.TempDir(), Host: "127.0.0.1", Port: aclFreePort(t), Auth: auth,
|
||||||
|
})
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("acl nats: %v", err)
|
||||||
|
}
|
||||||
|
t.Cleanup(func() { ns.Shutdown(); ns.WaitForShutdown() })
|
||||||
|
return ns
|
||||||
|
}
|
||||||
|
|
||||||
|
func nkeyConn(t *testing.T, natsURL string, id cs.Identity, errCh chan error) *nats.Conn {
|
||||||
|
t.Helper()
|
||||||
|
pub, sign, err := busauth.ClientNkey(id.SignPriv)
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("nkey: %v", err)
|
||||||
|
}
|
||||||
|
nc, err := nats.Connect(natsURL,
|
||||||
|
nats.Nkey(pub, sign),
|
||||||
|
nats.ErrorHandler(func(_ *nats.Conn, _ *nats.Subscription, e error) {
|
||||||
|
select {
|
||||||
|
case errCh <- e:
|
||||||
|
default:
|
||||||
|
}
|
||||||
|
}),
|
||||||
|
)
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("connect nkey: %v", err)
|
||||||
|
}
|
||||||
|
t.Cleanup(nc.Close)
|
||||||
|
return nc
|
||||||
|
}
|
||||||
|
|
||||||
|
func mustAddUser(t *testing.T, store membership.Store, id cs.Identity, handle string) {
|
||||||
|
t.Helper()
|
||||||
|
if err := store.AddUser(hex.EncodeToString(id.SignPub), handle, membership.RoleMember); err != nil {
|
||||||
|
t.Fatalf("add user %s: %v", handle, err)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func mustCreateRoom(t *testing.T, store membership.Store, roomID, subject, ownerEP string, owner cs.Identity) {
|
||||||
|
t.Helper()
|
||||||
|
info := membership.RoomInfo{RoomID: roomID, Subject: subject, OwnerEndpoint: ownerEP}
|
||||||
|
if err := store.CreateRoom(info, owner.SignPub, owner.KexPub, nil); err != nil {
|
||||||
|
t.Fatalf("create room %s: %v", roomID, err)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func newCtrl(t *testing.T, store membership.Store, blobs blobstore.Store) string {
|
||||||
|
t.Helper()
|
||||||
|
ts := httptest.NewServer(membership.NewServer(store, blobs, membership.AuthOff))
|
||||||
|
t.Cleanup(ts.Close)
|
||||||
|
return ts.URL
|
||||||
|
}
|
||||||
|
|
||||||
|
// waitErr waits up to d for a value on ch. It returns the received value, or
// nil when the timeout elapses first.
func waitErr(ch chan error, d time.Duration) error {
	deadline := time.NewTimer(d)
	defer deadline.Stop()

	select {
	case <-deadline.C:
		return nil
	case received := <-ch:
		return received
	}
}
|
||||||
|
|
||||||
|
// drain discards every value that can be received from ch without blocking and
// returns as soon as a receive would block.
func drain(ch chan error) {
	for {
		select {
		case <-ch:
			continue
		default:
		}
		return
	}
}
|
||||||
|
|
||||||
|
// TestSubjectACLIsolation closes the audit H4 residual: a registered peer is
|
||||||
|
// confined to the subjects of the rooms it belongs to. alice (member of room.A)
|
||||||
|
// may sub/pub room.A but is DENIED sub/pub on room.B, and never reads what bob
|
||||||
|
// (member of room.B) publishes there.
|
||||||
|
func TestSubjectACLIsolation(t *testing.T) {
|
||||||
|
dir := t.TempDir()
|
||||||
|
store, err := membership.Open(filepath.Join(dir, "unibus.db"))
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("store: %v", err)
|
||||||
|
}
|
||||||
|
t.Cleanup(func() { store.Close() })
|
||||||
|
|
||||||
|
alice, bob := mustID(t), mustID(t)
|
||||||
|
aliceEP, bobEP := frame.EndpointID(alice.SignPub), frame.EndpointID(bob.SignPub)
|
||||||
|
mustAddUser(t, store, alice, "alice")
|
||||||
|
mustAddUser(t, store, bob, "bob")
|
||||||
|
const subjA, subjB = "room.acl.a", "room.acl.b"
|
||||||
|
mustCreateRoom(t, store, "ROOMA", subjA, aliceEP, alice)
|
||||||
|
mustCreateRoom(t, store, "ROOMB", subjB, bobEP, bob)
|
||||||
|
|
||||||
|
srv := startACLNats(t, store)
|
||||||
|
url := srv.ClientURL()
|
||||||
|
aliceErr := make(chan error, 4)
|
||||||
|
bobErr := make(chan error, 4)
|
||||||
|
aliceNC := nkeyConn(t, url, alice, aliceErr)
|
||||||
|
bobNC := nkeyConn(t, url, bob, bobErr)
|
||||||
|
|
||||||
|
// alice may subscribe to her own room (no error).
|
||||||
|
aliceGot := make(chan string, 4)
|
||||||
|
if _, err := aliceNC.Subscribe(subjA, func(m *nats.Msg) { aliceGot <- string(m.Data) }); err != nil {
|
||||||
|
t.Fatalf("alice sub A: %v", err)
|
||||||
|
}
|
||||||
|
_ = aliceNC.Flush()
|
||||||
|
if e := waitErr(aliceErr, 300*time.Millisecond); e != nil {
|
||||||
|
t.Fatalf("alice sub to her OWN room raised an error: %v", e)
|
||||||
|
}
|
||||||
|
|
||||||
|
// alice subscribing to bob's room is a permissions violation.
|
||||||
|
if _, err := aliceNC.Subscribe(subjB, func(m *nats.Msg) { aliceGot <- "LEAK:" + string(m.Data) }); err != nil {
|
||||||
|
t.Fatalf("alice sub B (queue): %v", err)
|
||||||
|
}
|
||||||
|
_ = aliceNC.Flush()
|
||||||
|
if e := waitErr(aliceErr, 1*time.Second); e == nil {
|
||||||
|
t.Fatalf("alice subscribing to bob's room should raise a permissions violation")
|
||||||
|
}
|
||||||
|
|
||||||
|
// bob publishes in his room; alice (denied) must not receive it.
|
||||||
|
bobGot := make(chan string, 4)
|
||||||
|
if _, err := bobNC.Subscribe(subjB, func(m *nats.Msg) { bobGot <- string(m.Data) }); err != nil {
|
||||||
|
t.Fatalf("bob sub B: %v", err)
|
||||||
|
}
|
||||||
|
_ = bobNC.Flush()
|
||||||
|
if err := bobNC.Publish(subjB, []byte("internal-bob")); err != nil {
|
||||||
|
t.Fatalf("bob pub B: %v", err)
|
||||||
|
}
|
||||||
|
_ = bobNC.Flush()
|
||||||
|
select {
|
||||||
|
case got := <-bobGot:
|
||||||
|
if got != "internal-bob" {
|
||||||
|
t.Fatalf("bob got %q", got)
|
||||||
|
}
|
||||||
|
case <-time.After(2 * time.Second):
|
||||||
|
t.Fatalf("bob did not receive his own message")
|
||||||
|
}
|
||||||
|
select {
|
||||||
|
case leak := <-aliceGot:
|
||||||
|
t.Fatalf("alice received bob's room traffic despite the ACL: %q", leak)
|
||||||
|
case <-time.After(500 * time.Millisecond):
|
||||||
|
// good: alice never got it
|
||||||
|
}
|
||||||
|
|
||||||
|
// alice publishing into bob's room is denied; bob must not receive it.
|
||||||
|
drain(aliceErr)
|
||||||
|
if err := aliceNC.Publish(subjB, []byte("intruder")); err != nil {
|
||||||
|
t.Fatalf("alice pub B (queue): %v", err)
|
||||||
|
}
|
||||||
|
_ = aliceNC.Flush()
|
||||||
|
if e := waitErr(aliceErr, 1*time.Second); e == nil {
|
||||||
|
t.Fatalf("alice publishing into bob's room should raise a permissions violation")
|
||||||
|
}
|
||||||
|
select {
|
||||||
|
case got := <-bobGot:
|
||||||
|
t.Fatalf("bob received alice's cross-room publish despite the ACL: %q", got)
|
||||||
|
case <-time.After(500 * time.Millisecond):
|
||||||
|
// good
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// TestReaudit_H4_WildcardMetadataLeak ports the re-auditor's H4 vector. Before
|
||||||
|
// the per-subject ACL was WIRED into membershipd (it existed in pkg/membership and
|
||||||
|
// pkg/busauth but the binary used the plain NewNkeyAuthenticator), a registered
|
||||||
|
// NON-member could open a raw NATS connection, Subscribe(">"), and capture every
|
||||||
|
// room's subject plus JetStream stream/advisory activity — the payload stayed E2E
|
||||||
|
// ciphertext, but the metadata leaked. With NewNkeyAuthenticatorACL wired via the
|
||||||
|
// production path (busauth.PermissionsFromSubjects(membership.SubjectACLFor)), a
|
||||||
|
// non-member is confined to the client-infra subjects, so the wildcard and any
|
||||||
|
// foreign room subject are denied.
|
||||||
|
//
|
||||||
|
// Coverage:
|
||||||
|
// - error : a non-member's Subscribe(">") raises a permission violation;
|
||||||
|
// - edge : a non-member subscribing to another room's exact subject is denied;
|
||||||
|
// - golden: the member still pub/subs her own room, and the non-member never
|
||||||
|
// captures that traffic.
|
||||||
|
//
|
||||||
|
// Residual now CLOSED (issue 0006b, audit 0008 N2): the client-infra grant no
|
||||||
|
// longer includes "$JS.API.>". JetStream API access is granted per-room only
|
||||||
|
// (membership.jsSubjectsFor), so a peer can reach the API of its OWN rooms'
|
||||||
|
// streams but not the control-plane KV buckets (KV_UNIBUS_*) nor another room's
|
||||||
|
// stream. See TestAttack0008_N2 for the closed-leak regression.
|
||||||
|
func TestReaudit_H4_WildcardMetadataLeak(t *testing.T) {
|
||||||
|
dir := t.TempDir()
|
||||||
|
store, err := membership.Open(filepath.Join(dir, "unibus.db"))
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("store: %v", err)
|
||||||
|
}
|
||||||
|
t.Cleanup(func() { store.Close() })
|
||||||
|
|
||||||
|
alice, eve := mustID(t), mustID(t)
|
||||||
|
aliceEP := frame.EndpointID(alice.SignPub)
|
||||||
|
mustAddUser(t, store, alice, "alice")
|
||||||
|
mustAddUser(t, store, eve, "eve") // eve is REGISTERED but never a member of alice's room
|
||||||
|
const subject = "room.e2e.confidential"
|
||||||
|
mustCreateRoom(t, store, "ROOMA", subject, aliceEP, alice)
|
||||||
|
|
||||||
|
srv := startACLNats(t, store)
|
||||||
|
url := srv.ClientURL()
|
||||||
|
|
||||||
|
eveErr := make(chan error, 8)
|
||||||
|
eveNC := nkeyConn(t, url, eve, eveErr)
|
||||||
|
eveAll := make(chan *nats.Msg, 16)
|
||||||
|
|
||||||
|
// Error: eve's wildcard subscription is rejected. nats.go creates the local sub
|
||||||
|
// object and the server rejects it asynchronously (delivered to ErrorHandler).
|
||||||
|
if _, err := eveNC.Subscribe(">", func(m *nats.Msg) { eveAll <- m }); err != nil {
|
||||||
|
t.Fatalf("eve sub >: %v", err)
|
||||||
|
}
|
||||||
|
_ = eveNC.Flush()
|
||||||
|
if e := waitErr(eveErr, 1*time.Second); e == nil {
|
||||||
|
t.Fatalf("a non-member's Subscribe(\">\") must raise a permissions violation (wildcard metadata leak still open)")
|
||||||
|
}
|
||||||
|
|
||||||
|
// Edge: eve subscribing to the foreign room's EXACT subject is also denied.
|
||||||
|
drain(eveErr)
|
||||||
|
if _, err := eveNC.Subscribe(subject, func(m *nats.Msg) { eveAll <- m }); err != nil {
|
||||||
|
t.Fatalf("eve sub subject: %v", err)
|
||||||
|
}
|
||||||
|
_ = eveNC.Flush()
|
||||||
|
if e := waitErr(eveErr, 1*time.Second); e == nil {
|
||||||
|
t.Fatalf("a non-member subscribing to another room's subject must be denied")
|
||||||
|
}
|
||||||
|
|
||||||
|
// Golden: alice (the member) pub/subs her own room with no violation, and eve
|
||||||
|
// never captured the traffic despite her (rejected) wildcard.
|
||||||
|
aliceErr := make(chan error, 4)
|
||||||
|
aliceNC := nkeyConn(t, url, alice, aliceErr)
|
||||||
|
aliceGot := make(chan string, 4)
|
||||||
|
if _, err := aliceNC.Subscribe(subject, func(m *nats.Msg) { aliceGot <- string(m.Data) }); err != nil {
|
||||||
|
t.Fatalf("alice sub own room: %v", err)
|
||||||
|
}
|
||||||
|
_ = aliceNC.Flush()
|
||||||
|
if e := waitErr(aliceErr, 300*time.Millisecond); e != nil {
|
||||||
|
t.Fatalf("alice subscribing to her OWN room raised an error: %v", e)
|
||||||
|
}
|
||||||
|
if err := aliceNC.Publish(subject, []byte("members-only metadata")); err != nil {
|
||||||
|
t.Fatalf("alice publish: %v", err)
|
||||||
|
}
|
||||||
|
_ = aliceNC.Flush()
|
||||||
|
select {
|
||||||
|
case got := <-aliceGot:
|
||||||
|
if got != "members-only metadata" {
|
||||||
|
t.Fatalf("alice got %q", got)
|
||||||
|
}
|
||||||
|
case <-time.After(2 * time.Second):
|
||||||
|
t.Fatalf("alice did not receive her own room's message")
|
||||||
|
}
|
||||||
|
select {
|
||||||
|
case m := <-eveAll:
|
||||||
|
t.Fatalf("eve captured room traffic despite the ACL: subject=%q data=%q", m.Subject, m.Data)
|
||||||
|
case <-time.After(500 * time.Millisecond):
|
||||||
|
// good: eve captured nothing
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// TestRefreshSessionGainsNewRoom is the "permissions refreshed on join" path:
|
||||||
|
// alice is not in room B, so her connection has no permission for its subject;
|
||||||
|
// after she is added to room B and calls RefreshSession, the reconnect
|
||||||
|
// re-derives her permissions and she gains the room's subject.
|
||||||
|
func TestRefreshSessionGainsNewRoom(t *testing.T) {
|
||||||
|
dir := t.TempDir()
|
||||||
|
store, err := membership.Open(filepath.Join(dir, "unibus.db"))
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("store: %v", err)
|
||||||
|
}
|
||||||
|
t.Cleanup(func() { store.Close() })
|
||||||
|
|
||||||
|
alice, bob := mustID(t), mustID(t)
|
||||||
|
aliceEP, bobEP := frame.EndpointID(alice.SignPub), frame.EndpointID(bob.SignPub)
|
||||||
|
mustAddUser(t, store, alice, "alice")
|
||||||
|
mustAddUser(t, store, bob, "bob")
|
||||||
|
const subjB = "room.refresh.b"
|
||||||
|
mustCreateRoom(t, store, "ROOMB", subjB, bobEP, bob)
|
||||||
|
|
||||||
|
srv := startACLNats(t, store)
|
||||||
|
blobs, _ := blobstore.New(filepath.Join(dir, "blobs"))
|
||||||
|
ctrl := newCtrl(t, store, blobs)
|
||||||
|
|
||||||
|
aliceC, err := client.NewWithOptions(srv.ClientURL(), ctrl, alice, client.Options{UseNkey: true})
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("connect alice: %v", err)
|
||||||
|
}
|
||||||
|
defer aliceC.Close()
|
||||||
|
|
||||||
|
// Add alice to room B (as if invited), then RefreshSession so the
|
||||||
|
// authenticator re-derives her permissions on reconnect.
|
||||||
|
if _, err := store.GetMember("ROOMB", aliceEP); err == nil {
|
||||||
|
t.Fatalf("alice should not be a member yet")
|
||||||
|
}
|
||||||
|
if err := store.AddMember("ROOMB", membership.Member{Endpoint: aliceEP, Role: "member", SignPub: alice.SignPub, KexPub: alice.KexPub}, 1, nil); err != nil {
|
||||||
|
t.Fatalf("add alice to room B: %v", err)
|
||||||
|
}
|
||||||
|
if err := aliceC.RefreshSession(); err != nil {
|
||||||
|
t.Fatalf("refresh session: %v", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
bobErr := make(chan error, 2)
|
||||||
|
bobNC := nkeyConn(t, srv.ClientURL(), bob, bobErr)
|
||||||
|
|
||||||
|
got := make(chan string, 2)
|
||||||
|
sub, err := aliceC.Subscribe("ROOMB", func(_ frame.Frame, plaintext []byte) { got <- string(plaintext) })
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("alice subscribe room B after refresh: %v", err)
|
||||||
|
}
|
||||||
|
defer sub.Unsubscribe()
|
||||||
|
time.Sleep(200 * time.Millisecond)
|
||||||
|
|
||||||
|
// bob publishes a minimal cleartext frame on subjB.
|
||||||
|
f := frame.Frame{Type: frame.PUB, Subject: subjB, Sender: bobEP, MsgID: "m1", Payload: []byte("hello-after-join")}
|
||||||
|
b, _ := f.Marshal()
|
||||||
|
if err := bobNC.Publish(subjB, b); err != nil {
|
||||||
|
t.Fatalf("bob publish: %v", err)
|
||||||
|
}
|
||||||
|
_ = bobNC.Flush()
|
||||||
|
|
||||||
|
select {
|
||||||
|
case msg := <-got:
|
||||||
|
if msg != "hello-after-join" {
|
||||||
|
t.Fatalf("alice got %q", msg)
|
||||||
|
}
|
||||||
|
case <-time.After(3 * time.Second):
|
||||||
|
t.Fatalf("alice did not receive room B traffic after RefreshSession (permissions not refreshed)")
|
||||||
|
}
|
||||||
|
}
|
||||||
@@ -0,0 +1,241 @@
|
|||||||
|
package membership
|
||||||
|
|
||||||
|
import (
|
||||||
|
"crypto/sha256"
|
||||||
|
"encoding/base64"
|
||||||
|
"encoding/hex"
|
||||||
|
"fmt"
|
||||||
|
"net/http"
|
||||||
|
"strconv"
|
||||||
|
"sync"
|
||||||
|
"time"
|
||||||
|
|
||||||
|
cs "fn-registry/functions/cybersecurity"
|
||||||
|
|
||||||
|
"github.com/enmanuel/unibus/pkg/frame"
|
||||||
|
)
|
||||||
|
|
||||||
|
// AuthMode is the rollout state of control-plane authentication (feature flag
// bus-auth). It decides what the HTTP middleware does with a request whose
// signature is missing, invalid, replayed, skewed, or from an unregistered
// identity.
//
//	AuthOff     — verify nothing (legacy behavior; the default).
//	AuthSoft    — verify and LOG rejections, but still serve the request, so
//	              clients can migrate to signing without an outage.
//	AuthEnforce — answer unauthenticated requests with 401.
type AuthMode int

// The zero value is AuthOff, so an unset mode means "no verification".
const (
	AuthOff AuthMode = iota
	AuthSoft
	AuthEnforce
)

// String returns the flag spelling of m ("off", "soft", "enforce"), or
// "unknown" for any value outside the declared modes.
func (m AuthMode) String() string {
	names := [...]string{AuthOff: "off", AuthSoft: "soft", AuthEnforce: "enforce"}
	if m < 0 || int(m) >= len(names) {
		return "unknown"
	}
	return names[m]
}

// ParseAuthMode converts the bus-auth flag string into an AuthMode. The empty
// string is treated as "off". Any other spelling (matching is exact and
// case-sensitive) yields AuthOff together with a descriptive error.
func ParseAuthMode(s string) (AuthMode, error) {
	var mode AuthMode
	switch s {
	case "", "off":
		mode = AuthOff
	case "soft":
		mode = AuthSoft
	case "enforce":
		mode = AuthEnforce
	default:
		return AuthOff, fmt.Errorf("membership: invalid bus-auth mode %q (want off|soft|enforce)", s)
	}
	return mode, nil
}
|
||||||
|
|
||||||
|
// Names of the HTTP headers that carry a control-plane signature. The client
// signs the canonical bytes of its request and sends these four headers; the
// server rebuilds the same canonical bytes and verifies the signature. The
// byte layout is defined by CanonicalRequest.
const (
	// hdrPub holds the signer's Ed25519 public key as lowercase hex.
	hdrPub = "X-Unibus-Pub"
	// hdrTs holds the signing time as unix seconds, in decimal.
	hdrTs = "X-Unibus-Ts"
	// hdrNonce holds 16 random bytes, standard base64.
	hdrNonce = "X-Unibus-Nonce"
	// hdrSig holds the Ed25519 signature over the canonical bytes, standard base64.
	hdrSig = "X-Unibus-Sig"
)
|
||||||
|
|
||||||
|
// Anti-replay parameters. A request is accepted only if its timestamp is within
// clockSkew of now; nonces are remembered for nonceTTL so a captured request
// cannot be replayed inside its acceptance window.
//
// nonceTTL must cover the FULL acceptance window. authenticate compares whole
// seconds (now.Unix() minus the header timestamp), so the sub-second part of
// "now" is truncated and a request stays acceptable for just under
// 2*clockSkew + 1s after the earliest instant it could first be seen. With a
// TTL of exactly 2*clockSkew, a nonce first seen at the early edge of the
// window would expire up to one second before the request itself stops being
// accepted, leaving a replay gap. The extra second closes it.
const (
	clockSkew = 30 * time.Second
	nonceTTL  = 2*clockSkew + time.Second
	// maxNonceCacheEntries bounds the replay cache so it cannot grow without limit
	// (audit H7). With IsAuthorized now gating insertion, only authorized traffic
	// is cached, so this ceiling is only approached under a legitimate burst; at
	// the cap the oldest nonce is evicted (its TTL is nearly up anyway).
	maxNonceCacheEntries = 100_000
)
|
||||||
|
|
||||||
|
// CanonicalRequest builds the exact byte string that is signed by clients and
// verified by the server for a control-plane request:
//
//	method "\n" path "\n" ts "\n" nonce "\n" hex(sha256(body))
//
// path is the request URI (path plus raw query), which puts query parameters
// such as endpoint and epoch under the signature. The function is exported so
// the client library and the tests sign with the same construction; this is
// the single place the format is defined.
func CanonicalRequest(method, path, ts, nonce string, body []byte) []byte {
	digest := sha256.Sum256(body)

	// Four newline-terminated fields followed by the hex digest.
	size := len(method) + len(path) + len(ts) + len(nonce) + 4 + hex.EncodedLen(len(digest))
	out := make([]byte, 0, size)
	for _, field := range [...]string{method, path, ts, nonce} {
		out = append(out, field...)
		out = append(out, '\n')
	}
	return append(out, hex.EncodeToString(digest[:])...)
}
|
||||||
|
|
||||||
|
// nonceStore is the anti-replay backend: rememberOrReject records a nonce and
// reports whether it was unseen (true -> accept) or already seen (false ->
// reject the replay). It is an interface (issue 0003e) so the single-node
// in-memory cache can be swapped for a replicated KV store: a per-process cache
// is BROKEN under multi-node failover (a request captured and replayed to a
// DIFFERENT node whose cache never saw the nonce would be accepted), so a
// cluster MUST share the nonce state. Every implementation fails CLOSED — a
// backend it cannot reach rejects rather than admits.
type nonceStore interface {
	rememberOrReject(nonce string, now time.Time) bool
}

// memNonceCache remembers recently-seen nonces to reject replays. It is an
// in-memory store guarded by a mutex — sufficient for a SINGLE membershipd
// process. A clustered deployment uses kvNonceStore instead (issue 0003e).
//
// Pruning is O(expired), not O(n): because the TTL is constant, insertion order
// equals expiry order, so the oldest entries (front of `order`) are exactly the
// ones that expire first (audit H7 — a full scan or full copy under the mutex
// is a CPU-amplification vector). Consumed entries are dropped by advancing the
// slice start; the backing array is compacted by append's normal reallocation,
// which keeps the cost amortized. A size cap bounds memory.
type memNonceCache struct {
	mu    sync.Mutex
	seen  map[string]time.Time // nonce -> expiry
	order []string             // nonces in insertion order == expiry order
	ttl   time.Duration        // lifetime given to every recorded nonce
	cap   int                  // maximum number of live nonces kept in seen
}

// newMemNonceCache returns an empty cache whose entries live for ttl and which
// holds at most capacity nonces before evicting the oldest.
func newMemNonceCache(ttl time.Duration, capacity int) *memNonceCache {
	return &memNonceCache{seen: make(map[string]time.Time), ttl: ttl, cap: capacity}
}

// rememberOrReject records nonce and returns true if it was unseen, or false if
// it is a replay (still live in the cache). An entry is live while its expiry
// is not before now, so a nonce is still rejected at the exact expiry instant.
func (n *memNonceCache) rememberOrReject(nonce string, now time.Time) bool {
	n.mu.Lock()
	defer n.mu.Unlock()

	// Prune expired entries from the front (oldest first). The first live entry
	// ends the scan — everything behind it was inserted later and is newer.
	cut := 0
	for cut < len(n.order) {
		key := n.order[cut]
		exp, ok := n.seen[key]
		if ok && !exp.Before(now) {
			break
		}
		if ok {
			delete(n.seen, key)
		}
		// !ok means the map entry is already gone; the slot is just skipped.
		n.order[cut] = "" // release the string; the slot is about to be abandoned
		cut++
	}
	// Advance the window instead of copying the survivors down: copying would
	// touch every live entry on each call that prunes anything.
	n.order = n.order[cut:]

	if exp, ok := n.seen[nonce]; ok && !exp.Before(now) {
		return false // a live replay
	}

	// Bound memory: at capacity, evict the oldest entry (its TTL is nearly up).
	for len(n.seen) >= n.cap && len(n.order) > 0 {
		delete(n.seen, n.order[0])
		n.order[0] = ""
		n.order = n.order[1:]
	}

	n.seen[nonce] = now.Add(n.ttl)
	n.order = append(n.order, nonce)
	return true
}
|
||||||
|
|
||||||
|
// authResult is what a successful authentication yields: the verified signing
|
||||||
|
// key (hex), the endpoint id derived from it, and the authorized user record.
|
||||||
|
// Handlers use endpoint for membership authorization (only a member of a room
|
||||||
|
// may read its metadata/keys); user is available for role checks.
|
||||||
|
type authResult struct {
|
||||||
|
pubHex string
|
||||||
|
endpoint string
|
||||||
|
user User
|
||||||
|
}
|
||||||
|
|
||||||
|
// authenticate verifies the signature headers on r against body and the user
|
||||||
|
// allowlist. It returns an error describing the first failing check; the
|
||||||
|
// middleware decides whether that error blocks (enforce) or only logs (soft).
|
||||||
|
//
|
||||||
|
// Order matters: cheap, non-cryptographic checks (header presence, key shape,
|
||||||
|
// clock skew) run first; the Ed25519 verification runs before the replay cache
|
||||||
|
// is touched so an attacker cannot poison the cache with unsigned nonces; the
|
||||||
|
// allowlist lookup runs last.
|
||||||
|
func (s *Server) authenticate(r *http.Request, body []byte, now time.Time) (authResult, error) {
|
||||||
|
pubHex := r.Header.Get(hdrPub)
|
||||||
|
ts := r.Header.Get(hdrTs)
|
||||||
|
nonce := r.Header.Get(hdrNonce)
|
||||||
|
sigB64 := r.Header.Get(hdrSig)
|
||||||
|
if pubHex == "" || ts == "" || nonce == "" || sigB64 == "" {
|
||||||
|
return authResult{}, fmt.Errorf("missing auth headers")
|
||||||
|
}
|
||||||
|
|
||||||
|
pub, err := hex.DecodeString(pubHex)
|
||||||
|
if err != nil || len(pub) != 32 {
|
||||||
|
return authResult{}, fmt.Errorf("malformed %s (want 32-byte Ed25519 hex)", hdrPub)
|
||||||
|
}
|
||||||
|
|
||||||
|
tsInt, err := strconv.ParseInt(ts, 10, 64)
|
||||||
|
if err != nil {
|
||||||
|
return authResult{}, fmt.Errorf("malformed %s", hdrTs)
|
||||||
|
}
|
||||||
|
if d := now.Unix() - tsInt; d > int64(clockSkew/time.Second) || d < -int64(clockSkew/time.Second) {
|
||||||
|
return authResult{}, fmt.Errorf("timestamp out of range (skew %ds)", d)
|
||||||
|
}
|
||||||
|
|
||||||
|
sig, err := base64.StdEncoding.DecodeString(sigB64)
|
||||||
|
if err != nil {
|
||||||
|
return authResult{}, fmt.Errorf("malformed %s", hdrSig)
|
||||||
|
}
|
||||||
|
|
||||||
|
canonical := CanonicalRequest(r.Method, r.URL.RequestURI(), ts, nonce, body)
|
||||||
|
if !cs.VerifyEd25519(pub, canonical, sig) {
|
||||||
|
return authResult{}, fmt.Errorf("invalid signature")
|
||||||
|
}
|
||||||
|
|
||||||
|
// Authorize BEFORE touching the replay cache (audit H7): an unregistered
|
||||||
|
// identity can mint valid signatures for free, so caching its nonces would let
|
||||||
|
// it poison/grow the cache pre-auth. Only authorized identities are remembered.
|
||||||
|
if !s.store.IsAuthorized(pubHex) {
|
||||||
|
return authResult{}, fmt.Errorf("identity not authorized")
|
||||||
|
}
|
||||||
|
|
||||||
|
user, err := s.store.GetUser(pubHex)
|
||||||
|
if err != nil {
|
||||||
|
// IsAuthorized passed but the row vanished (race with revoke): fail closed.
|
||||||
|
return authResult{}, fmt.Errorf("identity not authorized")
|
||||||
|
}
|
||||||
|
|
||||||
|
// Anti-replay last: a replayed request from an authorized identity is still
|
||||||
|
// rejected here (the nonce is already live in the cache from its first use).
|
||||||
|
if !s.nonces.rememberOrReject(nonce, now) {
|
||||||
|
return authResult{}, fmt.Errorf("replayed nonce")
|
||||||
|
}
|
||||||
|
|
||||||
|
return authResult{pubHex: pubHex, endpoint: frame.EndpointID(pub), user: user}, nil
|
||||||
|
}
|
||||||
@@ -0,0 +1,206 @@
|
|||||||
|
package membership
|
||||||
|
|
||||||
|
import (
|
||||||
|
"bytes"
|
||||||
|
"encoding/base64"
|
||||||
|
"encoding/hex"
|
||||||
|
"io"
|
||||||
|
"net/http"
|
||||||
|
"net/http/httptest"
|
||||||
|
"path/filepath"
|
||||||
|
"strconv"
|
||||||
|
"testing"
|
||||||
|
"time"
|
||||||
|
|
||||||
|
cs "fn-registry/functions/cybersecurity"
|
||||||
|
|
||||||
|
"github.com/enmanuel/unibus/pkg/blobstore"
|
||||||
|
"github.com/enmanuel/unibus/pkg/frame"
|
||||||
|
)
|
||||||
|
|
||||||
|
// authHarness boots an in-process membershipd HTTP server in the given auth mode
|
||||||
|
// with a fresh store + blob store, and seeds one active admin ("alice").
|
||||||
|
type authHarness struct {
|
||||||
|
ts *httptest.Server
|
||||||
|
store Store
|
||||||
|
alice cs.Identity
|
||||||
|
alicePub string // hex
|
||||||
|
}
|
||||||
|
|
||||||
|
func newAuthHarness(t *testing.T, mode AuthMode) *authHarness {
|
||||||
|
t.Helper()
|
||||||
|
dir := t.TempDir()
|
||||||
|
store, err := Open(filepath.Join(dir, "unibus.db"))
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("open store: %v", err)
|
||||||
|
}
|
||||||
|
blobs, err := blobstore.New(filepath.Join(dir, "blobs"))
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("open blobs: %v", err)
|
||||||
|
}
|
||||||
|
alice, err := cs.GenerateIdentity()
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("identity: %v", err)
|
||||||
|
}
|
||||||
|
alicePub := hex.EncodeToString(alice.SignPub)
|
||||||
|
if err := store.AddUser(alicePub, "alice", RoleAdmin); err != nil {
|
||||||
|
t.Fatalf("seed admin: %v", err)
|
||||||
|
}
|
||||||
|
srv := NewServer(store, blobs, mode)
|
||||||
|
ts := httptest.NewServer(srv)
|
||||||
|
t.Cleanup(func() {
|
||||||
|
ts.Close()
|
||||||
|
store.Close()
|
||||||
|
})
|
||||||
|
return &authHarness{ts: ts, store: store, alice: alice, alicePub: alicePub}
|
||||||
|
}
|
||||||
|
|
||||||
|
// signedReq builds a control-plane request signed by id, with explicit ts/nonce
|
||||||
|
// so tests can force skew and replay. It signs via the same CanonicalRequest the
|
||||||
|
// production client uses, so the test verifies the real wire contract.
|
||||||
|
func signedReq(t *testing.T, base, method, path string, body []byte, id cs.Identity, ts int64, nonce string) *http.Request {
|
||||||
|
t.Helper()
|
||||||
|
var rdr io.Reader
|
||||||
|
if body != nil {
|
||||||
|
rdr = bytes.NewReader(body)
|
||||||
|
}
|
||||||
|
req, err := http.NewRequest(method, base+path, rdr)
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("new request: %v", err)
|
||||||
|
}
|
||||||
|
tss := strconv.FormatInt(ts, 10)
|
||||||
|
canonical := CanonicalRequest(method, path, tss, nonce, body)
|
||||||
|
sig := cs.SignEd25519(id.SignPriv, canonical)
|
||||||
|
req.Header.Set(hdrPub, hex.EncodeToString(id.SignPub))
|
||||||
|
req.Header.Set(hdrTs, tss)
|
||||||
|
req.Header.Set(hdrNonce, nonce)
|
||||||
|
req.Header.Set(hdrSig, base64.StdEncoding.EncodeToString(sig))
|
||||||
|
return req
|
||||||
|
}
|
||||||
|
|
||||||
|
// do sends req with http.DefaultClient and returns the response status code
// and the full response body as a string. A transport error or a failure while
// reading the body aborts the test, so callers never assert against a
// truncated body.
func do(t *testing.T, req *http.Request) (int, string) {
	t.Helper()
	resp, err := http.DefaultClient.Do(req)
	if err != nil {
		t.Fatalf("do request: %v", err)
	}
	defer resp.Body.Close()

	b, err := io.ReadAll(resp.Body)
	if err != nil {
		t.Fatalf("read response body: %v", err)
	}
	return resp.StatusCode, string(b)
}
|
||||||
|
|
||||||
|
// okPath is a room-directory path used by tests whose request has NO
// membership-bound signer (AuthOff, AuthSoft, and the missing-header cases),
// where it answers 200 with an empty list. Under enforce the per-endpoint room
// directory is restricted to the signer's own endpoint (audit H3), so tests
// that sign as alice and expect 200 use aliceRoomsPath instead.
const okPath = "/members/alice-endpoint/rooms"
|
||||||
|
|
||||||
|
// aliceRoomsPath is alice's own room directory — the canonical "authenticated
|
||||||
|
// and authorized" 200 path under enforce after H3.
|
||||||
|
func aliceRoomsPath(h *authHarness) string {
|
||||||
|
return "/members/" + frame.EndpointID(h.alice.SignPub) + "/rooms"
|
||||||
|
}
|
||||||
|
|
||||||
|
// Golden: a request signed by a registered, active identity is accepted.
|
||||||
|
func TestAuthGoldenAccepted(t *testing.T) {
|
||||||
|
h := newAuthHarness(t, AuthEnforce)
|
||||||
|
now := time.Now().Unix()
|
||||||
|
code, _ := do(t, signedReq(t, h.ts.URL, "GET", aliceRoomsPath(h), nil, h.alice, now, "nonce-golden"))
|
||||||
|
if code != http.StatusOK {
|
||||||
|
t.Fatalf("golden signed request should be 200, got %d", code)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Error path: a structurally valid signature from an identity that is NOT in the
|
||||||
|
// allowlist is rejected with 401.
|
||||||
|
func TestAuthUnregisteredRejected(t *testing.T) {
|
||||||
|
h := newAuthHarness(t, AuthEnforce)
|
||||||
|
bob, _ := cs.GenerateIdentity()
|
||||||
|
now := time.Now().Unix()
|
||||||
|
code, body := do(t, signedReq(t, h.ts.URL, "GET", okPath, nil, bob, now, "nonce-bob"))
|
||||||
|
if code != http.StatusUnauthorized {
|
||||||
|
t.Fatalf("unregistered identity should be 401, got %d (%s)", code, body)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Error path: replaying a captured request (same nonce + signature) is rejected.
|
||||||
|
func TestAuthReplayRejected(t *testing.T) {
|
||||||
|
h := newAuthHarness(t, AuthEnforce)
|
||||||
|
now := time.Now().Unix()
|
||||||
|
first := signedReq(t, h.ts.URL, "GET", aliceRoomsPath(h), nil, h.alice, now, "nonce-replay")
|
||||||
|
if code, body := do(t, first); code != http.StatusOK {
|
||||||
|
t.Fatalf("first request should be 200, got %d (%s)", code, body)
|
||||||
|
}
|
||||||
|
// Identical ts + nonce + signature: a replay.
|
||||||
|
second := signedReq(t, h.ts.URL, "GET", aliceRoomsPath(h), nil, h.alice, now, "nonce-replay")
|
||||||
|
if code, body := do(t, second); code != http.StatusUnauthorized {
|
||||||
|
t.Fatalf("replayed request should be 401, got %d (%s)", code, body)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Error path: a timestamp outside the ±30s window is rejected even with a valid
|
||||||
|
// signature (defends against long-delayed captured requests).
|
||||||
|
func TestAuthClockSkewRejected(t *testing.T) {
|
||||||
|
h := newAuthHarness(t, AuthEnforce)
|
||||||
|
stale := time.Now().Unix() - 120
|
||||||
|
code, body := do(t, signedReq(t, h.ts.URL, "GET", okPath, nil, h.alice, stale, "nonce-skew"))
|
||||||
|
if code != http.StatusUnauthorized {
|
||||||
|
t.Fatalf("clock-skewed request should be 401, got %d (%s)", code, body)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Error path: tampering the body after signing invalidates the signature.
|
||||||
|
func TestAuthTamperedBodyRejected(t *testing.T) {
|
||||||
|
h := newAuthHarness(t, AuthEnforce)
|
||||||
|
now := time.Now().Unix()
|
||||||
|
req := signedReq(t, h.ts.URL, "POST", "/rooms", []byte(`{"subject":"x"}`), h.alice, now, "nonce-tamper")
|
||||||
|
// Swap the body for different bytes the signature does not cover.
|
||||||
|
req.Body = io.NopCloser(bytes.NewReader([]byte(`{"subject":"evil"}`)))
|
||||||
|
req.ContentLength = int64(len(`{"subject":"evil"}`))
|
||||||
|
code, body := do(t, req)
|
||||||
|
if code != http.StatusUnauthorized {
|
||||||
|
t.Fatalf("tampered body should be 401, got %d (%s)", code, body)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Error path: missing auth headers under enforce are rejected.
|
||||||
|
func TestAuthMissingHeadersRejected(t *testing.T) {
|
||||||
|
h := newAuthHarness(t, AuthEnforce)
|
||||||
|
req, _ := http.NewRequest("GET", h.ts.URL+okPath, nil)
|
||||||
|
code, _ := do(t, req)
|
||||||
|
if code != http.StatusUnauthorized {
|
||||||
|
t.Fatalf("unsigned request under enforce should be 401, got %d", code)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Exemption: the health probe bypasses auth even under enforce.
|
||||||
|
func TestAuthHealthExempt(t *testing.T) {
|
||||||
|
h := newAuthHarness(t, AuthEnforce)
|
||||||
|
req, _ := http.NewRequest("GET", h.ts.URL+"/healthz", nil)
|
||||||
|
code, _ := do(t, req)
|
||||||
|
if code != http.StatusOK {
|
||||||
|
t.Fatalf("/healthz must be reachable without auth, got %d", code)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Soft mode: an unauthenticated request is logged but allowed through, so
|
||||||
|
// clients can migrate without an outage.
|
||||||
|
func TestAuthSoftAllowsUnauthenticated(t *testing.T) {
|
||||||
|
h := newAuthHarness(t, AuthSoft)
|
||||||
|
req, _ := http.NewRequest("GET", h.ts.URL+okPath, nil)
|
||||||
|
code, _ := do(t, req)
|
||||||
|
if code != http.StatusOK {
|
||||||
|
t.Fatalf("soft mode should allow unsigned request, got %d", code)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Off mode (default for legacy callers): no verification at all.
|
||||||
|
func TestAuthOffNoVerification(t *testing.T) {
|
||||||
|
h := newAuthHarness(t, AuthOff)
|
||||||
|
req, _ := http.NewRequest("GET", h.ts.URL+okPath, nil)
|
||||||
|
code, _ := do(t, req)
|
||||||
|
if code != http.StatusOK {
|
||||||
|
t.Fatalf("off mode should allow unsigned request, got %d", code)
|
||||||
|
}
|
||||||
|
}
|
||||||
@@ -0,0 +1,119 @@
|
|||||||
|
package membership
|
||||||
|
|
||||||
|
import (
|
||||||
|
"encoding/hex"
|
||||||
|
"net/http"
|
||||||
|
"strconv"
|
||||||
|
"testing"
|
||||||
|
"time"
|
||||||
|
|
||||||
|
cs "fn-registry/functions/cybersecurity"
|
||||||
|
|
||||||
|
"github.com/enmanuel/unibus/pkg/frame"
|
||||||
|
)
|
||||||
|
|
||||||
|
// seedRoom inserts an encrypted room owned by alice with a sealed key for her,
|
||||||
|
// directly through the store so the test controls membership precisely. It
|
||||||
|
// returns the room id and alice's endpoint.
|
||||||
|
func seedRoom(t *testing.T, h *authHarness, subject string) (string, string) {
|
||||||
|
t.Helper()
|
||||||
|
aliceEp := frame.EndpointID(h.alice.SignPub)
|
||||||
|
roomID := newULID()
|
||||||
|
info := RoomInfo{RoomID: roomID, Subject: subject, OwnerEndpoint: aliceEp, Encrypt: true}
|
||||||
|
if err := h.store.CreateRoom(info, h.alice.SignPub, h.alice.KexPub, []byte("alice-sealed-key")); err != nil {
|
||||||
|
t.Fatalf("seed room: %v", err)
|
||||||
|
}
|
||||||
|
return roomID, aliceEp
|
||||||
|
}
|
||||||
|
|
||||||
|
// register adds id to the bus allowlist so its signed requests clear auth and
|
||||||
|
// reach the handler, where membership authorization (not mere registration) is
|
||||||
|
// what the test exercises.
|
||||||
|
func register(t *testing.T, h *authHarness, id cs.Identity, handle string) {
|
||||||
|
t.Helper()
|
||||||
|
if err := h.store.AddUser(hex.EncodeToString(id.SignPub), handle, RoleMember); err != nil {
|
||||||
|
t.Fatalf("register %s: %v", handle, err)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// TestAudit_HorizontalMetadataLeak ports the auditor's H3 (Alto) finding: bob is
|
||||||
|
// REGISTERED on the bus but is NOT a member of alice's room. Before the fix the
|
||||||
|
// GET endpoints checked registration, not membership, so bob could read the
|
||||||
|
// room's subject, the full member list (with everyone's public keys), alice's
|
||||||
|
// room directory, and even alice's sealed key. Now every one of those returns
|
||||||
|
// 403 to bob, while alice (owner/member) and carol (plain member) get 200.
|
||||||
|
func TestAudit_HorizontalMetadataLeak(t *testing.T) {
|
||||||
|
h := newAuthHarness(t, AuthEnforce)
|
||||||
|
roomID, aliceEp := seedRoom(t, h, "secret.subject.payroll")
|
||||||
|
|
||||||
|
// bob: registered, never invited.
|
||||||
|
bob, _ := cs.GenerateIdentity()
|
||||||
|
register(t, h, bob, "bob")
|
||||||
|
|
||||||
|
// carol: registered AND a plain (non-owner) member — the legitimate-member edge.
|
||||||
|
carol, _ := cs.GenerateIdentity()
|
||||||
|
register(t, h, carol, "carol")
|
||||||
|
carolEp := frame.EndpointID(carol.SignPub)
|
||||||
|
if err := h.store.AddMember(roomID, Member{Endpoint: carolEp, Role: RoleMember, SignPub: carol.SignPub, KexPub: carol.KexPub}, 1, []byte("carol-sealed")); err != nil {
|
||||||
|
t.Fatalf("add carol: %v", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
n := 0
|
||||||
|
get := func(id cs.Identity, path string) int {
|
||||||
|
n++
|
||||||
|
code, _ := do(t, signedReq(t, h.ts.URL, "GET", path, nil, id, time.Now().Unix(), nonceN(n)))
|
||||||
|
return code
|
||||||
|
}
|
||||||
|
|
||||||
|
// Error path: bob (non-member) is forbidden on every room endpoint.
|
||||||
|
bobChecks := []struct {
|
||||||
|
name string
|
||||||
|
path string
|
||||||
|
}{
|
||||||
|
{"get room", "/rooms/" + roomID},
|
||||||
|
{"list members", "/rooms/" + roomID + "/members"},
|
||||||
|
{"alice room directory", "/members/" + aliceEp + "/rooms"},
|
||||||
|
{"alice sealed key", "/rooms/" + roomID + "/key?endpoint=" + aliceEp},
|
||||||
|
{"bob sealed key in alices room", "/rooms/" + roomID + "/key?endpoint=" + frame.EndpointID(bob.SignPub)},
|
||||||
|
}
|
||||||
|
for _, c := range bobChecks {
|
||||||
|
if code := get(bob, c.path); code != http.StatusForbidden {
|
||||||
|
t.Fatalf("bob (non-member) %s should be 403, got %d", c.name, code)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Golden: alice (owner/member) reads her room's metadata, members, directory, key.
|
||||||
|
aliceChecks := []string{
|
||||||
|
"/rooms/" + roomID,
|
||||||
|
"/rooms/" + roomID + "/members",
|
||||||
|
"/members/" + aliceEp + "/rooms",
|
||||||
|
"/rooms/" + roomID + "/key?endpoint=" + aliceEp,
|
||||||
|
}
|
||||||
|
for _, p := range aliceChecks {
|
||||||
|
if code := get(h.alice, p); code != http.StatusOK {
|
||||||
|
t.Fatalf("alice (owner) %s should be 200, got %d", p, code)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Edge: carol is a plain member, not the owner — she may still read the room.
|
||||||
|
if code := get(carol, "/rooms/"+roomID); code != http.StatusOK {
|
||||||
|
t.Fatalf("carol (member) get room should be 200, got %d", code)
|
||||||
|
}
|
||||||
|
if code := get(carol, "/rooms/"+roomID+"/members"); code != http.StatusOK {
|
||||||
|
t.Fatalf("carol (member) list members should be 200, got %d", code)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Edge: carol may fetch her OWN sealed key but not alice's.
|
||||||
|
if code := get(carol, "/rooms/"+roomID+"/key?endpoint="+carolEp); code != http.StatusOK {
|
||||||
|
t.Fatalf("carol fetching her own key should be 200, got %d", code)
|
||||||
|
}
|
||||||
|
if code := get(carol, "/rooms/"+roomID+"/key?endpoint="+aliceEp); code != http.StatusForbidden {
|
||||||
|
t.Fatalf("carol fetching alice's key should be 403, got %d", code)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// nonceN builds the nonce for the i-th request of a test: the fixed prefix
// "authz-nonce-" followed by i in decimal. Different i values give different
// nonces, so the anti-replay cache never rejects a fresh, legitimately-different
// request inside one test.
func nonceN(i int) string {
	const prefix = "authz-nonce-"
	return prefix + strconv.FormatInt(int64(i), 10)
}
|
||||||
@@ -0,0 +1,148 @@
|
|||||||
|
package membership
|
||||||
|
|
||||||
|
import (
|
||||||
|
"net/http"
|
||||||
|
"net/http/httptest"
|
||||||
|
"os"
|
||||||
|
"runtime"
|
||||||
|
"strconv"
|
||||||
|
"strings"
|
||||||
|
"sync"
|
||||||
|
"sync/atomic"
|
||||||
|
"testing"
|
||||||
|
"time"
|
||||||
|
)
|
||||||
|
|
||||||
|
// readRSSkBRaw returns the numeric field of the "VmRSS:" line in
// /proc/self/status (the resident set size, in kB). It takes no *testing.T, so
// it is safe to call from a sampling goroutine (vmRSSkB calls t.Skip, which may
// only run on the test's own goroutine).
//
// It returns 0 when the value is unavailable: the file cannot be read, no
// usable VmRSS line exists, or the number does not parse.
func readRSSkBRaw() int64 {
	status, err := os.ReadFile("/proc/self/status")
	if err != nil {
		return 0
	}
	for _, row := range strings.Split(string(status), "\n") {
		if !strings.HasPrefix(row, "VmRSS:") {
			continue
		}
		fields := strings.Fields(row)
		if len(fields) < 2 {
			// Label without a value: keep scanning the remaining lines.
			continue
		}
		// A parse failure deliberately degrades to the "unavailable" value.
		kb, _ := strconv.ParseInt(fields[1], 10, 64)
		return kb
	}
	return 0
}
|
||||||
|
|
||||||
|
// TestReaudit_DoSConcurrency ports the re-auditor's N2 (Medio-Alto) finding: the
|
||||||
|
// per-request body ceiling and the per-IP rate limit do not bound the AGGREGATE
|
||||||
|
// memory of many concurrent uploads. The auditor drove RSS to ~1.42 GB with 40
|
||||||
|
// concurrent 16 MiB blob uploads. With the global in-flight byte limiter, the
|
||||||
|
// number of simultaneously-buffered uploads is capped, so the resident set stays
|
||||||
|
// bounded regardless of how many connections arrive at once.
|
||||||
|
//
|
||||||
|
// Coverage:
|
||||||
|
// - golden: a normal upload succeeds, and the server is still healthy after the
|
||||||
|
// storm (the limiter did not wedge it);
|
||||||
|
// - edge : concurrency right at the cap is admitted;
|
||||||
|
// - error : a concurrent flood far past the cap sheds the excess with 503
|
||||||
|
// (backpressure) instead of buffering it all, and the RSS spike stays bounded
|
||||||
|
// and does NOT scale with the number of requests.
|
||||||
|
func TestReaudit_DoSConcurrency(t *testing.T) {
|
||||||
|
if runtime.GOOS != "linux" {
|
||||||
|
t.Skip("RSS probe is Linux-only")
|
||||||
|
}
|
||||||
|
srv := dosServer(t, AuthOff)
|
||||||
|
// Force a small aggregate cap so the bound is observable in a unit test: with
|
||||||
|
// a 16 MiB blob ceiling, 48 MiB admits ~3 concurrent uploads. Production uses
|
||||||
|
// maxInflightBytes (128 MiB); the mechanism under test is identical.
|
||||||
|
const cap = int64(48) << 20
|
||||||
|
srv.inflight = newInflightLimiter(cap)
|
||||||
|
|
||||||
|
const blob = maxBlobBytes // 16 MiB, the per-request ceiling
|
||||||
|
const n = 40 // the auditor's figure
|
||||||
|
|
||||||
|
// A spike bound: with the cap admitting ~3 concurrent 16 MiB uploads and a
|
||||||
|
// ~2x copy factor (auth buffer + handler buffer) plus Go runtime slack, the
|
||||||
|
// delta should stay well under this. Without the limiter, 40 concurrent
|
||||||
|
// uploads admitted at once would add hundreds of MB (the auditor saw ~1.4 GB).
|
||||||
|
const maxSpikeKB = int64(256) << 10 // 256 MiB
|
||||||
|
|
||||||
|
runtime.GC()
|
||||||
|
before := readRSSkBRaw()
|
||||||
|
|
||||||
|
// Sample peak RSS while the storm runs.
|
||||||
|
var peak int64
|
||||||
|
atomic.StoreInt64(&peak, before)
|
||||||
|
stop := make(chan struct{})
|
||||||
|
var sampler sync.WaitGroup
|
||||||
|
sampler.Add(1)
|
||||||
|
go func() {
|
||||||
|
defer sampler.Done()
|
||||||
|
for {
|
||||||
|
select {
|
||||||
|
case <-stop:
|
||||||
|
return
|
||||||
|
default:
|
||||||
|
if v := readRSSkBRaw(); v > atomic.LoadInt64(&peak) {
|
||||||
|
atomic.StoreInt64(&peak, v)
|
||||||
|
}
|
||||||
|
time.Sleep(2 * time.Millisecond)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}()
|
||||||
|
|
||||||
|
var got503, got200 int64
|
||||||
|
var wg sync.WaitGroup
|
||||||
|
for i := 0; i < n; i++ {
|
||||||
|
wg.Add(1)
|
||||||
|
go func() {
|
||||||
|
defer wg.Done()
|
||||||
|
req := httptest.NewRequest(http.MethodPost, "/blobs", &zeroReader{remaining: blob})
|
||||||
|
req.ContentLength = blob
|
||||||
|
// Distinct source IP per request: this is the multi-IP (botnet) shape the
|
||||||
|
// auditor flagged, where the per-IP rate limit gives no aggregate defense.
|
||||||
|
// The in-flight byte limiter is the global bound that must hold here.
|
||||||
|
req.RemoteAddr = "198.51.100." + strconv.Itoa(i%254+1) + ":1234"
|
||||||
|
rec := httptest.NewRecorder()
|
||||||
|
srv.ServeHTTP(rec, req)
|
||||||
|
switch rec.Code {
|
||||||
|
case http.StatusServiceUnavailable:
|
||||||
|
atomic.AddInt64(&got503, 1)
|
||||||
|
case http.StatusOK:
|
||||||
|
atomic.AddInt64(&got200, 1)
|
||||||
|
}
|
||||||
|
}()
|
||||||
|
}
|
||||||
|
wg.Wait()
|
||||||
|
close(stop)
|
||||||
|
sampler.Wait()
|
||||||
|
|
||||||
|
runtime.GC()
|
||||||
|
delta := atomic.LoadInt64(&peak) - before
|
||||||
|
|
||||||
|
// Error path: the flood must have hit the cap and shed the excess with 503.
|
||||||
|
if got503 == 0 {
|
||||||
|
t.Fatalf("a concurrent flood of %d uploads past the cap should shed some with 503; got 200=%d 503=%d", n, got200, got503)
|
||||||
|
}
|
||||||
|
// The aggregate memory must stay bounded — not scale with n.
|
||||||
|
if delta > maxSpikeKB {
|
||||||
|
t.Fatalf("aggregate RSS spiked %d kB under %d concurrent uploads (bound %d kB): in-flight limiter not bounding memory", delta, n, maxSpikeKB)
|
||||||
|
}
|
||||||
|
// All reservations released after the storm.
|
||||||
|
if f := srv.inflight.inFlight(); f != 0 {
|
||||||
|
t.Fatalf("after the storm inFlight = %d, want 0 (reservations leaked)", f)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Golden: the server is still healthy and serves a normal upload (from a fresh
|
||||||
|
// IP so the per-IP rate limiter, untouched here, is not what we measure).
|
||||||
|
rec := httptest.NewRecorder()
|
||||||
|
gReq := httptest.NewRequest(http.MethodPost, "/blobs", strings.NewReader("hello after storm"))
|
||||||
|
gReq.RemoteAddr = "203.0.113.9:9999"
|
||||||
|
srv.ServeHTTP(rec, gReq)
|
||||||
|
if rec.Code != http.StatusOK {
|
||||||
|
t.Fatalf("a normal upload after the storm should be 200, got %d (%s)", rec.Code, rec.Body.String())
|
||||||
|
}
|
||||||
|
|
||||||
|
t.Logf("N2 bound: %d uploads -> 200=%d 503=%d, RSS delta %d kB (bound %d kB), cap %d MiB",
|
||||||
|
n, got200, got503, delta, maxSpikeKB, cap>>20)
|
||||||
|
}
|
||||||
@@ -0,0 +1,206 @@
|
|||||||
|
package membership
|
||||||
|
|
||||||
|
import (
|
||||||
|
"io"
|
||||||
|
"net/http"
|
||||||
|
"net/http/httptest"
|
||||||
|
"os"
|
||||||
|
"path/filepath"
|
||||||
|
"runtime"
|
||||||
|
"strconv"
|
||||||
|
"strings"
|
||||||
|
"testing"
|
||||||
|
|
||||||
|
"github.com/enmanuel/unibus/pkg/blobstore"
|
||||||
|
)
|
||||||
|
|
||||||
|
// dosServer builds a Server backed by a fresh store + blob store so a test can
|
||||||
|
// drive ServeHTTP in-process (white-box) and observe its memory behavior without
|
||||||
|
// a network round trip — the same in-process technique the auditor used.
|
||||||
|
func dosServer(t *testing.T, mode AuthMode) *Server {
|
||||||
|
t.Helper()
|
||||||
|
dir := t.TempDir()
|
||||||
|
store, err := Open(filepath.Join(dir, "unibus.db"))
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("open store: %v", err)
|
||||||
|
}
|
||||||
|
blobs, err := blobstore.New(filepath.Join(dir, "blobs"))
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("open blobs: %v", err)
|
||||||
|
}
|
||||||
|
t.Cleanup(func() { store.Close() })
|
||||||
|
return NewServer(store, blobs, mode)
|
||||||
|
}
|
||||||
|
|
||||||
|
// zeroReader yields up to remaining zero bytes without ever allocating them, so
// the test process itself never materializes a huge buffer (which would taint the
// RSS measurement we are trying to make about the SERVER).
type zeroReader struct{ remaining int64 }

// Read fills p with zero bytes, at most z.remaining of them, and decrements
// z.remaining by the number written. Once the budget is used up it returns
// (0, io.EOF). A zero-length p while budget remains yields (0, nil).
func (z *zeroReader) Read(p []byte) (int, error) {
	if z.remaining <= 0 {
		return 0, io.EOF
	}
	n := int64(len(p))
	if n > z.remaining {
		n = z.remaining
	}
	// Range-clear idiom: the compiler turns this loop into one bulk clear
	// instead of a byte-at-a-time store, which matters for multi-MiB bodies.
	zeroed := p[:n]
	for i := range zeroed {
		zeroed[i] = 0
	}
	z.remaining -= n
	return int(n), nil
}
|
||||||
|
|
||||||
|
// vmRSSkB reads the resident set size (kB) of this process from the "VmRSS:"
// line of /proc/self/status. Linux-only; the caller skips on other platforms.
//
// When the value cannot be obtained — the file is unreadable, there is no VmRSS
// line, or its number does not parse — the calling test is skipped rather than
// handed a bogus 0 that would distort the before/after delta. Because it calls
// t.Skip, it must only be used on the test's own goroutine.
func vmRSSkB(t *testing.T) int64 {
	t.Helper()
	b, err := os.ReadFile("/proc/self/status")
	if err != nil {
		t.Skipf("cannot read /proc/self/status: %v", err)
	}
	for _, line := range strings.Split(string(b), "\n") {
		if !strings.HasPrefix(line, "VmRSS:") {
			continue
		}
		f := strings.Fields(line)
		if len(f) < 2 {
			continue
		}
		v, err := strconv.ParseInt(f[1], 10, 64)
		if err != nil {
			// An unparsable reading is as useless as a missing one.
			t.Skipf("cannot parse VmRSS line %q: %v", line, err)
		}
		return v
	}
	t.Skip("VmRSS not present in /proc/self/status")
	return 0
}
|
||||||
|
|
||||||
|
// TestAudit_DoSBodyLimitNoAuth ports the auditor's H1 (Critical) vector: a peer
|
||||||
|
// with NO valid signature posts an oversized body. Before the fix the middleware
|
||||||
|
// io.ReadAll'd it unbounded (the auditor sent 400 MB and watched RSS jump from
|
||||||
|
// 18 MB to 898 MB). Now the request is rejected 413 and the resident set does NOT
|
||||||
|
// spike. Two shapes are covered:
|
||||||
|
//
|
||||||
|
// (1) a truthful, over-ceiling Content-Length -> rejected before any byte is read;
|
||||||
|
// (2) a lying / unknown length (chunked) -> MaxBytesReader trips mid-read,
|
||||||
|
// capping the buffered bytes at the ceiling instead of the attacker's 400 MB.
|
||||||
|
func TestAudit_DoSBodyLimitNoAuth(t *testing.T) {
|
||||||
|
if runtime.GOOS != "linux" {
|
||||||
|
t.Skip("RSS probe is Linux-only")
|
||||||
|
}
|
||||||
|
srv := dosServer(t, AuthEnforce) // enforce: the request carries no signature
|
||||||
|
|
||||||
|
const huge = int64(400) << 20 // 400 MiB — the auditor's figure
|
||||||
|
// A spike threshold an order of magnitude below the attack. The old code would
|
||||||
|
// add ~400 MB+; the fix keeps the delta to at most one bounded buffer.
|
||||||
|
const maxSpikeKB = int64(96) << 10 // 96 MiB
|
||||||
|
|
||||||
|
// Shape 1: declared Content-Length over the blob ceiling -> early 413, no read.
|
||||||
|
runtime.GC()
|
||||||
|
before := vmRSSkB(t)
|
||||||
|
req := httptest.NewRequest(http.MethodPost, "/blobs", &zeroReader{remaining: huge})
|
||||||
|
req.ContentLength = huge
|
||||||
|
rec := httptest.NewRecorder()
|
||||||
|
srv.ServeHTTP(rec, req)
|
||||||
|
if rec.Code != http.StatusRequestEntityTooLarge {
|
||||||
|
t.Fatalf("over-declared body should be 413, got %d", rec.Code)
|
||||||
|
}
|
||||||
|
runtime.GC()
|
||||||
|
if d := vmRSSkB(t) - before; d > maxSpikeKB {
|
||||||
|
t.Fatalf("RSS spiked %d kB on a pre-declared oversized body (limit %d kB)", d, maxSpikeKB)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Shape 2: unknown length (chunked-style). The middleware cannot reject by
|
||||||
|
// Content-Length, so MaxBytesReader must cap the read at maxBlobBytes.
|
||||||
|
runtime.GC()
|
||||||
|
before = vmRSSkB(t)
|
||||||
|
req = httptest.NewRequest(http.MethodPost, "/blobs", &zeroReader{remaining: huge})
|
||||||
|
req.ContentLength = -1
|
||||||
|
rec = httptest.NewRecorder()
|
||||||
|
srv.ServeHTTP(rec, req)
|
||||||
|
if rec.Code != http.StatusRequestEntityTooLarge {
|
||||||
|
t.Fatalf("unknown-length oversized body should be 413, got %d", rec.Code)
|
||||||
|
}
|
||||||
|
runtime.GC()
|
||||||
|
if d := vmRSSkB(t) - before; d > maxSpikeKB {
|
||||||
|
t.Fatalf("RSS spiked %d kB on a chunked oversized body (limit %d kB)", d, maxSpikeKB)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// TestBlobLimitGoldenAndBoundary covers the golden path (a normal blob is stored)
|
||||||
|
// and the boundary (a body exactly at the ceiling is accepted; one byte over by
|
||||||
|
// truthful Content-Length is rejected before buffering).
|
||||||
|
func TestBlobLimitGoldenAndBoundary(t *testing.T) {
|
||||||
|
srv := dosServer(t, AuthOff) // AuthOff: the limits apply regardless of auth mode
|
||||||
|
|
||||||
|
// Golden: a small blob is accepted and hashed.
|
||||||
|
rec := httptest.NewRecorder()
|
||||||
|
srv.ServeHTTP(rec, httptest.NewRequest(http.MethodPost, "/blobs", strings.NewReader("hello blob")))
|
||||||
|
if rec.Code != http.StatusOK {
|
||||||
|
t.Fatalf("normal blob should be 200, got %d (%s)", rec.Code, rec.Body.String())
|
||||||
|
}
|
||||||
|
|
||||||
|
// Boundary: exactly at the ceiling is allowed (MaxBytesReader permits N bytes).
|
||||||
|
atLimit := strings.Repeat("a", maxBlobBytes)
|
||||||
|
rec = httptest.NewRecorder()
|
||||||
|
req := httptest.NewRequest(http.MethodPost, "/blobs", strings.NewReader(atLimit))
|
||||||
|
req.ContentLength = int64(len(atLimit))
|
||||||
|
srv.ServeHTTP(rec, req)
|
||||||
|
if rec.Code != http.StatusOK {
|
||||||
|
t.Fatalf("blob exactly at the ceiling should be 200, got %d", rec.Code)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Error: one byte over the ceiling (truthful Content-Length) -> 413 pre-read.
|
||||||
|
rec = httptest.NewRecorder()
|
||||||
|
req = httptest.NewRequest(http.MethodPost, "/blobs", &zeroReader{remaining: maxBlobBytes + 1})
|
||||||
|
req.ContentLength = maxBlobBytes + 1
|
||||||
|
srv.ServeHTTP(rec, req)
|
||||||
|
if rec.Code != http.StatusRequestEntityTooLarge {
|
||||||
|
t.Fatalf("blob one byte over the ceiling should be 413, got %d", rec.Code)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// TestControlBodyLimit checks the smaller JSON ceiling on a non-blob route: a body
|
||||||
|
// over maxControlBodyBytes is rejected 413 before the handler runs.
|
||||||
|
func TestControlBodyLimit(t *testing.T) {
|
||||||
|
srv := dosServer(t, AuthOff)
|
||||||
|
rec := httptest.NewRecorder()
|
||||||
|
req := httptest.NewRequest(http.MethodPost, "/rooms", &zeroReader{remaining: maxControlBodyBytes + 1})
|
||||||
|
req.ContentLength = maxControlBodyBytes + 1
|
||||||
|
srv.ServeHTTP(rec, req)
|
||||||
|
if rec.Code != http.StatusRequestEntityTooLarge {
|
||||||
|
t.Fatalf("control body over 1 MiB should be 413, got %d", rec.Code)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// TestRateLimitPerIP exercises the per-IP throttle: a burst from one IP eventually
|
||||||
|
// gets 429 (error path), while a spread across distinct IPs is never throttled
|
||||||
|
// (edge — the bucket is keyed per source, not global).
|
||||||
|
func TestRateLimitPerIP(t *testing.T) {
|
||||||
|
srv := dosServer(t, AuthOff)
|
||||||
|
|
||||||
|
// Same IP: well past the burst -> at least one 429.
|
||||||
|
got429 := false
|
||||||
|
for i := 0; i < defaultRateBurst+50; i++ {
|
||||||
|
rec := httptest.NewRecorder()
|
||||||
|
req := httptest.NewRequest(http.MethodGet, "/rooms/none", nil)
|
||||||
|
req.RemoteAddr = "203.0.113.7:5555"
|
||||||
|
srv.ServeHTTP(rec, req)
|
||||||
|
if rec.Code == http.StatusTooManyRequests {
|
||||||
|
got429 = true
|
||||||
|
break
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if !got429 {
|
||||||
|
t.Fatalf("a flood from one IP should eventually be rate-limited (429)")
|
||||||
|
}
|
||||||
|
|
||||||
|
// Distinct IPs: each gets a fresh bucket, so none is throttled.
|
||||||
|
for i := 0; i < 100; i++ {
|
||||||
|
rec := httptest.NewRecorder()
|
||||||
|
req := httptest.NewRequest(http.MethodGet, "/rooms/none", nil)
|
||||||
|
req.RemoteAddr = "198.51.100." + strconv.Itoa(i%254+1) + ":4444"
|
||||||
|
srv.ServeHTTP(rec, req)
|
||||||
|
if rec.Code == http.StatusTooManyRequests {
|
||||||
|
t.Fatalf("distinct IPs must not share a rate bucket; IP #%d got 429", i)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
@@ -0,0 +1,85 @@
|
|||||||
|
package membership
|
||||||
|
|
||||||
|
import "sync/atomic"
|
||||||
|
|
||||||
|
// inflightLimiter is a non-blocking, byte-counting concurrency limiter: a global
// cap on how many bytes of request body the server will buffer simultaneously.
//
// The per-request body ceilings (maxControlBodyBytes / maxBlobBytes) bound a
// single request, and the per-IP rate limiter throttles a single source, but
// neither bounds the AGGREGATE memory across many concurrent uploads: the
// re-audit (report 0006, N2) showed 40 concurrent 16 MiB blob uploads driving
// RSS to ~1.42 GB, and a distributed (multi-IP) flood scales without a ceiling
// because the rate limiter is per-IP. This limiter is the missing aggregate
// bound: ServeHTTP reserves a request's worst-case buffered size before reading
// the body and releases it when the request finishes, so the total bytes in
// flight can never exceed max regardless of how many connections or source IPs
// arrive at once.
//
// It is intentionally NON-blocking: when a reservation does not fit, the caller
// sheds the request with backpressure (503) rather than parking a goroutine,
// which would let an attacker exhaust goroutines/connections instead of RAM. The
// counter is maintained with sync/atomic (a CAS loop), so it is safe for
// concurrent use without a mutex.
//
// Implementation note: this lives inside unibus rather than the fn-registry
// (where a generic concurrency primitive would normally belong) because the
// registry's functions/core package pulls in transitive dependencies that
// require CGO (mattn/go-sqlite3) and external modules, which are incompatible
// with unibus's CGO_ENABLED=0 build, and because this work is scoped to the
// unibus sub-repo.
type inflightLimiter struct {
	max  int64 // immutable after construction; <= 0 disables the limiter
	used int64 // bytes currently reserved; accessed ONLY via sync/atomic
}

// newInflightLimiter builds a limiter with a cap of maxBytes bytes in flight.
// maxBytes <= 0 disables the cap (tryAcquire always grants), which is the
// loopback/dev posture where an aggregate memory ceiling is not wanted.
func newInflightLimiter(maxBytes int64) *inflightLimiter {
	return &inflightLimiter{max: maxBytes}
}

// tryAcquire reserves n bytes without blocking. It returns true and reserves the
// bytes when they fit within the cap (used+n <= max), or false (reserving
// nothing) when they do not. n <= 0 is granted without reserving, and a disabled
// limiter (max <= 0) always grants. Safe for concurrent use.
func (l *inflightLimiter) tryAcquire(n int64) bool {
	if l.max <= 0 || n <= 0 {
		return true
	}
	for {
		cur := atomic.LoadInt64(&l.used)
		// Compare against the remaining headroom instead of computing cur+n:
		// for a very large n the sum would wrap around int64, slip under max
		// and corrupt the counter. cur stays within [0, max], so max-cur
		// cannot overflow.
		if n > l.max-cur {
			return false
		}
		if atomic.CompareAndSwapInt64(&l.used, cur, cur+n) {
			return true
		}
		// Lost a race with another acquire/release: retry on the fresh value.
	}
}

// release returns n previously reserved bytes. It must be paired with a
// tryAcquire that granted. A disabled limiter or n <= 0 is a no-op. The counter
// never drops below zero (a defensive clamp against an accidental double release).
func (l *inflightLimiter) release(n int64) {
	if l.max <= 0 || n <= 0 {
		return
	}
	for {
		cur := atomic.LoadInt64(&l.used)
		nv := cur - n
		if nv < 0 {
			nv = 0
		}
		if atomic.CompareAndSwapInt64(&l.used, cur, nv) {
			return
		}
	}
}

// inFlight returns the bytes currently reserved. It is observability for tests
// and metrics.
func (l *inflightLimiter) inFlight() int64 {
	return atomic.LoadInt64(&l.used)
}
|
||||||
@@ -0,0 +1,97 @@
|
|||||||
|
package membership
|
||||||
|
|
||||||
|
import (
|
||||||
|
"sync"
|
||||||
|
"testing"
|
||||||
|
)
|
||||||
|
|
||||||
|
// TestInflightLimiterBasics covers the limiter contract: granting within the cap
|
||||||
|
// (golden), the exact boundary (edge), refusal over the cap without mutating the
|
||||||
|
// counter (error), the disabled mode, and the defensive clamp on over-release.
|
||||||
|
func TestInflightLimiterBasics(t *testing.T) {
|
||||||
|
l := newInflightLimiter(100)
|
||||||
|
|
||||||
|
// Golden: a reservation within the cap is granted and reflected.
|
||||||
|
if !l.tryAcquire(60) {
|
||||||
|
t.Fatalf("acquire 60 within cap 100 should grant")
|
||||||
|
}
|
||||||
|
if l.inFlight() != 60 {
|
||||||
|
t.Fatalf("inFlight = %d, want 60", l.inFlight())
|
||||||
|
}
|
||||||
|
|
||||||
|
// Edge: exactly reaching the cap (60+40 == 100) is granted.
|
||||||
|
if !l.tryAcquire(40) {
|
||||||
|
t.Fatalf("acquire to the exact cap should grant")
|
||||||
|
}
|
||||||
|
if l.inFlight() != 100 {
|
||||||
|
t.Fatalf("inFlight = %d, want 100", l.inFlight())
|
||||||
|
}
|
||||||
|
|
||||||
|
// Error: one more byte over the full cap is refused, and the counter is left
|
||||||
|
// untouched (a refused reservation reserves nothing).
|
||||||
|
if l.tryAcquire(1) {
|
||||||
|
t.Fatalf("acquire over a full cap must be refused")
|
||||||
|
}
|
||||||
|
if l.inFlight() != 100 {
|
||||||
|
t.Fatalf("a refused acquire must not change inFlight; got %d", l.inFlight())
|
||||||
|
}
|
||||||
|
|
||||||
|
// Release frees capacity again.
|
||||||
|
l.release(100)
|
||||||
|
if l.inFlight() != 0 {
|
||||||
|
t.Fatalf("inFlight after full release = %d, want 0", l.inFlight())
|
||||||
|
}
|
||||||
|
|
||||||
|
// Defensive: an over-release never drives the counter negative.
|
||||||
|
l.release(50)
|
||||||
|
if l.inFlight() != 0 {
|
||||||
|
t.Fatalf("over-release must clamp at 0; got %d", l.inFlight())
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// TestInflightLimiterDisabled verifies that a non-positive cap disables the
|
||||||
|
// limiter: every reservation is granted and nothing is tracked (the loopback/dev
|
||||||
|
// posture).
|
||||||
|
func TestInflightLimiterDisabled(t *testing.T) {
|
||||||
|
for _, max := range []int64{0, -1} {
|
||||||
|
l := newInflightLimiter(max)
|
||||||
|
if !l.tryAcquire(1 << 30) {
|
||||||
|
t.Fatalf("disabled limiter (max=%d) must always grant", max)
|
||||||
|
}
|
||||||
|
if l.inFlight() != 0 {
|
||||||
|
t.Fatalf("disabled limiter must not track usage; got %d", l.inFlight())
|
||||||
|
}
|
||||||
|
l.release(1 << 30) // no-op, must not panic
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// TestInflightLimiterConcurrent hammers the limiter from many goroutines with
|
||||||
|
// equal-sized acquire/release pairs and asserts the invariant never breaks: the
|
||||||
|
// counter returns to 0 and never exceeds the cap. Run with -race for the memory
|
||||||
|
// model guarantee.
|
||||||
|
func TestInflightLimiterConcurrent(t *testing.T) {
|
||||||
|
const cap = 1000
|
||||||
|
const chunk = 7
|
||||||
|
l := newInflightLimiter(cap)
|
||||||
|
|
||||||
|
var wg sync.WaitGroup
|
||||||
|
for g := 0; g < 64; g++ {
|
||||||
|
wg.Add(1)
|
||||||
|
go func() {
|
||||||
|
defer wg.Done()
|
||||||
|
for i := 0; i < 2000; i++ {
|
||||||
|
if l.tryAcquire(chunk) {
|
||||||
|
if f := l.inFlight(); f > cap {
|
||||||
|
t.Errorf("inFlight %d exceeded cap %d", f, cap)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
l.release(chunk)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}()
|
||||||
|
}
|
||||||
|
wg.Wait()
|
||||||
|
if l.inFlight() != 0 {
|
||||||
|
t.Fatalf("after all goroutines, inFlight = %d, want 0", l.inFlight())
|
||||||
|
}
|
||||||
|
}
|
||||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user