3 Commits

Author SHA1 Message Date
egutierrez 5af945778b feat(android): app nativa Kotlin/Compose (login, rooms, chat)
App Android Material 3, tema oscuro con acento índigo/violeta que replica el
look & feel de la app web (web/src):

- Login: identidad + contraseña, candado de marca, estilo Card.
- Lista de rooms: avatar+handle, buscador rooms/usuarios/mensajes, items con
  candado (E2E) / hash (cleartext), hora, último mensaje y badge de no leídos.
- Chat estilo Element: avatar+nombre+hora+texto, composer redondeado con send.

Arquitectura por capas: UnibusRepository aísla la UI de la fuente de datos.
MockUnibusRepository (en memoria) alimenta la iteración de diseño;
BindingUnibusRepository implementa la misma interfaz sobre el binding gomobile
(unibus.aar) para conectar el bus real sin tocar las pantallas. AppViewModel
orquesta el estado; navegación por estado (login -> rooms -> chat), KISS.

Build: ./gradlew assembleDebug (AGP 8.5.2, Gradle 8.7, Kotlin 1.9.24,
Compose BOM 2024.06.00, compileSdk 34, minSdk 21). El .aar se regenera con
mobile/gen_aar.sh (no se versiona, 38MB).

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
2026-06-07 18:16:23 +02:00
egutierrez f92973f5fe feat(mobile): rehacer binding gomobile sobre pkg/client
Reintroduce mobile/unibus.go (package mobile), borrado en la limpieza de
frontends experimentales. Expone una API plana gomobile-friendly sobre
pkg/client para que la app Android sea un peer del bus con el mismo cifrado
de extremo a extremo que cualquier otro:

- GenerateIdentity, NewSession (vía client.Connect con TLS+nkey+caPath)
- EndpointID, ConnectedServer, IsConnected
- CreateRoom, Join, RefreshSession (contrato de membresía issue 0006e)
- Publish, Subscribe(FrameListener), ListRoomsJSON
- Card, Invite, Kick, Request, Close

No reimplementa criptografía: todo delega en pkg/client. FrameListener
documenta el contrato de hilo (los callbacks llegan en una goroutine de NATS;
Kotlin debe saltar al hilo principal). gen_aar.sh regenera el .aar.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
2026-06-07 18:16:23 +02:00
egutierrez 380d795ffb chore: ignore go.work local workspace
When building the gomobile binding from a git worktree outside the registry
tree, pkg/client's replace to fn-registry needs an absolute path. A local
go.work (gitignored) supplies it without touching the committed go.mod.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
2026-06-07 18:16:23 +02:00
54 changed files with 2117 additions and 1874 deletions
+4
View File
@@ -14,3 +14,7 @@ worker.id
/chat /chat
*.exe *.exe
registry.db registry.db
# local workspace (no committear: replace absoluto al registry)
go.work
go.work.sum
+15
View File
@@ -0,0 +1,15 @@
# Android / Gradle build artifacts
*.iml
.gradle/
/local.properties
/.idea
.DS_Store
/build
/app/build
/captures
.externalNativeBuild
.cxx
# binding gomobile regenerable (38MB): ver mobile/gen_aar.sh
/app/libs/*.aar
/app/libs/*-sources.jar
+75
View File
@@ -0,0 +1,75 @@
// Module build script for the Android app (namespace/applicationId "com.unibus.app").
// Plugins applied: Android application, Kotlin for Android and kotlinx.serialization.
plugins {
id("com.android.application")
id("org.jetbrains.kotlin.android")
id("org.jetbrains.kotlin.plugin.serialization")
}
android {
namespace = "com.unibus.app"
compileSdk = 34
defaultConfig {
applicationId = "com.unibus.app"
minSdk = 21
targetSdk = 34
versionCode = 1
versionName = "0.1.0"
// The unibus.aar ships native libgojni.so for these ABIs. Limit the APK
// to the desktop/emulator + phone ABIs we actually target.
ndk {
abiFilters += listOf("arm64-v8a", "armeabi-v7a", "x86", "x86_64")
}
}
buildTypes {
release {
// Code shrinking is turned off; the ProGuard files below are still
// registered for the release build type.
isMinifyEnabled = false
proguardFiles(
getDefaultProguardFile("proguard-android-optimize.txt"),
"proguard-rules.pro",
)
}
}
// Java and Kotlin are both compiled for JVM 17.
compileOptions {
sourceCompatibility = JavaVersion.VERSION_17
targetCompatibility = JavaVersion.VERSION_17
}
kotlinOptions {
jvmTarget = "17"
}
buildFeatures {
compose = true
}
composeOptions {
// Compose compiler matching Kotlin 1.9.24.
kotlinCompilerExtensionVersion = "1.5.14"
}
packaging {
resources {
// Leave these META-INF licence entries out of the packaged resources.
excludes += "/META-INF/{AL2.0,LGPL2.1}"
}
}
}
dependencies {
// gomobile binding over pkg/client (real end-to-end crypto on device).
implementation(files("libs/unibus.aar"))
implementation("androidx.core:core-ktx:1.13.1")
implementation("androidx.activity:activity-compose:1.9.0")
implementation("androidx.lifecycle:lifecycle-runtime-ktx:2.8.2")
implementation("androidx.lifecycle:lifecycle-viewmodel-compose:2.8.2")
// The Compose BOM pins the versions of the unversioned androidx.compose artifacts below.
val composeBom = platform("androidx.compose:compose-bom:2024.06.00")
implementation(composeBom)
implementation("androidx.compose.ui:ui")
implementation("androidx.compose.ui:ui-graphics")
implementation("androidx.compose.material3:material3")
implementation("androidx.compose.material:material-icons-extended")
implementation("androidx.compose.ui:ui-tooling-preview")
// Tooling is only added to the debug configuration.
debugImplementation("androidx.compose.ui:ui-tooling")
implementation("org.jetbrains.kotlinx:kotlinx-serialization-json:1.6.3")
implementation("org.jetbrains.kotlinx:kotlinx-coroutines-android:1.8.1")
}
+12
View File
@@ -0,0 +1,12 @@
# libs/
`unibus.aar` (binding gomobile sobre `pkg/client`, ~38 MB con `libgojni.so` para
4 ABIs) vive aquí pero **no se versiona** — es un artefacto de build reproducible.
Regenéralo con:
```bash
../../mobile/gen_aar.sh
```
(desde la raíz del repo: `./mobile/gen_aar.sh`). Requiere Go + gomobile + Android NDK.
+4
View File
@@ -0,0 +1,4 @@
# gomobile binding: keep the generated Go<->Java bridge classes intact so the
# JNI layer can find them by name at runtime.
-keep class go.** { *; }
-keep class com.unibus.core.mobile.** { *; }
+25
View File
@@ -0,0 +1,25 @@
<?xml version="1.0" encoding="utf-8"?>
<manifest xmlns:android="http://schemas.android.com/apk/res/android">

    <!-- The bus is reached over the network (NATS data plane + control plane). -->
    <uses-permission android:name="android.permission.INTERNET" />
    <uses-permission android:name="android.permission.ACCESS_NETWORK_STATE" />

    <!--
        allowBackup is disabled on purpose: the app keeps its long-lived private
        identity key in its files directory, and platform backups would copy
        that key off the device.
    -->
    <application
        android:allowBackup="false"
        android:label="unibus"
        android:icon="@mipmap/ic_launcher"
        android:theme="@style/Theme.Unibus"
        android:supportsRtl="true">

        <!-- Single launcher activity hosting the Compose UI. -->
        <activity
            android:name=".MainActivity"
            android:exported="true"
            android:theme="@style/Theme.Unibus">
            <intent-filter>
                <action android:name="android.intent.action.MAIN" />
                <category android:name="android.intent.category.LAUNCHER" />
            </intent-filter>
        </activity>
    </application>
</manifest>
@@ -0,0 +1,88 @@
package com.unibus.app
import androidx.compose.runtime.getValue
import androidx.compose.runtime.mutableStateOf
import androidx.compose.runtime.setValue
import androidx.lifecycle.ViewModel
import androidx.lifecycle.viewModelScope
import com.unibus.app.data.Message
import com.unibus.app.data.MockUnibusRepository
import com.unibus.app.data.Room
import com.unibus.app.data.UnibusRepository
import com.unibus.app.data.User
import kotlinx.coroutines.launch
/**
 * App state holder. Orchestrates the [UnibusRepository] (mock by default) and
 * exposes observable state to Compose. Swapping the repository for
 * [com.unibus.app.data.BindingUnibusRepository] connects the UI to the real bus
 * without touching the screens.
 *
 * All state is written from the main thread: coroutines run in
 * [viewModelScope], and the repository contract requires subscription
 * callbacks to be delivered on the main thread.
 */
class AppViewModel(
    private val repo: UnibusRepository,
) : ViewModel() {
    // No-arg constructor so androidx `viewModel()` can instantiate the class by
    // reflection. It defaults to the mock repository (design iteration).
    constructor() : this(MockUnibusRepository())

    /** Logged-in user, or null while on the login screen. */
    var user by mutableStateOf<User?>(null)
        private set

    /** Rooms loaded after a successful connect. */
    var rooms by mutableStateOf<List<Room>>(emptyList())
        private set

    /** Id of the room currently open in the chat screen, or null. */
    var activeRoomId by mutableStateOf<String?>(null)
        private set

    /** Messages shown for the active room. */
    var messages by mutableStateOf<List<Message>>(emptyList())
        private set

    /** True while a connect attempt is in flight. */
    var connecting by mutableStateOf(false)
        private set

    /** Last connect error to show on the login screen, or null. */
    var error by mutableStateOf<String?>(null)
        private set

    /** The [Room] matching [activeRoomId], or null when none is open. */
    val activeRoom: Room?
        get() = rooms.firstOrNull { it.id == activeRoomId }

    /**
     * Connects through the repository and, on success, loads the room list.
     * Ignored while another attempt is running. Any failure, whether a failed
     * [Result] or an exception thrown by the repository, ends up in [error],
     * and [connecting] is always reset.
     */
    fun connect(handle: String, password: String) {
        if (connecting) return
        connecting = true
        error = null
        viewModelScope.launch {
            try {
                repo.connect(handle, password)
                    .onSuccess { loggedIn ->
                        // Load rooms before publishing the user so a failing
                        // listRooms() keeps the UI on the login screen.
                        rooms = repo.listRooms()
                        user = loggedIn
                    }
                    .onFailure { error = it.message ?: "No se pudo conectar" }
            } catch (e: kotlinx.coroutines.CancellationException) {
                throw e // never swallow coroutine cancellation
            } catch (e: Exception) {
                // connect() may have succeeded before listRooms() threw: release it.
                runCatching { repo.close() }
                error = e.message ?: "No se pudo conectar"
            } finally {
                connecting = false
            }
        }
    }

    /**
     * Opens room [id]: loads its known messages and subscribes to incoming ones.
     * The repository offers no unsubscribe, so reopening a room may register a
     * second listener; [appendToRoom] drops anything already shown.
     */
    fun openRoom(id: String) {
        activeRoomId = id
        messages = repo.messagesOf(id)
        repo.subscribe(id) { incoming -> appendToRoom(id, incoming) }
    }

    /** Leaves the chat screen and clears the visible messages. */
    fun closeRoom() {
        activeRoomId = null
        messages = emptyList()
    }

    /** Sends [text] (trimmed) to the active room; blank input is ignored. */
    fun send(text: String) {
        val rid = activeRoomId ?: return
        val body = text.trim()
        if (body.isEmpty()) return
        viewModelScope.launch {
            // Append against the room the message was sent to, not whichever
            // room happens to be open when the send completes.
            repo.send(rid, body).onSuccess { appendToRoom(rid, it) }
        }
    }

    /** Closes the repository session and resets every piece of UI state. */
    fun logout() {
        repo.close()
        user = null
        rooms = emptyList()
        activeRoomId = null
        messages = emptyList()
        error = null
    }

    /**
     * Appends [msg] to [messages] only if [roomId] is still the active room and
     * no message with the same id is already listed. The chat list keys its
     * items by message id, so a duplicate id would crash the LazyColumn.
     */
    private fun appendToRoom(roomId: String, msg: Message) {
        if (activeRoomId != roomId) return
        if (messages.any { it.id == msg.id }) return
        messages = messages + msg
    }
}
@@ -0,0 +1,63 @@
package com.unibus.app
import android.os.Bundle
import androidx.activity.ComponentActivity
import androidx.activity.compose.BackHandler
import androidx.activity.compose.setContent
import androidx.compose.runtime.Composable
import androidx.compose.runtime.CompositionLocalProvider
import androidx.lifecycle.viewmodel.compose.viewModel
import com.unibus.app.ui.ChatScreen
import com.unibus.app.ui.LoginScreen
import com.unibus.app.ui.RoomListScreen
import com.unibus.app.ui.theme.LocalUnibusColors
import com.unibus.app.ui.theme.UnibusColors
import com.unibus.app.ui.theme.UnibusTheme
/** Single activity: installs the Compose content tree with the app theme and colors. */
class MainActivity : ComponentActivity() {
    override fun onCreate(savedInstanceState: Bundle?) {
        super.onCreate(savedInstanceState)
        setContent {
            UnibusTheme {
                // Expose the app palette to every composable below.
                val providedColors = LocalUnibusColors provides UnibusColors()
                CompositionLocalProvider(providedColors) { UnibusApp() }
            }
        }
    }
}
/**
 * State-driven navigation (no routing library — KISS): the user flows
 * Login → room list → chat, like the web app but in a single column.
 */
@Composable
private fun UnibusApp(vm: AppViewModel = viewModel()) {
    val currentUser = vm.user
    val openedRoom = vm.activeRoom

    if (currentUser == null) {
        // Not logged in yet.
        LoginScreen(
            connecting = vm.connecting,
            error = vm.error,
            onLogin = { handle, password -> vm.connect(handle, password) },
        )
    } else if (openedRoom == null) {
        // Logged in, no room selected.
        RoomListScreen(
            user = currentUser,
            rooms = vm.rooms,
            onSelect = { roomId -> vm.openRoom(roomId) },
            onLogout = { vm.logout() },
        )
    } else {
        // A room is open: system back returns to the list.
        val leaveRoom: () -> Unit = { vm.closeRoom() }
        BackHandler(onBack = leaveRoom)
        ChatScreen(
            room = openedRoom,
            messages = vm.messages,
            onSend = { text -> vm.send(text) },
            onBack = leaveRoom,
        )
    }
}
@@ -0,0 +1,157 @@
package com.unibus.app.data
import android.content.Context
import android.os.Handler
import android.os.Looper
import com.unibus.core.mobile.FrameListener
import com.unibus.core.mobile.Mobile
import com.unibus.core.mobile.Session
import kotlinx.coroutines.Dispatchers
import kotlinx.coroutines.withContext
import kotlinx.serialization.Serializable
import kotlinx.serialization.json.Json
import java.io.File
/**
 * Real implementation on top of the gomobile binding (pkg/client): end-to-end
 * encryption ON the device, like any other peer of the bus. It shares its
 * interface with [MockUnibusRepository], so the UI does not change when it is
 * plugged in.
 *
 * Status: fully wired against unibus.aar. Iteration 1 of the app runs on the
 * mock to iterate on the design; to enable the real bus, instantiate this
 * repository in [com.unibus.app.MainActivity] with the bus URLs and (if the bus
 * requires TLS+auth) a ca.crt in assets.
 *
 * Membership contract (issue 0006e): after CreateRoom / Join / Invite, call
 * [refresh] BEFORE subscribe/publish on that room, or a secured bus denies the
 * subject. refresh() also drops subscriptions: re-subscribe afterwards.
 *
 * @param context any context; only its application context is retained.
 * @param natsURL URL passed to the binding for the NATS data plane.
 * @param ctrlURL URL passed to the binding for the control plane.
 */
class BindingUnibusRepository(
    context: Context,
    private val natsURL: String,
    private val ctrlURL: String,
) : UnibusRepository {
    private val appContext = context.applicationContext
    private val mainHandler = Handler(Looper.getMainLooper())
    private val json = Json { ignoreUnknownKeys = true }

    // Written on Dispatchers.IO (connect) and read from the main thread
    // (subscribe/close): @Volatile guarantees cross-thread visibility.
    @Volatile
    private var session: Session? = null

    @Volatile
    private var user: User? = null

    // Disambiguates local message ids created within the same millisecond.
    private val localSeq = java.util.concurrent.atomic.AtomicLong()

    /** One entry of the JSON array returned by the binding's listRoomsJSON(). */
    @Serializable
    private data class RoomDTO(
        val room_id: String,
        val subject: String,
        val epoch: Int = 0,
        val encrypted: Boolean = false,
        val role: String = "",
    )

    /** Sandbox path of the long-lived identity (private keys). */
    private fun identityPath(): String =
        File(appContext.filesDir, "identity.key").absolutePath

    /**
     * Copies ca.crt from assets to a local file and returns its path, or "" when
     * the asset cannot be opened (plain-text development bus). The binding pins
     * TLS to this CA when the path is non-empty.
     *
     * Only a failure to open the asset means "no CA". If the asset exists but
     * copying it fails, the exception propagates so that [connect] fails rather
     * than silently connecting without pinning.
     */
    private fun caPathOrEmpty(): String {
        val source = try {
            appContext.assets.open("ca.crt")
        } catch (_: java.io.IOException) {
            return ""
        }
        val target = File(appContext.filesDir, "ca.crt")
        source.use { input ->
            target.outputStream().use { output -> input.copyTo(output) }
        }
        return target.absolutePath
    }

    /**
     * Creates/loads the identity, opens a session against the bus and returns
     * the logged-in user (id = endpoint id of the session, handle = [handle]).
     * Any exception from the binding is returned as a failed [Result].
     */
    override suspend fun connect(handle: String, password: String): Result<User> =
        withContext(Dispatchers.IO) {
            try {
                // Release a previous session, if any, so reconnecting does not leak it.
                runCatching { session?.close() }
                session = null
                user = null
                // The identity is persisted in the sandbox; password will unlock
                // it in a future iteration (today LoadOrCreateIdentity
                // creates/reads it directly). handle is the local visible label.
                // NOTE(review): assumes generateIdentity() keeps an existing
                // identity file instead of overwriting it — confirm in mobile/unibus.go.
                Mobile.generateIdentity(identityPath())
                val s = Mobile.newSession(identityPath(), natsURL, ctrlURL, caPathOrEmpty())
                session = s
                val u = User(id = s.endpointID(), handle = handle)
                user = u
                Result.success(u)
            } catch (e: Exception) {
                Result.failure(e)
            }
        }

    /**
     * Lists the rooms reported by the binding. Returns an empty list when there
     * is no session, or when the call or the JSON decoding fails. lastTs is
     * filled with the current time and the message fields are left empty.
     */
    override suspend fun listRooms(): List<Room> = withContext(Dispatchers.IO) {
        val s = session ?: return@withContext emptyList()
        val raw = runCatching { s.listRoomsJSON() }.getOrDefault("[]")
        val dtos = runCatching { json.decodeFromString<List<RoomDTO>>(raw) }.getOrDefault(emptyList())
        dtos.map {
            Room(
                id = it.room_id,
                name = it.subject,
                encrypted = it.encrypted,
                lastMessage = "",
                lastTs = System.currentTimeMillis(),
                unread = 0,
                messages = emptyList(),
            )
        }
    }

    /** This implementation keeps no history: always empty. */
    override fun messagesOf(roomId: String): List<Message> = emptyList()

    /**
     * Subscribes to [roomId]; [onMessage] is posted to the main thread for each
     * frame. No-op without a session. Subscribe errors are logged, not thrown.
     */
    override fun subscribe(roomId: String, onMessage: (Message) -> Unit) {
        val s = session ?: return
        val myId = user?.id
        // FrameListener.onFrame arrives on a NATS goroutine: hop to the main
        // thread before touching Compose state.
        val listener = object : FrameListener {
            override fun onFrame(rid: String, sender: String, msgID: String, text: String) {
                val msg = Message(
                    id = msgID,
                    sender = sender,
                    body = text,
                    ts = System.currentTimeMillis(),
                    mine = sender == myId,
                )
                mainHandler.post { onMessage(msg) }
            }
        }
        runCatching { s.subscribe(roomId, listener) }
            .onFailure { android.util.Log.w("BindingUnibusRepo", "subscribe($roomId) failed", it) }
    }

    /**
     * Publishes [text] to [roomId] and returns a locally built echo message.
     * Fails with [IllegalStateException] when there is no session, or with the
     * binding's exception when publish throws.
     */
    override suspend fun send(roomId: String, text: String): Result<Message> =
        withContext(Dispatchers.IO) {
            val s = session ?: return@withContext Result.failure(IllegalStateException("sin sesión"))
            try {
                s.publish(roomId, text)
                val now = System.currentTimeMillis()
                Result.success(
                    Message(
                        // The sequence suffix keeps ids unique within one
                        // millisecond; the chat list uses the id as item key.
                        id = "local-$now-${localSeq.incrementAndGet()}",
                        sender = user?.id ?: "yo",
                        body = text,
                        ts = now,
                        mine = true,
                    ),
                )
            } catch (e: Exception) {
                Result.failure(e)
            }
        }

    /** Re-applies permissions after a membership change. Re-subscribe afterwards. */
    suspend fun refresh(): Result<Unit> = withContext(Dispatchers.IO) {
        runCatching { session?.refreshSession(); Unit }
    }

    /** Closes the session (errors ignored) and forgets the session and user. */
    override fun close() {
        runCatching { session?.close() }
        session = null
        user = null
    }
}
@@ -0,0 +1,59 @@
package com.unibus.app.data
// Sample data to iterate on the design without the bus connected (mirror of mock.ts).
// Fixed reference instant, in epoch milliseconds, that all mock timestamps hang off.
private const val NOW = 1749300000000L
// Timestamp n minutes before NOW.
private fun m(n: Int): Long = NOW - n * 60_000L
// Rooms served by the mock repository. Message ids are only unique within a room.
val MOCK_ROOMS: List<Room> = listOf(
Room(
id = "general",
name = "general",
encrypted = true,
lastMessage = "¿Lo desplegamos hoy?",
lastTs = m(2),
unread = 3,
messages = listOf(
Message("1", "ana", "Buenas, ¿cómo va el cluster?", m(40)),
Message("2", "lucas", "Los 3 nodos en R3, quorum verde", m(38), mine = true),
Message("3", "ana", "Brutal. ¿Y el frontend?", m(30)),
Message("4", "leo", "Primera iteración lista, estilo Element", m(6)),
Message("5", "ana", "¿Lo desplegamos hoy?", m(2)),
),
),
Room(
id = "board",
name = "board · privado",
encrypted = true,
lastMessage = "Os paso el acta cifrada",
lastTs = m(95),
unread = 0,
messages = listOf(
Message("1", "ceo", "Reunión a las 18:00", m(120)),
Message("2", "lucas", "Anotado", m(96), mine = true),
Message("3", "ceo", "Os paso el acta cifrada", m(95)),
),
),
// The only room in the sample set that is not encrypted.
Room(
id = "bots",
name = "bots",
encrypted = false,
lastMessage = "echo: ping",
lastTs = m(210),
unread = 0,
messages = listOf(
Message("1", "lucas", "!ping", m(212), mine = true),
Message("2", "echobot", "echo: ping", m(210)),
),
),
Room(
id = "infra",
name = "infra",
encrypted = true,
lastMessage = "magnus + homer + datardos OK",
lastTs = m(330),
unread = 1,
messages = listOf(
Message("1", "leo", "magnus + homer + datardos OK", m(330)),
),
),
)
@@ -0,0 +1,30 @@
package com.unibus.app.data
/**
 * UI domain models. In iteration 1 they are filled with mock data; later they
 * will come from the gomobile binding (pkg/client) through
 * [UnibusRepository]. They mirror the web app's types (types.ts).
 *
 * [User] is the logged-in identity: [id] is its identifier and [handle] the
 * label shown in the UI.
 */
data class User(
val id: String,
val handle: String,
)
/** One chat message as rendered in the chat screen. */
data class Message(
val id: String,
val sender: String, // a handle in the mock data; the binding repository passes the frame's sender value through
val body: String,
val ts: Long, // epoch ms
val mine: Boolean = false, // true when the local user sent it
)
/**
 * A room as shown in the room list: display name, whether it is encrypted,
 * a preview of the last message with its timestamp, the unread count and the
 * messages known for it.
 */
data class Room(
val id: String,
val name: String,
val encrypted: Boolean,
val lastMessage: String,
val lastTs: Long, // epoch ms in the mock data
val unread: Int,
val messages: List<Message>,
)
@@ -0,0 +1,74 @@
package com.unibus.app.data
/**
 * Repository layer that isolates the UI from the data source. Iteration 1 uses
 * [MockUnibusRepository] (in memory) to iterate on the design. Once the real
 * bus is plugged in, [BindingUnibusRepository] (in BindingRepository.kt)
 * implements this same interface on top of the gomobile binding (pkg/client),
 * without touching the UI.
 */
interface UnibusRepository {
/** Unlocks/creates the identity and connects to the bus. Returns the logged-in user. */
suspend fun connect(handle: String, password: String): Result<User>
/** Rooms the peer belongs to. */
suspend fun listRooms(): List<Room>
/** Known historical messages of a room (mock: those of the Room itself). */
fun messagesOf(roomId: String): List<Message>
/**
 * Subscribes to a room. [onMessage] is invoked for every incoming message.
 * Implementations backed by the bus MUST deliver [onMessage] on the main
 * thread (the binding receives it on a NATS goroutine).
 */
fun subscribe(roomId: String, onMessage: (Message) -> Unit)
/** Publishes text to the room. */
suspend fun send(roomId: String, text: String): Result<Message>
/** Closes the session. */
fun close()
}
/**
 * In-memory implementation: starts from [MOCK_ROOMS] and accumulates the
 * messages the user sends. It touches neither the network nor the binding — it
 * exists to build and review the UI.
 */
class MockUnibusRepository : UnibusRepository {
    private var user: User? = null

    // Messages sent during the current session, grouped by room id.
    private val sent = mutableMapOf<String, MutableList<Message>>()

    // Disambiguates local message ids created within the same millisecond.
    private var nextLocalSeq = 0L

    /** Always succeeds: the handle doubles as the user id; the password is ignored. */
    override suspend fun connect(handle: String, password: String): Result<User> {
        val u = User(id = handle, handle = handle)
        user = u
        return Result.success(u)
    }

    override suspend fun listRooms(): List<Room> = MOCK_ROOMS

    /** Sample messages of the room followed by the ones sent in this session. */
    override fun messagesOf(roomId: String): List<Message> {
        val base = MOCK_ROOMS.firstOrNull { it.id == roomId }?.messages.orEmpty()
        return base + (sent[roomId].orEmpty())
    }

    override fun subscribe(roomId: String, onMessage: (Message) -> Unit) {
        // The mock receives no incoming traffic; the UI handles the echo on send.
    }

    /** Records [text] as a message from the current user and returns it. */
    override suspend fun send(roomId: String, text: String): Result<Message> {
        val handle = user?.handle ?: "yo"
        val now = System.currentTimeMillis()
        val msg = Message(
            // The sequence suffix keeps ids unique within one millisecond; the
            // chat list uses the id as item key and rejects duplicates.
            id = "local-$now-${nextLocalSeq++}",
            sender = handle,
            body = text,
            ts = now,
            mine = true,
        )
        sent.getOrPut(roomId) { mutableListOf() }.add(msg)
        return Result.success(msg)
    }

    /**
     * Ends the session. Sent messages are dropped too, so the next login does
     * not see the previous user's messages flagged as its own.
     */
    override fun close() {
        user = null
        sent.clear()
    }
}
@@ -0,0 +1,203 @@
package com.unibus.app.ui
import androidx.compose.foundation.background
import androidx.compose.foundation.layout.Arrangement
import androidx.compose.foundation.layout.Box
import androidx.compose.foundation.layout.Column
import androidx.compose.foundation.layout.Row
import androidx.compose.foundation.layout.fillMaxSize
import androidx.compose.foundation.layout.fillMaxWidth
import androidx.compose.foundation.layout.padding
import androidx.compose.foundation.layout.size
import androidx.compose.foundation.lazy.LazyColumn
import androidx.compose.foundation.lazy.items
import androidx.compose.foundation.lazy.rememberLazyListState
import androidx.compose.foundation.shape.CircleShape
import androidx.compose.foundation.text.KeyboardActions
import androidx.compose.foundation.text.KeyboardOptions
import androidx.compose.material.icons.Icons
import androidx.compose.material.icons.automirrored.filled.ArrowBack
import androidx.compose.material.icons.automirrored.filled.Send
import androidx.compose.material.icons.filled.AttachFile
import androidx.compose.material.icons.filled.Lock
import androidx.compose.material.icons.filled.MoreVert
import androidx.compose.material.icons.filled.Tag
import androidx.compose.material3.HorizontalDivider
import androidx.compose.material3.Icon
import androidx.compose.material3.IconButton
import androidx.compose.material3.OutlinedTextField
import androidx.compose.material3.Text
import androidx.compose.material3.TextFieldDefaults
import androidx.compose.runtime.Composable
import androidx.compose.runtime.LaunchedEffect
import androidx.compose.runtime.getValue
import androidx.compose.runtime.mutableStateOf
import androidx.compose.runtime.remember
import androidx.compose.runtime.setValue
import androidx.compose.ui.Alignment
import androidx.compose.ui.Modifier
import androidx.compose.ui.draw.clip
import androidx.compose.ui.graphics.Color
import androidx.compose.ui.text.font.FontWeight
import androidx.compose.ui.text.style.TextOverflow
import androidx.compose.ui.unit.dp
import androidx.compose.ui.unit.sp
import com.unibus.app.data.Message
import com.unibus.app.data.Room
import com.unibus.app.ui.theme.Brand3
import com.unibus.app.ui.theme.LocalUnibusColors
/**
 * Chat screen for [room]: header (back, avatar, name, encryption hint), the
 * message list and a composer. [onSend] receives the raw draft text; [onBack]
 * is invoked from the header's back button.
 */
@Composable
fun ChatScreen(
    room: Room,
    messages: List<Message>,
    onSend: (String) -> Unit,
    onBack: () -> Unit,
) {
    val palette = LocalUnibusColors.current
    var draft by remember { mutableStateOf("") }
    val scrollState = rememberLazyListState()

    // Keep the newest message in view when the list grows or the room changes.
    LaunchedEffect(messages.size, room.id) {
        if (messages.isNotEmpty()) scrollState.animateScrollToItem(messages.size - 1)
    }

    val canSend = draft.trim().isNotEmpty()

    // Shared by the IME "send" action and the send button.
    fun submit() {
        if (draft.trim().isNotEmpty()) {
            onSend(draft)
            draft = ""
        }
    }

    Column(
        modifier = Modifier
            .fillMaxSize()
            .background(palette.chatBg),
    ) {
        // Header
        Row(
            modifier = Modifier
                .fillMaxWidth()
                .padding(horizontal = 6.dp, vertical = 8.dp),
            verticalAlignment = Alignment.CenterVertically,
        ) {
            IconButton(onClick = onBack) {
                Icon(
                    imageVector = Icons.AutoMirrored.Filled.ArrowBack,
                    contentDescription = "Atrás",
                    tint = Color.White,
                )
            }
            InitialsAvatar(room.name, size = 38.dp, rounded = true, accent = true)
            Column(
                modifier = Modifier
                    .weight(1f)
                    .padding(start = 10.dp),
            ) {
                val kindIcon = if (room.encrypted) Icons.Filled.Lock else Icons.Filled.Tag
                val kindLabel = if (room.encrypted) "cifrada · E2E" else "abierta · cleartext"
                Row(verticalAlignment = Alignment.CenterVertically) {
                    Text(
                        text = room.name,
                        fontWeight = FontWeight(650),
                        fontSize = 16.sp,
                        color = Color.White,
                        maxLines = 1,
                        overflow = TextOverflow.Ellipsis,
                    )
                    Icon(
                        imageVector = kindIcon,
                        contentDescription = null,
                        tint = palette.dimmed,
                        modifier = Modifier
                            .padding(start = 6.dp)
                            .size(14.dp),
                    )
                }
                Text(
                    text = kindLabel,
                    color = palette.dimmed,
                    fontSize = 11.sp,
                )
            }
            IconButton(onClick = { /* room options (future) */ }) {
                Icon(
                    imageVector = Icons.Filled.MoreVert,
                    contentDescription = "Opciones",
                    tint = palette.dimmed,
                )
            }
        }
        HorizontalDivider(color = palette.divider)

        // Messages
        LazyColumn(
            state = scrollState,
            modifier = Modifier
                .weight(1f)
                .fillMaxWidth(),
            contentPadding = androidx.compose.foundation.layout.PaddingValues(14.dp),
            verticalArrangement = Arrangement.spacedBy(16.dp),
        ) {
            items(items = messages, key = { entry -> entry.id }) { entry ->
                MessageRow(entry)
            }
        }
        HorizontalDivider(color = palette.divider)

        // Composer
        Row(
            modifier = Modifier
                .fillMaxWidth()
                .padding(8.dp),
            verticalAlignment = Alignment.CenterVertically,
        ) {
            IconButton(onClick = { /* attach (future) */ }) {
                Icon(
                    imageVector = Icons.Filled.AttachFile,
                    contentDescription = "Adjuntar",
                    tint = palette.dimmed,
                )
            }
            OutlinedTextField(
                value = draft,
                onValueChange = { typed -> draft = typed },
                placeholder = { Text("Mensaje a ${room.name}") },
                singleLine = true,
                shape = CircleShape,
                colors = TextFieldDefaults.colors(
                    focusedContainerColor = palette.field,
                    unfocusedContainerColor = palette.field,
                ),
                modifier = Modifier.weight(1f),
                keyboardOptions = KeyboardOptions(imeAction = androidx.compose.ui.text.input.ImeAction.Send),
                keyboardActions = KeyboardActions(onSend = { submit() }),
            )
            // Round send button: brand-colored only when there is something to send.
            val sendBackground = if (canSend) palette.brand else palette.field
            Box(
                modifier = Modifier
                    .padding(start = 6.dp)
                    .size(46.dp)
                    .clip(CircleShape)
                    .background(sendBackground),
                contentAlignment = Alignment.Center,
            ) {
                IconButton(
                    onClick = { submit() },
                    enabled = canSend,
                ) {
                    Icon(
                        imageVector = Icons.AutoMirrored.Filled.Send,
                        contentDescription = "Enviar",
                        tint = Color.White,
                    )
                }
            }
        }
    }
}
/** One message line: avatar, then sender name + short time above the body text. */
@Composable
private fun MessageRow(msg: Message) {
    val palette = LocalUnibusColors.current
    // Own messages get the brand tint on the sender name.
    val senderColor = if (msg.mine) Brand3 else Color.White

    Row(verticalAlignment = Alignment.Top) {
        InitialsAvatar(msg.sender, size = 36.dp, rounded = false, accent = msg.mine)
        Column(modifier = Modifier.padding(start = 10.dp)) {
            Row(verticalAlignment = Alignment.Bottom) {
                Text(
                    text = msg.sender,
                    fontWeight = FontWeight.SemiBold,
                    fontSize = 14.sp,
                    color = senderColor,
                )
                Text(
                    text = timeShort(msg.ts),
                    color = palette.dimmed,
                    fontSize = 11.sp,
                    modifier = Modifier.padding(start = 8.dp),
                )
            }
            Text(
                text = msg.body,
                fontSize = 14.sp,
                color = com.unibus.app.ui.theme.OnSurface,
                modifier = Modifier.padding(top = 1.dp),
            )
        }
    }
}
@@ -0,0 +1,48 @@
package com.unibus.app.ui
import androidx.compose.foundation.background
import androidx.compose.foundation.layout.Box
import androidx.compose.foundation.layout.size
import androidx.compose.foundation.shape.CircleShape
import androidx.compose.foundation.shape.RoundedCornerShape
import androidx.compose.material3.Text
import androidx.compose.runtime.Composable
import androidx.compose.ui.Alignment
import androidx.compose.ui.Modifier
import androidx.compose.ui.draw.clip
import androidx.compose.ui.graphics.Color
import androidx.compose.ui.text.font.FontWeight
import androidx.compose.ui.unit.Dp
import androidx.compose.ui.unit.dp
import androidx.compose.ui.unit.sp
import com.unibus.app.ui.theme.Brand5
/**
 * Initials avatar, the counterpart of the web app's <Avatar>. [rounded] selects
 * rounded corners (rooms / chat header) instead of a circle (users). [accent]
 * paints it with the brand color instead of neutral gray.
 */
@Composable
fun InitialsAvatar(
    text: String,
    size: Dp = 42.dp,
    rounded: Boolean = true,
    accent: Boolean = false,
    modifier: Modifier = Modifier,
) {
    // Corner radius and font size both scale with the avatar size.
    val side = size.value
    val outline = when {
        rounded -> RoundedCornerShape((side * 0.28f).dp)
        else -> CircleShape
    }
    val fill = when {
        accent -> Brand5
        else -> Color(0xFF3A3D44) // neutral gray, like Avatar color="gray"
    }

    Box(
        modifier = modifier
            .size(size)
            .clip(outline)
            .background(fill),
        contentAlignment = Alignment.Center,
    ) {
        Text(
            text = initials(text),
            color = Color.White,
            fontWeight = FontWeight.SemiBold,
            fontSize = (side * 0.36f).sp,
        )
    }
}
@@ -0,0 +1,154 @@
package com.unibus.app.ui
import androidx.compose.foundation.background
import androidx.compose.foundation.layout.Arrangement
import androidx.compose.foundation.layout.Box
import androidx.compose.foundation.layout.Column
import androidx.compose.foundation.layout.fillMaxSize
import androidx.compose.foundation.layout.fillMaxWidth
import androidx.compose.foundation.layout.padding
import androidx.compose.foundation.layout.size
import androidx.compose.foundation.shape.CircleShape
import androidx.compose.foundation.text.KeyboardActions
import androidx.compose.foundation.text.KeyboardOptions
import androidx.compose.material.icons.Icons
import androidx.compose.material.icons.filled.Lock
import androidx.compose.material.icons.filled.VpnKey
import androidx.compose.material3.Button
import androidx.compose.material3.Card
import androidx.compose.material3.CardDefaults
import androidx.compose.material3.CircularProgressIndicator
import androidx.compose.material3.Icon
import androidx.compose.material3.OutlinedTextField
import androidx.compose.material3.Text
import androidx.compose.runtime.Composable
import androidx.compose.runtime.getValue
import androidx.compose.runtime.mutableStateOf
import androidx.compose.runtime.remember
import androidx.compose.runtime.setValue
import androidx.compose.ui.Alignment
import androidx.compose.ui.Modifier
import androidx.compose.ui.draw.clip
import androidx.compose.ui.text.input.ImeAction
import androidx.compose.ui.text.input.PasswordVisualTransformation
import androidx.compose.ui.text.style.TextAlign
import androidx.compose.ui.unit.dp
import androidx.compose.ui.unit.sp
import com.unibus.app.ui.theme.Brand4
import com.unibus.app.ui.theme.Dark7
import com.unibus.app.ui.theme.Dark9
import com.unibus.app.ui.theme.LocalUnibusColors
/**
 * Login card: identity (handle) + password, brand padlock and a connect button.
 *
 * @param connecting true while a connect attempt is running; disables submit
 *   and shows a spinner inside the button.
 * @param error message to show under the fields, or null.
 * @param onLogin invoked with the trimmed handle and the password as typed.
 */
@Composable
fun LoginScreen(
    connecting: Boolean,
    error: String?,
    onLogin: (handle: String, password: String) -> Unit,
) {
    val colors = LocalUnibusColors.current
    var handle by remember { mutableStateOf("") }
    var password by remember { mutableStateOf("") }
    val ready = handle.trim().isNotEmpty() && password.isNotEmpty() && !connecting

    // Shared by the IME "go" action and the button.
    fun submit() {
        if (ready) onLogin(handle.trim(), password)
    }

    Box(
        modifier = Modifier
            .fillMaxSize()
            .background(Dark9),
        contentAlignment = Alignment.Center,
    ) {
        Card(
            modifier = Modifier
                .padding(24.dp)
                .fillMaxWidth(),
            colors = CardDefaults.cardColors(containerColor = Dark7),
            shape = androidx.compose.foundation.shape.RoundedCornerShape(16.dp),
        ) {
            Column(
                modifier = Modifier
                    .fillMaxWidth()
                    .padding(28.dp),
                horizontalAlignment = Alignment.CenterHorizontally,
                verticalArrangement = Arrangement.spacedBy(18.dp),
            ) {
                // ThemeIcon "light brand" — translucent circle with a padlock.
                Box(
                    modifier = Modifier
                        .size(60.dp)
                        .clip(CircleShape)
                        .background(Brand4.copy(alpha = 0.18f)),
                    contentAlignment = Alignment.Center,
                ) {
                    Icon(
                        Icons.Filled.Lock,
                        contentDescription = null,
                        tint = Brand4,
                        modifier = Modifier.size(30.dp),
                    )
                }
                Column(horizontalAlignment = Alignment.CenterHorizontally) {
                    Text("unibus", fontSize = 26.sp, color = Brand4)
                    Text(
                        "Mensajería cifrada de extremo a extremo",
                        color = colors.dimmed,
                        fontSize = 13.sp,
                        textAlign = TextAlign.Center,
                    )
                }
                OutlinedTextField(
                    value = handle,
                    onValueChange = { handle = it },
                    label = { Text("Identidad") },
                    placeholder = { Text("tu-handle") },
                    singleLine = true,
                    modifier = Modifier.fillMaxWidth(),
                    keyboardOptions = KeyboardOptions(imeAction = ImeAction.Next),
                )
                OutlinedTextField(
                    value = password,
                    onValueChange = { password = it },
                    label = { Text("Contraseña") },
                    placeholder = { Text("••••••••") },
                    singleLine = true,
                    visualTransformation = PasswordVisualTransformation(),
                    leadingIcon = { Icon(Icons.Filled.VpnKey, contentDescription = null) },
                    modifier = Modifier.fillMaxWidth(),
                    // Request a password keyboard so the IME treats the input as
                    // a secret (no suggestions or learning), not ordinary text.
                    keyboardOptions = KeyboardOptions(
                        keyboardType = androidx.compose.ui.text.input.KeyboardType.Password,
                        imeAction = ImeAction.Go,
                    ),
                    keyboardActions = KeyboardActions(onGo = { submit() }),
                )
                Text(
                    "Desbloquea tu identidad cifrada en este dispositivo",
                    color = colors.dimmed,
                    fontSize = 12.sp,
                    modifier = Modifier.fillMaxWidth(),
                )
                if (error != null) {
                    Text(error, color = androidx.compose.ui.graphics.Color(0xFFFF6B6B), fontSize = 13.sp)
                }
                Button(
                    onClick = { submit() },
                    enabled = ready,
                    modifier = Modifier.fillMaxWidth(),
                ) {
                    if (connecting) {
                        CircularProgressIndicator(
                            modifier = Modifier.size(18.dp),
                            strokeWidth = 2.dp,
                            color = androidx.compose.ui.graphics.Color.White,
                        )
                    } else {
                        Text("Conectar")
                    }
                }
            }
        }
    }
}
@@ -0,0 +1,199 @@
package com.unibus.app.ui
import androidx.compose.foundation.background
import androidx.compose.foundation.clickable
import androidx.compose.foundation.layout.Arrangement
import androidx.compose.foundation.layout.Box
import androidx.compose.foundation.layout.Column
import androidx.compose.foundation.layout.Row
import androidx.compose.foundation.layout.fillMaxSize
import androidx.compose.foundation.layout.fillMaxWidth
import androidx.compose.foundation.layout.padding
import androidx.compose.foundation.layout.size
import androidx.compose.foundation.lazy.LazyColumn
import androidx.compose.foundation.lazy.items
import androidx.compose.foundation.shape.CircleShape
import androidx.compose.foundation.shape.RoundedCornerShape
import androidx.compose.material.icons.Icons
import androidx.compose.material.icons.automirrored.filled.Logout
import androidx.compose.material.icons.filled.Lock
import androidx.compose.material.icons.filled.MoreVert
import androidx.compose.material.icons.filled.Search
import androidx.compose.material.icons.filled.Tag
import androidx.compose.material3.Badge
import androidx.compose.material3.DropdownMenu
import androidx.compose.material3.DropdownMenuItem
import androidx.compose.material3.HorizontalDivider
import androidx.compose.material3.Icon
import androidx.compose.material3.IconButton
import androidx.compose.material3.OutlinedTextField
import androidx.compose.material3.Text
import androidx.compose.runtime.Composable
import androidx.compose.runtime.getValue
import androidx.compose.runtime.mutableStateOf
import androidx.compose.runtime.remember
import androidx.compose.runtime.setValue
import androidx.compose.ui.Alignment
import androidx.compose.ui.Modifier
import androidx.compose.ui.draw.clip
import androidx.compose.ui.graphics.Color
import androidx.compose.ui.text.font.FontWeight
import androidx.compose.ui.text.style.TextOverflow
import androidx.compose.ui.unit.dp
import androidx.compose.ui.unit.sp
import com.unibus.app.data.Room
import com.unibus.app.data.User
import com.unibus.app.ui.theme.LocalUnibusColors
/**
 * Room list screen: a header with the signed-in user's avatar, handle and an
 * overflow menu, a search field, and the rooms that match the current query.
 *
 * The query is trimmed and lower-cased, then matched as a substring against
 * each room's name and against the body of each of its messages.
 *
 * @param user signed-in user; only its `handle` is read here.
 * @param rooms rooms to display, rendered in the order given.
 * @param onSelect invoked with the room id when a row is tapped.
 * @param onLogout invoked when "Desconectar" is picked from the overflow menu.
 */
@Composable
fun RoomListScreen(
    user: User,
    rooms: List<Room>,
    onSelect: (String) -> Unit,
    onLogout: () -> Unit,
) {
    val palette = LocalUnibusColors.current
    var query by remember { mutableStateOf("") }
    var menuOpen by remember { mutableStateOf(false) }

    // Normalised search term; an empty term means "show everything".
    val needle = query.trim().lowercase()
    val visibleRooms = when {
        needle.isEmpty() -> rooms
        else -> rooms.filter { candidate ->
            candidate.name.lowercase().contains(needle) ||
                candidate.messages.any { msg -> msg.body.lowercase().contains(needle) }
        }
    }

    Column(
        modifier = Modifier
            .fillMaxSize()
            .background(palette.sidebarBg),
    ) {
        // Header: avatar + handle + overflow menu
        Row(
            verticalAlignment = Alignment.CenterVertically,
            modifier = Modifier
                .fillMaxWidth()
                .padding(horizontal = 12.dp, vertical = 10.dp),
        ) {
            InitialsAvatar(user.handle, size = 36.dp, rounded = false, accent = true)
            Text(
                text = user.handle,
                color = Color.White,
                fontSize = 15.sp,
                fontWeight = FontWeight.SemiBold,
                maxLines = 1,
                overflow = TextOverflow.Ellipsis,
                modifier = Modifier
                    .weight(1f)
                    .padding(start = 10.dp),
            )
            Box {
                IconButton(onClick = { menuOpen = true }) {
                    Icon(
                        Icons.Filled.MoreVert,
                        contentDescription = "Menú",
                        tint = palette.dimmed,
                    )
                }
                DropdownMenu(
                    expanded = menuOpen,
                    onDismissRequest = { menuOpen = false },
                ) {
                    DropdownMenuItem(
                        text = { Text("Desconectar") },
                        leadingIcon = {
                            Icon(
                                Icons.AutoMirrored.Filled.Logout,
                                contentDescription = null,
                                modifier = Modifier.size(18.dp),
                            )
                        },
                        onClick = {
                            // Close the menu first, then hand control to the caller.
                            menuOpen = false
                            onLogout()
                        },
                    )
                }
            }
        }

        // Search field
        OutlinedTextField(
            value = query,
            onValueChange = { typed -> query = typed },
            singleLine = true,
            placeholder = { Text("Buscar rooms, usuarios, mensajes…") },
            leadingIcon = {
                Icon(
                    Icons.Filled.Search,
                    contentDescription = null,
                    modifier = Modifier.size(18.dp),
                )
            },
            modifier = Modifier
                .fillMaxWidth()
                .padding(horizontal = 12.dp, vertical = 4.dp),
        )
        HorizontalDivider(color = palette.divider)

        if (visibleRooms.isNotEmpty()) {
            LazyColumn(
                modifier = Modifier.fillMaxSize(),
                contentPadding = androidx.compose.foundation.layout.PaddingValues(6.dp),
                verticalArrangement = Arrangement.spacedBy(2.dp),
            ) {
                items(visibleRooms, key = { entry -> entry.id }) { entry ->
                    RoomItem(room = entry, onClick = { onSelect(entry.id) })
                }
            }
        } else {
            // Nothing matched the query (or there are no rooms at all).
            Text(
                text = "Sin resultados",
                color = palette.dimmed,
                fontSize = 14.sp,
                textAlign = androidx.compose.ui.text.style.TextAlign.Center,
                modifier = Modifier
                    .fillMaxWidth()
                    .padding(top = 24.dp),
            )
        }
    }
}
/**
 * One row of the room list: avatar, a lock/hash icon for encrypted/cleartext,
 * the room name, the short time of `lastTs`, the last message preview and an
 * unread badge when `unread` is positive.
 *
 * @param room room to render.
 * @param onClick invoked when the row is tapped.
 */
@Composable
private fun RoomItem(room: Room, onClick: () -> Unit) {
    val palette = LocalUnibusColors.current
    // Icon and its accessibility label both depend on the encryption flag.
    val kindIcon = if (room.encrypted) Icons.Filled.Lock else Icons.Filled.Tag
    val kindLabel = if (room.encrypted) "cifrada" else "abierta"

    Row(
        verticalAlignment = Alignment.CenterVertically,
        modifier = Modifier
            .fillMaxWidth()
            .clip(RoundedCornerShape(10.dp))
            .clickable(onClick = onClick)
            .padding(8.dp),
    ) {
        InitialsAvatar(room.name, size = 46.dp, rounded = true)
        Column(
            modifier = Modifier
                .weight(1f)
                .padding(start = 10.dp),
        ) {
            // First line: kind icon, name, time of last activity.
            Row(verticalAlignment = Alignment.CenterVertically) {
                Icon(
                    kindIcon,
                    contentDescription = kindLabel,
                    tint = palette.dimmed,
                    modifier = Modifier.size(13.dp),
                )
                Text(
                    text = room.name,
                    color = Color.White,
                    fontSize = 14.sp,
                    fontWeight = FontWeight.SemiBold,
                    maxLines = 1,
                    overflow = TextOverflow.Ellipsis,
                    modifier = Modifier
                        .weight(1f)
                        .padding(start = 4.dp),
                )
                Text(
                    text = timeShort(room.lastTs),
                    color = palette.dimmed,
                    fontSize = 11.sp,
                )
            }
            // Second line: last message preview and optional unread badge.
            Row(
                modifier = Modifier.padding(top = 2.dp),
                verticalAlignment = Alignment.CenterVertically,
            ) {
                Text(
                    text = room.lastMessage,
                    color = palette.dimmed,
                    fontSize = 12.sp,
                    maxLines = 1,
                    overflow = TextOverflow.Ellipsis,
                    modifier = Modifier.weight(1f),
                )
                if (room.unread > 0) {
                    Badge(
                        containerColor = palette.brand,
                        contentColor = Color.White,
                    ) {
                        Text(room.unread.toString())
                    }
                }
            }
        }
    }
}
@@ -0,0 +1,17 @@
package com.unibus.app.ui
import java.util.Calendar
/**
 * Avatar initials: at most two upper-case letters/digits taken from [s].
 *
 * Non-alphanumeric characters are dropped first. Returns "?" when nothing
 * remains (empty input or only punctuation/whitespace).
 */
fun initials(s: String): String {
    val cleaned = s.filter { it.isLetterOrDigit() }
    if (cleaned.isEmpty()) return "?"
    // Upper-case BEFORE truncating: some case mappings expand ("ß" -> "SS"),
    // so truncating first could return more than two characters.
    return cleaned.uppercase().take(2)
}
/**
 * Short 24-hour clock time ("HH:mm") for [ts], given in epoch milliseconds.
 * Uses the calendar returned by [Calendar.getInstance].
 */
fun timeShort(ts: Long): String {
    val moment = Calendar.getInstance()
    moment.timeInMillis = ts
    // Both fields are zero-padded to two digits and joined with a colon.
    return listOf(Calendar.HOUR_OF_DAY, Calendar.MINUTE)
        .joinToString(separator = ":") { field ->
            moment.get(field).toString().padStart(2, '0')
        }
}
@@ -0,0 +1,80 @@
package com.unibus.app.ui.theme
import androidx.compose.foundation.isSystemInDarkTheme
import androidx.compose.material3.MaterialTheme
import androidx.compose.material3.Typography
import androidx.compose.material3.darkColorScheme
import androidx.compose.runtime.Composable
import androidx.compose.runtime.staticCompositionLocalOf
import androidx.compose.ui.graphics.Color
import androidx.compose.ui.text.font.FontWeight
import androidx.compose.ui.unit.sp
// --- Brand: unibus indigo/violet (same tones as the web app's Mantine theme) ---
val Brand2 = Color(0xFFB5A3F5) // brand.2
val Brand3 = Color(0xFF8D70ED) // brand.3 — legible on a dark background
val Brand4 = Color(0xFF6C47E6) // brand.4 — primary accent
val Brand5 = Color(0xFF5A2FE2) // brand.5 — filled
// --- Dark grays equivalent to Mantine's dark.* scale ---
val Dark9 = Color(0xFF101113) // app background (login)
val Dark8 = Color(0xFF141517) // sidebar / room list
val Dark7 = Color(0xFF1A1B1E) // chat panel / surface
val Dark6 = Color(0xFF25262B) // active / elevated item
val Dark5 = Color(0xFF2C2E33) // input fields
val Dark4 = Color(0xFF373A40) // borders / dividers
val Dimmed = Color(0xFF909296) // secondary text
val OnSurface = Color(0xFFE3E3E6) // primary text
/**
 * Color tokens that Material 3 does not express directly and that the UI
 * mirrors from the web app (dark.6/7/8/9 shades, the "dimmed" color, borders).
 * They are exposed through a CompositionLocal so any composable can read them
 * without prop-drilling.
 *
 * Every field defaults to one of the palette constants declared in this file.
 */
data class UnibusColors(
val appBg: Color = Dark9,
val sidebarBg: Color = Dark8,
val chatBg: Color = Dark7,
val activeItem: Color = Dark6,
val field: Color = Dark5,
val divider: Color = Dark4,
val dimmed: Color = Dimmed,
val brand: Color = Brand4,
)
// CompositionLocal carrying the tokens; its default is UnibusColors() with the
// default field values, so reads work even when no provider is installed.
val LocalUnibusColors = staticCompositionLocalOf { UnibusColors() }
// Material 3 dark color scheme built from the brand and dark.* palette values
// declared in this file. Roles not listed here keep darkColorScheme()'s defaults.
private val UnibusDarkScheme = darkColorScheme(
primary = Brand4,
onPrimary = Color.White,
primaryContainer = Brand5,
onPrimaryContainer = Color.White,
secondary = Brand3,
background = Dark9,
onBackground = OnSurface,
surface = Dark7,
onSurface = OnSurface,
surfaceVariant = Dark6,
onSurfaceVariant = Dimmed,
outline = Dark4,
error = Color(0xFFFF6B6B),
)
// Material 3 default type scale with four overrides: heavier (650) large and
// medium titles, a 14sp medium body, and a semi-bold large label.
private val UnibusTypography = Typography().let { defaults ->
    val titleWeight = FontWeight(650)
    Typography(
        titleLarge = defaults.titleLarge.copy(fontWeight = titleWeight),
        titleMedium = defaults.titleMedium.copy(fontWeight = titleWeight),
        bodyMedium = defaults.bodyMedium.copy(fontSize = 14.sp),
        labelLarge = defaults.labelLarge.copy(fontWeight = FontWeight.SemiBold),
    )
}
/**
 * Root theme of the app: wraps [content] in a [MaterialTheme] that uses the
 * unibus dark color scheme and typography.
 *
 * @param content the UI to render inside the theme.
 */
@Composable
fun UnibusTheme(content: @Composable () -> Unit) {
    // unibus is dark-first: the system light/dark setting is deliberately NOT
    // consulted. The previous discarded isSystemInDarkTheme() call had no effect
    // on the result and only needed a lint suppression, so it is gone.
    MaterialTheme(
        colorScheme = UnibusDarkScheme,
        typography = UnibusTypography,
        content = content,
    )
}
@@ -0,0 +1,18 @@
<?xml version="1.0" encoding="utf-8"?>
<vector xmlns:android="http://schemas.android.com/apk/res/android"
android:width="108dp"
android:height="108dp"
android:viewportWidth="108"
android:viewportHeight="108">
<!-- Material "lock" glyph, white, centered in the adaptive-icon safe zone.
24dp source scaled x3 (=72dp) and translated by 18 to center it. -->
<group
android:scaleX="3"
android:scaleY="3"
android:translateX="18"
android:translateY="18">
<path
android:fillColor="#FFFFFF"
android:pathData="M12,17c1.1,0 2,-0.9 2,-2s-0.9,-2 -2,-2 -2,0.9 -2,2 0.9,2 2,2zM18,8h-1V6c0,-2.76 -2.24,-5 -5,-5S7,3.24 7,6v2H6c-1.1,0 -2,0.9 -2,2v10c0,1.1 0.9,2 2,2h12c1.1,0 2,-0.9 2,-2V10c0,-1.1 -0.9,-2 -2,-2zM9,6c0,-1.66 1.34,-3 3,-3s3,1.34 3,3v2H9V6z" />
</group>
</vector>
@@ -0,0 +1,5 @@
<?xml version="1.0" encoding="utf-8"?>
<adaptive-icon xmlns:android="http://schemas.android.com/apk/res/android">
<background android:drawable="@color/unibus_brand" />
<foreground android:drawable="@drawable/ic_launcher_foreground" />
</adaptive-icon>
@@ -0,0 +1,5 @@
<?xml version="1.0" encoding="utf-8"?>
<adaptive-icon xmlns:android="http://schemas.android.com/apk/res/android">
<background android:drawable="@color/unibus_brand" />
<foreground android:drawable="@drawable/ic_launcher_foreground" />
</adaptive-icon>
@@ -0,0 +1,7 @@
<?xml version="1.0" encoding="utf-8"?>
<resources>
<!-- dark.9 — app background -->
<color name="unibus_bg">#101113</color>
<!-- brand.5 — índigo/violeta accent, used as launcher icon background -->
<color name="unibus_brand">#5A2FE2</color>
</resources>
@@ -0,0 +1,4 @@
<?xml version="1.0" encoding="utf-8"?>
<resources>
<string name="app_name">unibus</string>
</resources>
@@ -0,0 +1,11 @@
<?xml version="1.0" encoding="utf-8"?>
<resources>
<!-- Compose-only host theme: no action bar, dark window background matching
the app's dark.9 surface so there is no white flash before Compose draws. -->
<style name="Theme.Unibus" parent="android:Theme.Material.NoActionBar">
<item name="android:windowBackground">@color/unibus_bg</item>
<item name="android:statusBarColor">@color/unibus_bg</item>
<item name="android:navigationBarColor">@color/unibus_bg</item>
<item name="android:windowLightStatusBar">false</item>
</style>
</resources>
+5
View File
@@ -0,0 +1,5 @@
plugins {
id("com.android.application") version "8.5.2" apply false
id("org.jetbrains.kotlin.android") version "1.9.24" apply false
id("org.jetbrains.kotlin.plugin.serialization") version "1.9.24" apply false
}
+5
View File
@@ -0,0 +1,5 @@
org.gradle.jvmargs=-Xmx2048m -Dfile.encoding=UTF-8
android.useAndroidX=true
android.nonTransitiveRClass=true
kotlin.code.style=official
org.gradle.caching=true
Binary file not shown.
+7
View File
@@ -0,0 +1,7 @@
distributionBase=GRADLE_USER_HOME
distributionPath=wrapper/dists
distributionUrl=https\://services.gradle.org/distributions/gradle-8.7-bin.zip
networkTimeout=10000
validateDistributionUrl=true
zipStoreBase=GRADLE_USER_HOME
zipStorePath=wrapper/dists
Vendored Executable
+249
View File
@@ -0,0 +1,249 @@
#!/bin/sh
#
# Copyright © 2015-2021 the original authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
##############################################################################
#
# Gradle start up script for POSIX generated by Gradle.
#
# Important for running:
#
# (1) You need a POSIX-compliant shell to run this script. If your /bin/sh is
# noncompliant, but you have some other compliant shell such as ksh or
# bash, then to run this script, type that shell name before the whole
# command line, like:
#
# ksh Gradle
#
# Busybox and similar reduced shells will NOT work, because this script
# requires all of these POSIX shell features:
# * functions;
# * expansions «$var», «${var}», «${var:-default}», «${var+SET}»,
# «${var#prefix}», «${var%suffix}», and «$( cmd )»;
# * compound commands having a testable exit status, especially «case»;
# * various built-in commands including «command», «set», and «ulimit».
#
# Important for patching:
#
# (2) This script targets any POSIX shell, so it avoids extensions provided
# by Bash, Ksh, etc; in particular arrays are avoided.
#
# The "traditional" practice of packing multiple parameters into a
# space-separated string is a well documented source of bugs and security
# problems, so this is (mostly) avoided, by progressively accumulating
# options in "$@", and eventually passing that to Java.
#
# Where the inherited environment variables (DEFAULT_JVM_OPTS, JAVA_OPTS,
# and GRADLE_OPTS) rely on word-splitting, this is performed explicitly;
# see the in-line comments for details.
#
# There are tweaks for specific operating systems such as AIX, CygWin,
# Darwin, MinGW, and NonStop.
#
# (3) This script is generated from the Groovy template
# https://github.com/gradle/gradle/blob/HEAD/subprojects/plugins/src/main/resources/org/gradle/api/internal/plugins/unixStartScript.txt
# within the Gradle project.
#
# You can find Gradle at https://github.com/gradle/gradle/.
#
##############################################################################
# Attempt to set APP_HOME
# Resolve links: $0 may be a link
app_path=$0
# Need this for daisy-chained symlinks.
while
APP_HOME=${app_path%"${app_path##*/}"} # leaves a trailing /; empty if no leading path
[ -h "$app_path" ]
do
ls=$( ls -ld "$app_path" )
link=${ls#*' -> '}
case $link in #(
/*) app_path=$link ;; #(
*) app_path=$APP_HOME$link ;;
esac
done
# This is normally unused
# shellcheck disable=SC2034
APP_BASE_NAME=${0##*/}
# Discard cd standard output in case $CDPATH is set (https://github.com/gradle/gradle/issues/25036)
APP_HOME=$( cd "${APP_HOME:-./}" > /dev/null && pwd -P ) || exit
# Use the maximum available, or set MAX_FD != -1 to use that value.
MAX_FD=maximum
warn () {
echo "$*"
} >&2
die () {
echo
echo "$*"
echo
exit 1
} >&2
# OS specific support (must be 'true' or 'false').
cygwin=false
msys=false
darwin=false
nonstop=false
case "$( uname )" in #(
CYGWIN* ) cygwin=true ;; #(
Darwin* ) darwin=true ;; #(
MSYS* | MINGW* ) msys=true ;; #(
NONSTOP* ) nonstop=true ;;
esac
CLASSPATH=$APP_HOME/gradle/wrapper/gradle-wrapper.jar
# Determine the Java command to use to start the JVM.
if [ -n "$JAVA_HOME" ] ; then
if [ -x "$JAVA_HOME/jre/sh/java" ] ; then
# IBM's JDK on AIX uses strange locations for the executables
JAVACMD=$JAVA_HOME/jre/sh/java
else
JAVACMD=$JAVA_HOME/bin/java
fi
if [ ! -x "$JAVACMD" ] ; then
die "ERROR: JAVA_HOME is set to an invalid directory: $JAVA_HOME
Please set the JAVA_HOME variable in your environment to match the
location of your Java installation."
fi
else
JAVACMD=java
if ! command -v java >/dev/null 2>&1
then
die "ERROR: JAVA_HOME is not set and no 'java' command could be found in your PATH.
Please set the JAVA_HOME variable in your environment to match the
location of your Java installation."
fi
fi
# Increase the maximum file descriptors if we can.
if ! "$cygwin" && ! "$darwin" && ! "$nonstop" ; then
case $MAX_FD in #(
max*)
# In POSIX sh, ulimit -H is undefined. That's why the result is checked to see if it worked.
# shellcheck disable=SC2039,SC3045
MAX_FD=$( ulimit -H -n ) ||
warn "Could not query maximum file descriptor limit"
esac
case $MAX_FD in #(
'' | soft) :;; #(
*)
# In POSIX sh, ulimit -n is undefined. That's why the result is checked to see if it worked.
# shellcheck disable=SC2039,SC3045
ulimit -n "$MAX_FD" ||
warn "Could not set maximum file descriptor limit to $MAX_FD"
esac
fi
# Collect all arguments for the java command, stacking in reverse order:
# * args from the command line
# * the main class name
# * -classpath
# * -D...appname settings
# * --module-path (only if needed)
# * DEFAULT_JVM_OPTS, JAVA_OPTS, and GRADLE_OPTS environment variables.
# For Cygwin or MSYS, switch paths to Windows format before running java
if "$cygwin" || "$msys" ; then
APP_HOME=$( cygpath --path --mixed "$APP_HOME" )
CLASSPATH=$( cygpath --path --mixed "$CLASSPATH" )
JAVACMD=$( cygpath --unix "$JAVACMD" )
# Now convert the arguments - kludge to limit ourselves to /bin/sh
for arg do
if
case $arg in #(
-*) false ;; # don't mess with options #(
/?*) t=${arg#/} t=/${t%%/*} # looks like a POSIX filepath
[ -e "$t" ] ;; #(
*) false ;;
esac
then
arg=$( cygpath --path --ignore --mixed "$arg" )
fi
# Roll the args list around exactly as many times as the number of
# args, so each arg winds up back in the position where it started, but
# possibly modified.
#
# NB: a `for` loop captures its iteration list before it begins, so
# changing the positional parameters here affects neither the number of
# iterations, nor the values presented in `arg`.
shift # remove old arg
set -- "$@" "$arg" # push replacement arg
done
fi
# Add default JVM options here. You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script.
DEFAULT_JVM_OPTS='"-Xmx64m" "-Xms64m"'
# Collect all arguments for the java command:
# * DEFAULT_JVM_OPTS, JAVA_OPTS, JAVA_OPTS, and optsEnvironmentVar are not allowed to contain shell fragments,
# and any embedded shellness will be escaped.
# * For example: A user cannot expect ${Hostname} to be expanded, as it is an environment variable and will be
# treated as '${Hostname}' itself on the command line.
set -- \
"-Dorg.gradle.appname=$APP_BASE_NAME" \
-classpath "$CLASSPATH" \
org.gradle.wrapper.GradleWrapperMain \
"$@"
# Stop when "xargs" is not available.
if ! command -v xargs >/dev/null 2>&1
then
die "xargs is not available"
fi
# Use "xargs" to parse quoted args.
#
# With -n1 it outputs one arg per line, with the quotes and backslashes removed.
#
# In Bash we could simply go:
#
# readarray ARGS < <( xargs -n1 <<<"$var" ) &&
# set -- "${ARGS[@]}" "$@"
#
# but POSIX shell has neither arrays nor command substitution, so instead we
# post-process each arg (as a line of input to sed) to backslash-escape any
# character that might be a shell metacharacter, then use eval to reverse
# that process (while maintaining the separation between arguments), and wrap
# the whole thing up as a single "set" statement.
#
# This will of course break if any of these variables contains a newline or
# an unmatched quote.
#
eval "set -- $(
printf '%s\n' "$DEFAULT_JVM_OPTS $JAVA_OPTS $GRADLE_OPTS" |
xargs -n1 |
sed ' s~[^-[:alnum:]+,./:=@_]~\\&~g; ' |
tr '\n' ' '
)" '"$@"'
exec "$JAVACMD" "$@"
+92
View File
@@ -0,0 +1,92 @@
@rem
@rem Copyright 2015 the original author or authors.
@rem
@rem Licensed under the Apache License, Version 2.0 (the "License");
@rem you may not use this file except in compliance with the License.
@rem You may obtain a copy of the License at
@rem
@rem https://www.apache.org/licenses/LICENSE-2.0
@rem
@rem Unless required by applicable law or agreed to in writing, software
@rem distributed under the License is distributed on an "AS IS" BASIS,
@rem WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
@rem See the License for the specific language governing permissions and
@rem limitations under the License.
@rem
@if "%DEBUG%"=="" @echo off
@rem ##########################################################################
@rem
@rem Gradle startup script for Windows
@rem
@rem ##########################################################################
@rem Set local scope for the variables with windows NT shell
if "%OS%"=="Windows_NT" setlocal
set DIRNAME=%~dp0
if "%DIRNAME%"=="" set DIRNAME=.
@rem This is normally unused
set APP_BASE_NAME=%~n0
set APP_HOME=%DIRNAME%
@rem Resolve any "." and ".." in APP_HOME to make it shorter.
for %%i in ("%APP_HOME%") do set APP_HOME=%%~fi
@rem Add default JVM options here. You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script.
set DEFAULT_JVM_OPTS="-Xmx64m" "-Xms64m"
@rem Find java.exe
if defined JAVA_HOME goto findJavaFromJavaHome
set JAVA_EXE=java.exe
%JAVA_EXE% -version >NUL 2>&1
if %ERRORLEVEL% equ 0 goto execute
echo. 1>&2
echo ERROR: JAVA_HOME is not set and no 'java' command could be found in your PATH. 1>&2
echo. 1>&2
echo Please set the JAVA_HOME variable in your environment to match the 1>&2
echo location of your Java installation. 1>&2
goto fail
:findJavaFromJavaHome
set JAVA_HOME=%JAVA_HOME:"=%
set JAVA_EXE=%JAVA_HOME%/bin/java.exe
if exist "%JAVA_EXE%" goto execute
echo. 1>&2
echo ERROR: JAVA_HOME is set to an invalid directory: %JAVA_HOME% 1>&2
echo. 1>&2
echo Please set the JAVA_HOME variable in your environment to match the 1>&2
echo location of your Java installation. 1>&2
goto fail
:execute
@rem Setup the command line
set CLASSPATH=%APP_HOME%\gradle\wrapper\gradle-wrapper.jar
@rem Execute Gradle
"%JAVA_EXE%" %DEFAULT_JVM_OPTS% %JAVA_OPTS% %GRADLE_OPTS% "-Dorg.gradle.appname=%APP_BASE_NAME%" -classpath "%CLASSPATH%" org.gradle.wrapper.GradleWrapperMain %*
:end
@rem End local scope for the variables with windows NT shell
if %ERRORLEVEL% equ 0 goto mainEnd
:fail
rem Set variable GRADLE_EXIT_CONSOLE if you need the _script_ return code instead of
rem the _cmd.exe /c_ return code!
set EXIT_CODE=%ERRORLEVEL%
if %EXIT_CODE% equ 0 set EXIT_CODE=1
if not ""=="%GRADLE_EXIT_CONSOLE%" exit %EXIT_CODE%
exit /b %EXIT_CODE%
:mainEnd
if "%OS%"=="Windows_NT" endlocal
:omega
+24
View File
@@ -0,0 +1,24 @@
pluginManagement {
repositories {
google {
content {
includeGroupByRegex("com\\.android.*")
includeGroupByRegex("com\\.google.*")
includeGroupByRegex("androidx.*")
}
}
mavenCentral()
gradlePluginPortal()
}
}
dependencyResolutionManagement {
repositoriesMode.set(RepositoriesMode.FAIL_ON_PROJECT_REPOS)
repositories {
google()
mavenCentral()
}
}
rootProject.name = "unibus"
include(":app")
+1 -82
View File
@@ -2,7 +2,7 @@
name: unibus name: unibus
lang: go lang: go
domain: infra domain: infra
version: 0.11.0 version: 0.8.0
description: "Bus de mensajería unificado sobre NATS+JetStream con cifrado E2E por room (megolm/olm reducido): service de membresía/claves, librería cliente y peers demo." description: "Bus de mensajería unificado sobre NATS+JetStream con cifrado E2E por room (megolm/olm reducido): service de membresía/claves, librería cliente y peers demo."
tags: [service, messaging, nats, e2e] tags: [service, messaging, nats, e2e]
uses_functions: uses_functions:
@@ -122,21 +122,6 @@ Para apuntar a un NATS externo en producción: `--nats-url nats://host:4222` en
las rutas GET de lectura. Confía en la red interna. Las rutas mutantes las rutas GET de lectura. Confía en la red interna. Las rutas mutantes
(`/rooms`, `/invite`, `/rekey`) sí exigen firma Ed25519 del owner sobre los (`/rooms`, `/invite`, `/rekey`) sí exigen firma Ed25519 del owner sobre los
bytes canónicos de la request. Endurecer es fase posterior. bytes canónicos de la request. Endurecer es fase posterior.
- **Gestión de usuarios: storage unificado, alta por dos vías.** El allowlist de
usuarios vive en el MISMO store que las rooms (`pkg/membership.Store`): SQLite en
single-node, JetStream KV replicado (`UNIBUS_users`) en cluster. El `Server` ya
tiene ese store privilegiado abierto (es quien sirve el KV en cada nodo), así que
expone `GET/POST /users` y `POST /users/{signpub}/revoke` como API HTTP admin-only,
simétrica con las rutas de rooms: el panel de administración firma como admin y el
server ejecuta la mutación contra el mismo store. El panel NO necesita `--db`, ni la
identidad interna, ni correr en un nodo del cluster; funciona idéntico en single-node
y cluster. La autorización es default-deny: solo un firmante que el store confirma como
`role == "admin"` activo pasa, cualquier otro recibe 403 (encima de la firma+nonce+TLS
ya existentes). La CLI `membershipd user add --store kv` sigue existiendo SOLO para
sembrar el admin #0 (bootstrap del huevo-gallina: sin un admin sembrado no hay quién
firme el primer `POST /users`); a partir de ahí toda la gestión es HTTP admin-only. El
alta es idempotente igual que la CLI: re-alta de una clave ya registrada = 409, sin
sobrescribir ni elevar rol; el revoke es un flip de status (sin hard-delete), auditable.
- **Identidad = secreto crítico.** El archivo de identidad (`worker.id`, - **Identidad = secreto crítico.** El archivo de identidad (`worker.id`,
`chat.id`) contiene las claves privadas (Ed25519 + X25519). Se escribe 0600. `chat.id`) contiene las claves privadas (Ed25519 + X25519). Se escribe 0600.
Perderlo = mensajes ilegibles, sin recuperación. Trátalo como una clave SSH. Perderlo = mensajes ilegibles, sin recuperación. Trátalo como una clave SSH.
@@ -169,72 +154,6 @@ agent.<nombre>.{in,out} inbox/outbox de agente LLM (agent.scout.in)
## Capability growth log ## Capability growth log
- v0.11.0 (2026-06-07) — flag dedicado `UNIBUS_NATS_MONITOR` que abre el endpoint
de monitoring HTTP del nats-server embebido (`127.0.0.1:8222`, loopback only) de
forma DESACOPLADA del debug-log. Antes el monitoring solo se abría con
`UNIBUS_NATS_DEBUG=1`, que además encendía el log verboso del nats-server
(rutas/RAFT/subjects a journald en claro) — incompatible con el endurecimiento
del issue 0007. El cómputo de los toggles se extrae a una función pura
`natsLogOpts(debugEnv, monitorEnv) (noLog, debug, trace, monitor)`: `MONITOR=1`
abre el endpoint dejando el log en silencio (`NoLog` true / `Debug` false), y se
mantiene el acoplamiento inverso por compatibilidad (`DEBUG` sigue implicando
`MONITOR`). El bind loopback `127.0.0.1` queda hardcoded — el monitoring NUNCA es
público y no lleva auth; lo lee un scraper local que empuja a VictoriaMetrics
(dashboard `unibus-nats` en `fleet_monitoring`). Se versiona el cableado de
deploy: drop-in systemd aditivo `membershipd-cluster.service.d/nats-monitor.conf`
(`Environment=UNIBUS_NATS_MONITOR=1`) + sección "NATS server metrics" en el
README del cluster con el runbook de activación rolling (magnus→homer→datardos)
y gate de reconvergencia R3 (`followers 2/2`) entre nodos. Tests nuevos: tabla
pura del desacoplamiento (monitor on ⇒ log NO debug; debug ⇒ monitor; default
cerrado) + server real con `MONITOR=1` que confirma `/varz` 200 en loopback:8222
y server sin flag con el endpoint cerrado. Cambios 100% aditivos: sin el flag el
comportamiento es idéntico; build/test verdes.
- v0.10.0 (2026-06-07) — API HTTP admin-only de gestión de usuarios, cerrando la
última asimetría del control plane: las rooms tenían superficie HTTP firmada
(`POST /rooms`, etc.) pero los users solo se gestionaban por CLI local o acceso
directo al store. Se añaden `GET /users` (lista completa, incluidos revocados),
`POST /users` (alta `{sign_pub, handle, role}`: valida hex de 64 chars + role en
`{admin, member}`, 409 idempotente que no sobrescribe ni eleva rol) y
`POST /users/{signpub}/revoke` (flip de status, sin hard-delete). Los tres pasan por
un helper `requireAdmin` default-deny que confirma contra el store que el firmante
autenticado es un user `role == "admin"` activo (el endpoint id es un hash one-way de
la clave, así que el contexto lleva ahora también el `sign_pub` hex del firmante para
resolver `GetUser`); cualquier otro firmante recibe 403, encima de la firma+nonce+TLS+
enforce ya heredadas del middleware. NO se abre conexión KV nueva ni se usa la identidad
interna: el server escribe vía su `s.store` privilegiado, el MISMO que las rooms (SQLite
single-node, KV `UNIBUS_users` en cluster). `pkg/client` gana `ListUsers/AddUser/RevokeUser`
(tipo plano `UserInfo`) firmando como admin, así la pestaña Users del panel deja de
necesitar `--db`/acceso KV directo. La CLI `membershipd user add --store kv` queda SOLO
para sembrar el admin #0 (bootstrap). La validación de `sign_pub` se unifica en
`membership.ValidateSignPubHex`, reusada por la CLI y los handlers. Tests nuevos:
no-admin → 403 en los tres endpoints, roundtrip admin add→list→revoke, y validación
(hex inválido → 400, role inválido → 400, re-alta → 409), más un test de cliente contra
un membershipd embebido. Cambios 100% aditivos: el comportamiento single-node y de las
rutas de rooms no cambia; vet/build/test verdes.
- v0.9.0 (2026-06-07) — cierre de los gaps que el despliegue del cluster (report
0011) dejó abiertos (report 0012). (GAP A) Nueva capability `membershipd user
add|list|revoke --store kv`: alta/baja de usuarios contra el KV replicado del
cluster EN MARCHA, sin el procedimiento de parar-sembrar-rearrancar. Usa la
conexión interna privilegiada — el daemon persiste su identidad de servicio con
`--internal-id-file` (cada nodo genera/carga la suya, 0600 junto a las claves TLS)
y la CLI, ejecutada por loopback en un nodo, presenta esa nkey que el
autenticador reconoce con permisos plenos de JetStream; ninguna identidad de
usuario normal puede tocar los buckets `KV_UNIBUS_*` bajo la ACL por-subject. El
alta es idempotente (re-alta de la misma clave = `ErrUserExists` explícito, sin
sobrescribir ni elevar rol), commitea con quórum 2/3 (HA, imprime
`followers_current`) y rechaza un destino remoto sin `--ca` (igual que
`migrate-to-kv`). (GAP B) Nuevo `cmd/clientcheck`: verificación end-to-end real
con un cliente autenticado (identidad operator, nkey+TLS+https) que crea una room
E2E, publica y recibe descifrado contra el cluster vivo, incluido un nodo parado a
media transmisión donde el cliente hace failover a un superviviente y sigue
recibiendo con cero pérdida (quórum 2/3) — el plano de datos que el chaos test del
0011 nunca probó. (GAP C) Runbook `deploy/cluster/README.md` corregido: el orden
de arranque "magnus solo y verifica healthz" deadlockeaba (un nodo solo no tiene
quórum del meta-group y nunca sirve healthz); se documenta el arranque por quórum,
que R1 es un SPOF inservible (ir directo a R3) y la nueva vía de alta con el
cluster vivo. La plantilla de deploy (unit + `deploy-cluster.sh`) emite ya
`INTERNAL_ID_FILE` y el flag. Verificado contra los 3 VPS reales (magnus + homer +
datardos); posture enforce+ACL+TLS+R3 intacta.
- v0.8.0 (2026-06-07) — completar y endurecer el cluster (issue 0006, fases - v0.8.0 (2026-06-07) — completar y endurecer el cluster (issue 0006, fases
0006a–0006g) que cierra los bloqueantes de la auditoría dedicada del cluster 0006a–0006g) que cierra los bloqueantes de la auditoría dedicada del cluster
(report 0008) y cablea el control plane descentralizado que 0003 dejó a medias. (report 0008) y cablea el control plane descentralizado que 0003 dejó a medias.
-260
View File
@@ -1,260 +0,0 @@
// Command clientcheck is an end-to-end verification client for a live unibus
// cluster (issue 0011 GAP B). The 0011 chaos test validated only the control
// plane (healthz + meta/stream-leader failover + KV readable with 2/3); it never
// connected an authenticated bus client (nkey + TLS) to create a room and
// publish/subscribe through it, least of all across a node loss. clientcheck does
// exactly that with a real identity (the operator), so the data-plane end-to-end
// path — connect, create an E2E room, publish, receive decrypted — is exercised
// against the running cluster, including while a node is stopped.
//
// It is a reusable tool, not a throwaway script: point it at the cluster's CA,
// an identity file, and the NATS + control-plane seed lists.
//
// # golden: connect, create an E2E room, publish N, confirm N decrypted back
// clientcheck --ca ca.crt --identity-file operator.id \
// --nats-seeds nats://A:4250,nats://B:4250,nats://C:4250 \
// --ctrl-seeds https://A:8470,https://B:8470,https://C:8470 --messages 5
//
// # loop: publish a counter every interval for the duration, logging the node
// # it is attached to — stop a node mid-run (systemctl stop membershipd-cluster)
// # and watch it fail over to a survivor and keep receiving (quorum 2/3).
// clientcheck ... --mode loop --duration 45s --interval 1s
package main
import (
"crypto/rand"
"encoding/hex"
"flag"
"fmt"
"log"
"sort"
"strings"
"sync"
"time"
"github.com/enmanuel/unibus/pkg/busauth"
"github.com/enmanuel/unibus/pkg/client"
"github.com/enmanuel/unibus/pkg/frame"
"github.com/enmanuel/unibus/pkg/room"
)
// main parses the flags, validates them, connects an authenticated client to
// the cluster, creates a throwaway E2E room and runs the selected check mode
// (golden or loop) against that room. Any failure terminates the process via
// log.Fatalf.
func main() {
	var (
		caPath    = flag.String("ca", "", "bus CA cert pinning TLS on both planes (required for a secured cluster)")
		idFile    = flag.String("identity-file", "", "path to the client identity JSON (e.g. `pass show unibus/operator-identity` written 0600) (required)")
		natsSeeds = flag.String("nats-seeds", "", "comma-separated NATS urls of the cluster nodes (required)")
		ctrlSeeds = flag.String("ctrl-seeds", "", "comma-separated control-plane https urls of the cluster nodes (required)")
		subject   = flag.String("subject", "test.gapcheck", "test room subject PREFIX; a random token is appended so runs never collide with real rooms")
		messages  = flag.Int("messages", 5, "golden mode: number of messages to publish and expect back")
		mode      = flag.String("mode", "golden", "golden (publish N, verify N decrypted) | loop (publish a counter for --duration, for failover testing)")
		duration  = flag.Duration("duration", 30*time.Second, "loop mode: how long to keep publishing")
		interval  = flag.Duration("interval", 1*time.Second, "loop mode: delay between published messages")
	)
	flag.Parse()
	if *idFile == "" || *natsSeeds == "" || *ctrlSeeds == "" {
		log.Fatalf("clientcheck: --identity-file, --nats-seeds and --ctrl-seeds are required")
	}

	// Validate the mode and its parameters BEFORE touching the cluster: a typo
	// in --mode must not be reported only after a room has already been
	// created, a negative --messages would panic when sizing the expectation
	// slice, and a non-positive --interval would publish in a tight loop.
	switch *mode {
	case "golden":
		if *messages < 0 {
			log.Fatalf("clientcheck: --messages must be >= 0, got %d", *messages)
		}
	case "loop":
		if *interval <= 0 {
			log.Fatalf("clientcheck: --interval must be positive, got %s", *interval)
		}
	default:
		log.Fatalf("clientcheck: --mode must be golden or loop, got %q", *mode)
	}

	id, err := client.LoadIdentity(*idFile)
	if err != nil {
		log.Fatalf("clientcheck: load identity: %v", err)
	}
	natsList := splitCSV(*natsSeeds)
	ctrlList := splitCSV(*ctrlSeeds)
	if len(natsList) == 0 || len(ctrlList) == 0 {
		log.Fatalf("clientcheck: empty --nats-seeds or --ctrl-seeds")
	}

	// Build the secure client options: nkey on the data plane, TLS pinned to the
	// bus CA on both planes, and the FULL seed lists so nats.go fails over to a
	// surviving node when the attached one dies (the failover this tool
	// verifies). The lists are passed whole — not natsList[1:] — so the first
	// seed stays in the failover pool.
	opts := client.Options{
		NatsServers: natsList,
		CtrlURLs:    ctrlList,
	}
	if *caPath != "" {
		tlsCfg, err := busauth.LoadCATLSConfig(*caPath)
		if err != nil {
			log.Fatalf("clientcheck: load CA: %v", err)
		}
		opts.UseNkey = true
		opts.TLS = tlsCfg
		opts.CtrlTLS = tlsCfg
		for _, u := range ctrlList {
			if !strings.HasPrefix(u, "https://") {
				log.Fatalf("clientcheck: control URL %q must be https:// when --ca is set", u)
			}
		}
	}
	c, err := client.NewWithOptions(natsList[0], ctrlList[0], id, opts)
	if err != nil {
		log.Fatalf("clientcheck: connect: %v", err)
	}
	defer c.Close()
	log.Printf("connected: endpoint=%s nats=%s", c.Endpoint().ID, c.ConnectedServer())

	// Create an EPHEMERAL E2E room (encrypted + signed, NOT persisted): the test
	// stays end-to-end encrypted (the cluster requires encryption on a public
	// bind) while leaving no durable JetStream stream behind. The random subject
	// token guarantees the room is unique and never a real room.
	rnd := make([]byte, 8)
	if _, err := rand.Read(rnd); err != nil {
		log.Fatalf("clientcheck: random: %v", err)
	}
	subj := fmt.Sprintf("%s.%s", *subject, hex.EncodeToString(rnd))
	policy := room.Policy{Encrypt: true, Persist: false, SignMsgs: true}
	roomID, err := c.CreateRoom(subj, policy)
	if err != nil {
		log.Fatalf("clientcheck: create room: %v", err)
	}
	log.Printf("created E2E room: id=%s subject=%s (encrypt=%v sign=%v persist=%v)", roomID, subj, policy.Encrypt, policy.SignMsgs, policy.Persist)

	// Under the per-subject ACL, NATS freezes permissions at connect time, so the
	// just-created room's subject is not yet publishable/subscribable on the live
	// connection. RefreshSession reconnects so the authenticator re-derives the
	// ACL (now including this room) — the post-0006 contract every client follows
	// after a membership change.
	if err := c.RefreshSession(); err != nil {
		log.Fatalf("clientcheck: refresh session: %v", err)
	}

	// The mode was validated above, so only the two known values reach here.
	switch *mode {
	case "golden":
		runGolden(c, roomID, *messages)
	case "loop":
		runLoop(c, roomID, *duration, *interval)
	}
}
// runGolden is the golden-path check: it subscribes to roomID, publishes n
// numbered messages and then polls (for up to 15 seconds) until n distinct
// plaintexts have been delivered to the subscription callback. Every expected
// message that never arrived is logged, and the process exits non-zero if at
// least one is missing.
func runGolden(c *client.Client, roomID string, n int) {
	var (
		mu   sync.Mutex
		seen = make(map[string]bool) // plaintexts delivered so far
	)
	onFrame := func(_ frame.Frame, plaintext []byte) {
		mu.Lock()
		defer mu.Unlock()
		seen[string(plaintext)] = true
	}
	sub, err := c.Subscribe(roomID, onFrame)
	if err != nil {
		log.Fatalf("clientcheck: subscribe: %v", err)
	}
	defer sub.Unsubscribe()
	time.Sleep(300 * time.Millisecond) // give the subscription time to settle

	expected := make([]string, n)
	for i := range expected {
		expected[i] = fmt.Sprintf("gapcheck-e2e-%d", i)
		if err := c.Publish(roomID, []byte(expected[i])); err != nil {
			log.Fatalf("clientcheck: publish %d: %v", i, err)
		}
	}
	log.Printf("published %d messages to %s; waiting for decrypted echoes...", n, roomID)

	// echoed reports how many distinct plaintexts have arrived so far.
	echoed := func() int {
		mu.Lock()
		defer mu.Unlock()
		return len(seen)
	}
	for deadline := time.Now().Add(15 * time.Second); time.Now().Before(deadline) && echoed() < n; {
		time.Sleep(100 * time.Millisecond)
	}

	// Hold the lock for the final tally so late callbacks cannot race the read.
	mu.Lock()
	defer mu.Unlock()
	var missing int
	for _, msg := range expected {
		if seen[msg] {
			continue
		}
		missing++
		log.Printf(" MISSING: %q", msg)
	}
	log.Printf("connected node at finish: %s", c.ConnectedServer())
	if missing > 0 {
		log.Fatalf("GOLDEN FAIL: %d/%d messages not received decrypted", missing, n)
	}
	log.Printf("GOLDEN OK: all %d messages received and decrypted end-to-end", n)
}
// runLoop publishes a numbered message every interval for the duration and logs
// the count received plus the node currently attached, so an operator stopping a
// cluster node mid-run sees the client fail over to a survivor and keep receiving
// (quorum 2/3). It is the live failover-with-a-connected-client test the 0011
// chaos run never performed.
//
// A failed publish is logged on its tick rather than aborting the run (errors
// are expected while the client is reconnecting). The process exits non-zero
// only if no message at all was received.
func runLoop(c *client.Client, roomID string, duration, interval time.Duration) {
	var mu sync.Mutex // guards received, which the subscription callback writes
	received := 0
	// servers is only touched by this goroutine, so it needs no locking.
	servers := map[string]int{} // node -> #ticks observed attached
	sub, err := c.Subscribe(roomID, func(_ frame.Frame, _ []byte) {
		mu.Lock()
		received++
		mu.Unlock()
	})
	if err != nil {
		log.Fatalf("clientcheck: subscribe: %v", err)
	}
	defer sub.Unsubscribe()
	time.Sleep(300 * time.Millisecond) // let the subscription settle
	log.Printf("loop: publishing every %s for %s — stop a node now to test failover", interval, duration)

	start := time.Now()
	end := start.Add(duration)
	sent := 0
	for time.Now().Before(end) {
		msg := fmt.Sprintf("gapcheck-loop-%d", sent)
		err := c.Publish(roomID, []byte(msg))
		sent++
		mu.Lock()
		recv := received
		mu.Unlock()
		node := c.ConnectedServer()
		up := c.IsConnected()
		if node != "" {
			servers[node]++
		}
		pubStatus := "ok"
		if err != nil {
			pubStatus = "ERR:" + err.Error()
		}
		// t is the real elapsed time in seconds, not the message counter, so
		// the column stays correct for any --interval.
		log.Printf(" t=%2ds sent=%d recv=%d up=%v node=%s publish=%s",
			int(time.Since(start).Seconds()), sent, recv, up, node, pubStatus)
		time.Sleep(interval)
	}

	// Hold the lock for the summary so a late callback cannot race the reads.
	mu.Lock()
	defer mu.Unlock()
	log.Printf("loop done: sent=%d received=%d", sent, received)
	nodes := make([]string, 0, len(servers))
	for n := range servers {
		nodes = append(nodes, n)
	}
	sort.Strings(nodes) // deterministic report order
	for _, n := range nodes {
		log.Printf(" attached to %s for %d ticks", n, servers[n])
	}
	if len(servers) > 1 {
		log.Printf("FAILOVER OBSERVED: client was attached to %d distinct nodes across the run", len(servers))
	}
	if received == 0 {
		log.Fatalf("LOOP FAIL: received 0 messages")
	}
	log.Printf("LOOP OK: client kept receiving across the run (received=%d)", received)
}
// splitCSV breaks a comma-separated string into its elements, trimming
// surrounding whitespace from each one and dropping elements that are empty
// after trimming. It returns nil when nothing remains.
func splitCSV(s string) []string {
	isComma := func(r rune) bool { return r == ',' }
	var fields []string
	for _, raw := range strings.FieldsFunc(s, isComma) {
		trimmed := strings.TrimSpace(raw)
		if trimmed == "" {
			continue
		}
		fields = append(fields, trimmed)
	}
	return fields
}
-152
View File
@@ -1,152 +0,0 @@
package main
// Integration tests for issue 0011 GAP A: `membershipd user add --store kv`
// adds users to a RUNNING cluster's replicated allowlist via the privileged
// internal connection, instead of the stop-seed-restart procedure the 0011
// deploy required. These exercise the real connectKVStore path (load the
// persisted internal identity from a file, present its nkey, open the KV store,
// write the user) against an embedded enforce node, plus the idempotency and
// error semantics the DoD calls for. Multi-node replication and node-down quorum
// are validated against the live cluster (report 0012).
import (
"encoding/hex"
"errors"
"path/filepath"
"testing"
"time"
cs "fn-registry/functions/cybersecurity"
"github.com/enmanuel/unibus/pkg/busauth"
"github.com/enmanuel/unibus/pkg/client"
"github.com/enmanuel/unibus/pkg/embeddednats"
"github.com/enmanuel/unibus/pkg/membership"
)
// startEnforceKVNode starts one embedded NATS node on 127.0.0.1 whose
// authenticator is built with internalID's public signing key as the internal
// identity, opens the KV control-plane store over connectInternalJS, and
// installs that store into the storeHolder backing the authenticator. The
// server and the internal connection are torn down through t.Cleanup. It
// returns the server's client URL.
func startEnforceKVNode(t *testing.T, internalID cs.Identity) string {
	t.Helper()

	holder := new(storeHolder)
	internalPubHex := hex.EncodeToString(internalID.SignPub)
	perms := busauth.PermissionsFromSubjects(holder.subjectACL)
	auth := busauth.NewNkeyAuthenticatorACLInternal(holder.IsAuthorized, perms, internalPubHex)

	cfg := embeddednats.ServerConfig{
		StoreDir: t.TempDir(),
		Host:     "127.0.0.1",
		Port:     freePort(t),
		Auth:     auth,
	}
	srv, err := embeddednats.StartServer(cfg)
	if err != nil {
		t.Fatalf("start enforce node: %v", err)
	}
	t.Cleanup(func() {
		srv.Shutdown()
		srv.WaitForShutdown()
	})

	conn, js, err := connectInternalJS(srv, internalID, true)
	if err != nil {
		t.Fatalf("bootstrap internal connection: %v", err)
	}
	t.Cleanup(conn.Close)

	storeCfg := membership.JetStreamConfig{Replicas: 1, OpTimeout: 3 * time.Second}
	store, err := membership.OpenJetStream(js, storeCfg)
	if err != nil {
		t.Fatalf("bootstrap KV store: %v", err)
	}
	holder.set(store)

	return srv.ClientURL()
}
// TestUserAddStoreKV_GoldenAndIdempotent covers the happy path and the
// idempotency edge of adding a user through connectKVStore: a fresh user
// written via the file-loaded internal identity becomes authorized; adding the
// same key again — with the same or with a different handle/role — yields
// membership.ErrUserExists; and the stored row keeps its original handle, role
// and active status.
func TestUserAddStoreKV_GoldenAndIdempotent(t *testing.T) {
	const handle = "gapcheck_user"

	idPath := filepath.Join(t.TempDir(), "internal.id")
	internalID, err := client.LoadOrCreateIdentity(idPath) // persists 0600
	if err != nil {
		t.Fatalf("persist internal identity: %v", err)
	}
	natsURL := startEnforceKVNode(t, internalID)

	// Golden path: connect over loopback without TLS, presenting the internal
	// identity loaded from the file, the way `user add --store kv` does.
	conn, err := connectKVStore(natsURL, idPath, "", 1)
	if err != nil {
		t.Fatalf("connectKVStore (privileged): %v", err)
	}
	defer conn.Close()

	user, err := cs.GenerateIdentity()
	if err != nil {
		t.Fatalf("new user identity: %v", err)
	}
	userPub := hex.EncodeToString(user.SignPub)

	if err := conn.store.AddUser(userPub, handle, membership.RoleMember); err != nil {
		t.Fatalf("add user to live KV: %v", err)
	}
	if authorized := conn.store.IsAuthorized(userPub); !authorized {
		t.Fatalf("user added to KV must be authorized")
	}

	// Edge 1: an identical re-add must surface ErrUserExists.
	if err := conn.store.AddUser(userPub, handle, membership.RoleMember); !errors.Is(err, membership.ErrUserExists) {
		t.Fatalf("re-add must return ErrUserExists (idempotent), got %v", err)
	}

	// Same key with another handle/role must be rejected too, so the existing
	// row is never silently overwritten (no role flip).
	err = conn.store.AddUser(userPub, "impostor", membership.RoleAdmin)
	if !errors.Is(err, membership.ErrUserExists) {
		t.Fatalf("re-add with a different role must NOT overwrite; want ErrUserExists, got %v", err)
	}

	row, err := conn.store.GetUser(userPub)
	if err != nil {
		t.Fatalf("get user: %v", err)
	}
	unchanged := row.Handle == handle &&
		row.Role == membership.RoleMember &&
		row.Status == membership.StatusActive
	if !unchanged {
		t.Fatalf("idempotent re-add corrupted the row: %+v", row)
	}
}
// TestUserAddStoreKV_RequiresInternalIdentity checks that connectKVStore
// returns an error both for an empty internal identity path and for a path
// that does not exist, instead of connecting without the privileged identity.
func TestUserAddStoreKV_RequiresInternalIdentity(t *testing.T) {
	const natsURL = "nats://127.0.0.1:4250"

	cases := []struct {
		idFile  string
		failMsg string
	}{
		{idFile: "", failMsg: "empty --internal-id-file must be an error"},
		{idFile: filepath.Join(t.TempDir(), "nope.id"), failMsg: "missing internal identity file must be an error"},
	}
	for _, tc := range cases {
		_, err := connectKVStore(natsURL, tc.idFile, "", 1)
		if err == nil {
			t.Fatal(tc.failMsg)
		}
	}
}
// TestUserAddStoreKV_UnreachableKV is the error case: with a valid identity
// file but a NATS URL that nothing is listening on, connectKVStore must return
// an error rather than succeed.
func TestUserAddStoreKV_UnreachableKV(t *testing.T) {
	idPath := filepath.Join(t.TempDir(), "internal.id")
	_, err := client.LoadOrCreateIdentity(idPath)
	if err != nil {
		t.Fatalf("persist internal identity: %v", err)
	}

	// Loopback port 1: nothing is expected to be listening there.
	const deadURL = "nats://127.0.0.1:1/"
	if _, err := connectKVStore(deadURL, idPath, "", 1); err == nil {
		t.Fatalf("connecting to a dead endpoint must error")
	}
}
// TestUserAddStoreKV_RemoteWithoutCARefused checks the cleartext guard: a
// non-loopback NATS URL combined with an empty CA path must make
// connectKVStore return an error (audit 0008 N6, the same guard migrate-to-kv
// applies).
func TestUserAddStoreKV_RemoteWithoutCARefused(t *testing.T) {
	idPath := filepath.Join(t.TempDir(), "internal.id")
	_, err := client.LoadOrCreateIdentity(idPath)
	if err != nil {
		t.Fatalf("persist internal identity: %v", err)
	}

	const remoteURL = "nats://203.0.113.1:4250"
	if _, err := connectKVStore(remoteURL, idPath, "", 1); err == nil {
		t.Fatalf("remote target without --ca must be refused")
	}
}
-24
View File
@@ -24,7 +24,6 @@ import (
"github.com/enmanuel/unibus/pkg/blobstore" "github.com/enmanuel/unibus/pkg/blobstore"
"github.com/enmanuel/unibus/pkg/busauth" "github.com/enmanuel/unibus/pkg/busauth"
"github.com/enmanuel/unibus/pkg/client"
"github.com/enmanuel/unibus/pkg/embeddednats" "github.com/enmanuel/unibus/pkg/embeddednats"
"github.com/enmanuel/unibus/pkg/membership" "github.com/enmanuel/unibus/pkg/membership"
) )
@@ -84,17 +83,6 @@ func main() {
// "kv" puts rooms/members/keys/users in replicated JetStream KV so any node // "kv" puts rooms/members/keys/users in replicated JetStream KV so any node
// in the cluster serves the same state. // in the cluster serves the same state.
storeBackend = flag.String("store", "sqlite", "control-plane store backend: sqlite (default, single-node) | kv (replicated JetStream, decentralized)") storeBackend = flag.String("store", "sqlite", "control-plane store backend: sqlite (default, single-node) | kv (replicated JetStream, decentralized)")
// Persisted internal service identity (issue 0011 gaps, GAP A): when set, the
// privileged internal identity used to manage JetStream is LOADED from this
// file (generated and persisted on first start) instead of being a fresh
// ephemeral key each boot. Persisting it is what lets `membershipd user add
// --store kv` write the replicated allowlist of a LIVE cluster: that CLI,
// run over loopback on a node, loads the SAME identity and presents the nkey
// this node's authenticator already grants full permissions. Empty keeps the
// ephemeral-per-process behavior (single-node/dev default, unchanged). The
// file holds a private key: it is written 0600 and belongs next to the node's
// TLS keys (deploy keeps it under secrets/, gitignored).
internalIDFile = flag.String("internal-id-file", "", "path to a persisted internal service identity (JSON); enables `membershipd user add --store kv` against the live cluster. Empty = ephemeral per-process identity (dev default)")
) )
flag.Parse() flag.Parse()
@@ -148,22 +136,10 @@ func main() {
var internalID cs.Identity var internalID cs.Identity
var internalPubHex string var internalPubHex string
if needJS && enforce && *natsURL == "" { if needJS && enforce && *natsURL == "" {
if *internalIDFile != "" {
// Persisted identity: load it, generating + writing it (0600) on first
// start. A stable internal key is what `user add --store kv` presents to
// add users to a live cluster (GAP A); rotate it by deleting the file and
// restarting.
internalID, err = client.LoadOrCreateIdentity(*internalIDFile)
if err != nil {
log.Fatalf("load internal service identity %q: %v", *internalIDFile, err)
}
log.Printf("internal service identity: persisted (%s)", *internalIDFile)
} else {
internalID, err = cs.GenerateIdentity() internalID, err = cs.GenerateIdentity()
if err != nil { if err != nil {
log.Fatalf("generate internal identity: %v", err) log.Fatalf("generate internal identity: %v", err)
} }
}
internalPubHex = hex.EncodeToString(internalID.SignPub) internalPubHex = hex.EncodeToString(internalID.SignPub)
} }
+15 -82
View File
@@ -1,7 +1,7 @@
package main package main
import ( import (
"errors" "encoding/hex"
"flag" "flag"
"fmt" "fmt"
"os" "os"
@@ -50,26 +50,13 @@ commands:
list List all registered users list List all registered users
revoke Revoke a user (denies access on both planes immediately) revoke Revoke a user (denies access on both planes immediately)
store backends (--store):
sqlite local SQLite database (default; seeds the first admin offline)
kv the RUNNING cluster's replicated JetStream KV allowlist, via the
privileged internal connection — add users with the cluster live,
no stop-seed-restart needed (run over loopback/SSH on a node)
examples: examples:
membershipd user add --handle alice --sign-pub <64-hex> --role admin membershipd user add --handle alice --sign-pub <64-hex> --role admin
membershipd user add --store kv --handle bob --sign-pub <64-hex> --role member membershipd user list
membershipd user list --store kv
membershipd user revoke <64-hex> membershipd user revoke <64-hex>
common flags: common flags:
--db <path> SQLite database path (--store sqlite; default ./local_files/unibus.db) --db <path> SQLite database path (default ./local_files/unibus.db)
--store kv flags (defaults assume an on-node invocation):
--nats-url <url> cluster NATS (default nats://127.0.0.1:4250)
--internal-id-file <path> persisted internal service identity (default /opt/unibus/secrets/internal.id)
--ca <path> CA cert pinning the data-plane TLS (default /opt/unibus/tls/ca.crt)
--kv-replicas <n> KV replication factor, match the cluster (default 3)
`) `)
} }
@@ -89,56 +76,16 @@ func openStore(path string) membership.Store {
// validateSignPubHex ensures the key is exactly a 32-byte Ed25519 public key in // validateSignPubHex ensures the key is exactly a 32-byte Ed25519 public key in
// hex (64 hex chars). Catching this here turns a silent "authorized nobody" into // hex (64 hex chars). Catching this here turns a silent "authorized nobody" into
// an explicit error at seed time. It delegates to membership.ValidateSignPubHex // an explicit error at seed time.
// so the CLI and the HTTP user-management handlers share one rule.
func validateSignPubHex(signPub string) error { func validateSignPubHex(signPub string) error {
return membership.ValidateSignPubHex(signPub) b, err := hex.DecodeString(signPub)
}
// kvFlags holds the connection flags shared by the --store kv path of the user
// subcommands. registerKVFlags wires them onto a flag set so add and list expose
// an identical interface.
type kvFlags struct {
store *string
natsURL *string
internalID *string
ca *string
replicas *int
}
func registerKVFlags(fs *flag.FlagSet) kvFlags {
return kvFlags{
store: fs.String("store", "sqlite", "user store backend: sqlite (local DB) | kv (the live cluster's replicated allowlist)"),
natsURL: fs.String("nats-url", defaultClusterNatsURL, "cluster NATS url for --store kv"),
internalID: fs.String("internal-id-file", defaultInternalIDFile, "persisted internal service identity for --store kv"),
ca: fs.String("ca", defaultClusterCAFile, "CA cert pinning TLS on the --store kv NATS connection"),
replicas: fs.Int("kv-replicas", 3, "KV replication factor for --store kv (match the cluster)"),
}
}
// resolveStore returns the membership store for the chosen backend plus a cleanup
// func. For --store kv it opens the privileged connection to the live cluster; for
// sqlite it opens the local file. It exits the process with a clear message on any
// failure (a dead NATS, a missing identity file), so a broken --store kv add fails
// loudly instead of silently — Error case of the GAP A DoD. The returned *kvConn
// is non-nil only for the kv backend (so the caller can report replication).
func resolveStore(cmd string, kf kvFlags, dbPath string) (membership.Store, *kvConn, func()) {
switch *kf.store {
case "sqlite":
store := openStore(dbPath)
return store, nil, func() { store.Close() }
case "kv":
kv, err := connectKVStore(*kf.natsURL, *kf.internalID, *kf.ca, *kf.replicas)
if err != nil { if err != nil {
fmt.Fprintf(os.Stderr, "membershipd %s: --store kv: %v\n", cmd, err) return fmt.Errorf("sign-pub is not valid hex: %w", err)
os.Exit(1)
} }
return kv.store, kv, kv.Close if len(b) != 32 {
default: return fmt.Errorf("sign-pub must be a 32-byte Ed25519 public key (64 hex chars), got %d bytes", len(b))
fmt.Fprintf(os.Stderr, "membershipd %s: --store must be \"sqlite\" or \"kv\", got %q\n", cmd, *kf.store)
os.Exit(2)
return nil, nil, func() {}
} }
return nil
} }
func userAdd(args []string) { func userAdd(args []string) {
@@ -147,7 +94,6 @@ func userAdd(args []string) {
signPub := fs.String("sign-pub", "", "Ed25519 signing public key in hex (required)") signPub := fs.String("sign-pub", "", "Ed25519 signing public key in hex (required)")
role := fs.String("role", membership.RoleMember, "role: admin or member") role := fs.String("role", membership.RoleMember, "role: admin or member")
dbPath := fs.String("db", defaultDBPath, "SQLite database path") dbPath := fs.String("db", defaultDBPath, "SQLite database path")
kf := registerKVFlags(fs)
_ = fs.Parse(args) _ = fs.Parse(args)
if *handle == "" || *signPub == "" { if *handle == "" || *signPub == "" {
@@ -159,35 +105,23 @@ func userAdd(args []string) {
os.Exit(2) os.Exit(2)
} }
store, kv, closeStore := resolveStore("user add", kf, *dbPath) store := openStore(*dbPath)
defer closeStore() defer store.Close()
if err := store.AddUser(*signPub, *handle, *role); err != nil { if err := store.AddUser(*signPub, *handle, *role); err != nil {
if errors.Is(err, membership.ErrUserExists) {
// Idempotency contract (GAP A): re-adding the same key is an EXPLICIT,
// non-destructive error — the existing row is left untouched (no silent
// upsert that could flip a role or clobber status, which would corrupt the
// allowlist). To replace a user, `user revoke <key>` then add again.
fmt.Fprintf(os.Stderr, "membershipd user add: user %s already registered (unchanged); revoke it first to replace\n", *signPub)
os.Exit(1)
}
fmt.Fprintf(os.Stderr, "membershipd user add: %v\n", err) fmt.Fprintf(os.Stderr, "membershipd user add: %v\n", err)
os.Exit(1) os.Exit(1)
} }
fmt.Printf("added user %q (%s) role=%s\n", *handle, *signPub, *role) fmt.Printf("added user %q (%s) role=%s\n", *handle, *signPub, *role)
if kv != nil {
reportKVReplication(kv.js)
}
} }
func userList(args []string) { func userList(args []string) {
fs := flag.NewFlagSet("user list", flag.ExitOnError) fs := flag.NewFlagSet("user list", flag.ExitOnError)
dbPath := fs.String("db", defaultDBPath, "SQLite database path") dbPath := fs.String("db", defaultDBPath, "SQLite database path")
kf := registerKVFlags(fs)
_ = fs.Parse(args) _ = fs.Parse(args)
store, _, closeStore := resolveStore("user list", kf, *dbPath) store := openStore(*dbPath)
defer closeStore() defer store.Close()
users, err := store.ListUsers() users, err := store.ListUsers()
if err != nil { if err != nil {
@@ -209,7 +143,6 @@ func userList(args []string) {
func userRevoke(args []string) { func userRevoke(args []string) {
fs := flag.NewFlagSet("user revoke", flag.ExitOnError) fs := flag.NewFlagSet("user revoke", flag.ExitOnError)
dbPath := fs.String("db", defaultDBPath, "SQLite database path") dbPath := fs.String("db", defaultDBPath, "SQLite database path")
kf := registerKVFlags(fs)
// Go's flag package stops at the first non-flag argument, so `revoke <key> // Go's flag package stops at the first non-flag argument, so `revoke <key>
// --db path` would otherwise leave --db unparsed. Pull a leading positional // --db path` would otherwise leave --db unparsed. Pull a leading positional
@@ -234,8 +167,8 @@ func userRevoke(args []string) {
os.Exit(2) os.Exit(2)
} }
store, _, closeStore := resolveStore("user revoke", kf, *dbPath) store := openStore(*dbPath)
defer closeStore() defer store.Close()
if err := store.RevokeUser(signPub); err != nil { if err := store.RevokeUser(signPub); err != nil {
fmt.Fprintf(os.Stderr, "membershipd user revoke: %v\n", err) fmt.Fprintf(os.Stderr, "membershipd user revoke: %v\n", err)
-151
View File
@@ -1,151 +0,0 @@
package main
import (
"context"
"fmt"
"os"
"time"
"github.com/enmanuel/unibus/pkg/busauth"
"github.com/enmanuel/unibus/pkg/client"
"github.com/enmanuel/unibus/pkg/membership"
"github.com/nats-io/nats.go"
"github.com/nats-io/nats.go/jetstream"
)
// users_kv.go is the `--store kv` half of the user administration CLI (issue 0011
// gaps, GAP A): adding and listing bus users directly against the RUNNING
// cluster's replicated JetStream KV allowlist, with no need to stop the cluster,
// seed a standalone node, and restart (the procedure the 0011 deploy required).
//
// The mechanism is the cluster's own privileged internal connection. Under
// enforce every bus user is confined by the per-subject ACL to the JetStream API
// of its own rooms, so no ordinary identity may touch the control-plane buckets
// (KV_UNIBUS_*). The ONLY identity the authenticator grants full JetStream
// permissions is membershipd's internal service identity. By persisting that
// identity to a file (membershipd --internal-id-file) the same key becomes
// available to this CLI, which presents it as its NATS nkey and is therefore
// recognized as the privileged internal client and allowed to read/write the KV.
//
// Intended invocation is over loopback on a cluster node (SSH): the data-plane
// TLS certificate's SAN covers 127.0.0.1/localhost and the internal identity file
// lives 0600 next to the node's TLS keys. Using the file requires root on the
// node, which already implies full control of that node — so co-locating it adds
// no practical exposure beyond what the TLS server key and cluster password
// already represent.
// defaultClusterNatsURL is the node-local NATS listener. The CLI is meant to run
// on a cluster node over SSH, talking to that node's own embedded server.
const defaultClusterNatsURL = "nats://127.0.0.1:4250"
// Deploy-default paths for the privileged identity and the data-plane CA, so an
// on-node invocation needs only --handle/--sign-pub/--role. Override for other
// layouts.
const (
defaultInternalIDFile = "/opt/unibus/secrets/internal.id"
defaultClusterCAFile = "/opt/unibus/tls/ca.crt"
)
// kvConn bundles the privileged NATS connection to a live cluster and the
// KV-backed control-plane store opened over it. Close releases both.
type kvConn struct {
// nc is the underlying NATS connection; Close closes it last.
nc *nats.Conn
// js is the JetStream handle created on nc; the store is opened through it.
js jetstream.JetStream
// store is the membership store opened over js.
store membership.Store
}
// Close releases the store first and then the NATS connection. It is safe to
// call on a nil receiver or on a partially populated kvConn. The error
// returned by the store's Close is deliberately discarded.
func (k *kvConn) Close() {
	if k == nil {
		return
	}
	if st := k.store; st != nil {
		_ = st.Close()
	}
	if conn := k.nc; conn != nil {
		conn.Close()
	}
}
// connectKVStore dials the cluster's NATS as the privileged internal identity
// and opens the JetStream KV control-plane store over that connection.
//
// internalIDFile is the identity file persisted by membershipd; its signing
// key is turned into the nkey presented to the server. caPath, when non-empty,
// pins TLS on the connection. replicas is forwarded as the store's replication
// factor.
//
// It fails when internalIDFile is empty, when natsURL is not loopback and no
// CA is given (mirroring migrate-to-kv, audit 0008 N6: the allowlist write
// must not travel in cleartext), or when any step — loading the identity,
// deriving the nkey, loading the CA, connecting, opening JetStream or the
// store — returns an error. The connection is closed on every failure path
// after it was established.
func connectKVStore(natsURL, internalIDFile, caPath string, replicas int) (*kvConn, error) {
	if internalIDFile == "" {
		return nil, fmt.Errorf("--internal-id-file is required for --store kv (the privileged identity membershipd persists with --internal-id-file)")
	}
	// Confidentiality guard: without TLS a remote connection would carry the
	// allowlist (handles/roles/sign-pubs) and the nkey handshake in cleartext.
	remoteCleartext := !isLoopbackURL(natsURL) && caPath == ""
	if remoteCleartext {
		return nil, fmt.Errorf("refusing to connect to remote %q without --ca: the allowlist write would travel in cleartext — pin TLS with --ca, or run over a loopback --nats-url on a node", natsURL)
	}

	identity, err := client.LoadIdentity(internalIDFile)
	if err != nil {
		return nil, fmt.Errorf("load internal identity: %w", err)
	}
	pub, sign, err := busauth.ClientNkey(identity.SignPriv)
	if err != nil {
		return nil, fmt.Errorf("derive nkey from internal identity: %w", err)
	}

	connOpts := make([]nats.Option, 0, 3)
	connOpts = append(connOpts, nats.Name("membershipd-user-cli"), nats.Nkey(pub, sign))
	if caPath != "" {
		tlsCfg, err := busauth.LoadCATLSConfig(caPath)
		if err != nil {
			return nil, fmt.Errorf("load CA %q: %w", caPath, err)
		}
		connOpts = append(connOpts, nats.Secure(tlsCfg))
	}

	nc, err := nats.Connect(natsURL, connOpts...)
	if err != nil {
		return nil, fmt.Errorf("connect cluster NATS %q: %w", natsURL, err)
	}
	// abort closes the established connection before reporting a later failure.
	abort := func(err error) (*kvConn, error) {
		nc.Close()
		return nil, err
	}

	js, err := jetstream.New(nc)
	if err != nil {
		return abort(fmt.Errorf("jetstream: %w", err))
	}
	store, err := membership.OpenJetStream(js, membership.JetStreamConfig{Replicas: replicas})
	if err != nil {
		return abort(fmt.Errorf("open KV control-plane store: %w", err))
	}
	return &kvConn{nc: nc, js: js, store: store}, nil
}
// reportKVReplication prints the state of the KV_UNIBUS_users stream after a
// write: for a clustered stream, its leader, how many replicas report
// themselves current out of the total, and the message count; for a stream
// without cluster info, a standalone note with the message count. The lookup
// is bounded by a 5-second timeout and is best-effort — a failure to read the
// stream info is written to stderr as a note and is not treated as an error.
func reportKVReplication(js jetstream.JetStream) {
	ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
	defer cancel()

	// note reports a failed stream-info read without failing the command.
	note := func(err error) {
		fmt.Fprintf(os.Stderr, "note: could not read KV_UNIBUS_users stream info: %v\n", err)
	}

	stream, err := js.Stream(ctx, "KV_UNIBUS_users")
	if err != nil {
		note(err)
		return
	}
	info, err := stream.Info(ctx)
	if err != nil {
		note(err)
		return
	}

	cluster := info.Cluster
	if cluster == nil {
		fmt.Printf("KV_UNIBUS_users: standalone (R1, no cluster replication); msgs=%d\n", info.State.Msgs)
		return
	}

	var upToDate int
	for _, replica := range cluster.Replicas {
		if replica.Current {
			upToDate++
		}
	}
	fmt.Printf("KV_UNIBUS_users: leader=%s followers_current=%d/%d msgs=%d\n",
		cluster.Leader, upToDate, len(cluster.Replicas), info.State.Msgs)
}
+39 -201
View File
@@ -5,12 +5,9 @@ This directory holds the material to bring up unibus as a **3-node cluster**
plane (rooms/members/keys/users on JetStream KV + the anti-replay nonce bucket) plane (rooms/members/keys/users on JetStream KV + the anti-replay nonce bucket)
survives the loss of any one node (quorum 2/3). survives the loss of any one node (quorum 2/3).
> **Status: this cluster is DEPLOYED in production** (magnus + homer + datardos, > **The agent that authored this never touched a VPS.** Every step that changes a
> R3, enforce+ACL+TLS) — see report 0011. The runbook below was authored before any > remote host is marked **HUMAN** and is executed by the operator. `deploy-cluster.sh`
> VPS existed and has since been **corrected against the real deploy** (report 0012): > defaults to a dry run.
> the start ordering, the R1→R3 reality, and the live user-add path were all wrong
> or missing. Steps that change a remote host are marked **HUMAN**; `deploy-cluster.sh`
> still defaults to a dry run.
## Files ## Files
@@ -25,22 +22,18 @@ Generated keys/secrets (`out/`, `build/`, `secrets/`) are **gitignored** — the
secret and never leave the operator's trusted machine except over the secure secret and never leave the operator's trusted machine except over the secure
rsync channel. rsync channel.
## Topology (as deployed, report 0011) ## Topology
| Node | SSH | Public IP | Role | | Node | SSH | Public IP | WireGuard IP | Role |
|---|---|---|---| |---|---|---|---|---|
| magnus | `magnus` (root) | `135.125.201.30` | node — **= organic-machine.com = `om`**, the critical host (caddy + gitea + registry-api + monitoring); the bus runs alongside, untouched | | magnus | `magnus` | `<MAGNUS_PUBLIC_IP>` | `<MAGNUS_WG_IP>` | seed (first up) |
| homer | `homer` (ubuntu+sudo) | `141.94.69.66` | node | | homer | `homer` | `141.94.69.66` | `<HOMER_WG_IP>` | replica |
| datardos | `dd` (ubuntu+sudo) | `51.91.100.142` | node | | datardos | `dd` | `51.91.100.142` | `<DATARDOS_WG_IP>` (10.21.0.x) | replica |
`ROUTE_NETWORK=public`, **not `wg`**: there is no WireGuard mesh between the three The route layer (server-to-server) prefers the **WireGuard mesh**
nodes (homer and datardos do not even have the `wg` binary; om's only WG peers are (`ROUTE_NETWORK=wg`); the client data plane and the HTTP control plane are reached
the operator's PCs). The server-to-server routes therefore travel over the public over the public IPs. The route CA is **separate** from the client CA, so a client
IPs, protected by the **separate cluster route CA** (mutual route TLS) — a client cert can never be presented to the route port.
data-plane cert can never be presented to the route port. The client data plane and
the HTTP control plane are also reached over the public IPs. There is no fixed
"seed" node: with R3 the three are peers (see "Bring up" for why a lone node cannot
self-serve).
## Prerequisites (HUMAN, once) ## Prerequisites (HUMAN, once)
@@ -100,48 +93,25 @@ SEED
> The KV written here lives in `./local_files/jetstream`, which the cluster unit > The KV written here lives in `./local_files/jetstream`, which the cluster unit
> reuses (`--nats-store` default), so the admin is present when the enforce cluster > reuses (`--nats-store` default), so the admin is present when the enforce cluster
> starts. This loopback bootstrap is needed ONLY for the very first admin (the > starts. Additional users are added the same loopback way until a
> chicken-and-egg). **Every user after that is added with the cluster live** — no > `user add --store kv` exists (see GAP in report 0009).
> stop-seed-restart — via `user add --store kv` (see "Add users to the live
> cluster" below, report 0012).
## Bring up (HUMAN) ## Bring up (HUMAN — staggered)
> **CORRECTION (report 0012).** The original instruction — "start magnus alone and Bring up the seed first, then the replicas one at a time, checking each joins.
> verify healthz, then add the others" — is **WRONG and will look like a hung
> deploy.** A 3-node JetStream cluster forms a RAFT meta-group that needs a quorum
> (2 of 3) to elect a leader. A single started node has no quorum, so its JetStream
> meta never becomes current: `--store kv` blocks creating the KV buckets and
> **`/healthz` never returns ok** until a second node joins. Waiting for magnus to
> "go green" before starting the others therefore deadlocks the rollout.
Start the nodes so a quorum forms. On a **clean cluster** the simplest correct
procedure is to start all three close together and let the meta-group converge:
```bash ```bash
# Start all three (order does not matter); each blocks on the others until a # 1. Seed node (after the seed step above).
# 2/3 quorum elects a JetStream meta leader, then the KV buckets are created. ssh root@magnus 'systemctl enable --now membershipd-cluster'
for h in magnus homer datardos; do ssh "$h" 'sudo systemctl enable --now membershipd-cluster'; done ssh root@magnus 'curl -fsS https://127.0.0.1:8470/healthz --cacert /opt/unibus/tls/ca.crt'
# Only NOW does healthz return ok — once the meta-group has a leader (give it # 2. Replicas, one at a time.
# ~10-30s on a cold start). Poll, do not assume the first node is broken. ssh root@homer 'systemctl enable --now membershipd-cluster'
for h in magnus homer datardos; do ssh root@datardos 'systemctl enable --now membershipd-cluster'
echo "== $h =="; ssh "$h" 'curl -fsS https://127.0.0.1:8470/healthz --cacert /opt/unibus/tls/ca.crt || echo "(not ready yet — needs quorum)"'
done
``` ```
A **staggered** start also works, but only because `membershipd`'s KV open RETRIES > Initial rollout runs at **R1** (`KV_REPLICAS=1` in `nodes.env`): the buckets live
the bucket creation for a 120s bootstrap budget (issue 0006g, fix #3): the first > on the seed only. This is NOT HA yet — see "Scale to R3".
node sits in that retry loop — NOT serving healthz — until the second node makes a
quorum, then both converge and the third catches up. Either way, a lone node never
self-serves; do not gate the next node's start on the previous one's healthz.
> A cold multi-node start only converges because of **three cold-start fixes**
> (report 0011): route pooling off (`PoolSize=-1`), `NoAdvertise=true` (Docker
> bridge IPs not gossiped), and the KV-open retry loop above. Without them the
> meta-group re-elects leaders forever and bucket creation hangs. If a fresh
> cluster will not form, confirm the running binary contains these fixes before
> touching config.
## Promote an existing single-node (SQLite) deployment (HUMAN, optional) ## Promote an existing single-node (SQLite) deployment (HUMAN, optional)
@@ -167,80 +137,11 @@ ssh root@magnus 'nats --server nats://127.0.0.1:4250 server list' # 3 servers,
A healthy cluster shows 3 routed servers and a JetStream meta-group with a leader. A healthy cluster shows 3 routed servers and a JetStream meta-group with a leader.
## Add users to the live cluster (HUMAN — `user add --store kv`) ## Scale to R3 (HUMAN — real HA)
With the cluster up, add (and revoke) bus users **without stopping anything**, Once all three nodes are up and routed, raise the replication factor of every
directly against the replicated KV allowlist. This replaces the stop-seed-restart control-plane stream from 1 to 3 IN PLACE (no data loss), then flip `KV_REPLICAS=3`
procedure the original runbook implied for every user beyond the first admin. in `nodes.env` so future (re)deploys keep it:
The mechanism is the cluster's own **privileged internal connection**: under
`enforce` every bus user is confined by the per-subject ACL to its own rooms, so no
ordinary identity may write the control-plane buckets. The only identity the
authenticator grants full JetStream permissions is `membershipd`'s internal service
identity. The unit persists that identity to `${INTERNAL_ID_FILE}`
(`/opt/unibus/secrets/internal.id`, 0600) via `--internal-id-file`, so the same key
is available to the CLI. Run the CLI **on a node, over loopback** (the data-plane
TLS cert SAN covers `127.0.0.1`); reading the identity file requires root on that
node, which already implies full control of it, so this adds no practical exposure.
```bash
# Add a member to the live cluster's replicated allowlist (run on any node).
ssh root@magnus 'sudo /opt/unibus/membershipd user add --store kv \
--handle alice --role member --sign-pub <64-hex-ed25519-pub>'
# -> added user "alice" (...) role=member
# -> KV_UNIBUS_users: leader=<node> followers_current=2/2 msgs=N (replicated, HA)
# List / revoke against the same live KV:
ssh root@magnus 'sudo /opt/unibus/membershipd user list --store kv'
ssh root@magnus 'sudo /opt/unibus/membershipd user revoke --store kv <64-hex-ed25519-pub>'
```
Defaults assume an on-node invocation (`--nats-url nats://127.0.0.1:4250`,
`--internal-id-file /opt/unibus/secrets/internal.id`, `--ca /opt/unibus/tls/ca.crt`,
`--kv-replicas 3`). Semantics:
- **Idempotent / non-destructive**: re-adding the same key is an explicit
`already registered` error (exit 1), never a silent overwrite — a re-add cannot
flip a member to admin. To replace a user, `revoke` then add.
- **HA**: the write commits through the JetStream quorum, so it succeeds even with
one node down (2/3); the printed `followers_current` shows replication.
- **No hard delete**: `revoke` flips status to `revoked` (denied on both planes,
auditable); the KV has no row deletion, matching the SQLite store.
> **Rollout note (report 0012):** the live verification deployed this binary +
> `--internal-id-file` to **datardos only** (the non-critical node). magnus and
> homer still run the 0011 binary. To make the capability available (and the unit)
> on all three — recommended, the posture is identical so there is no urgency — roll
> the new binary with backups, one node at a time, verifying healthz between each:
> ```bash
> for h in homer magnus; do
> ssh "$h" 'sudo cp -a /opt/unibus/membershipd /opt/unibus/membershipd.bak' # backup
> scp build/membershipd "$h:/tmp/m" && ssh "$h" 'sudo install -o ubuntu -g ubuntu -m0775 /tmp/m /opt/unibus/membershipd'
> # add INTERNAL_ID_FILE=/opt/unibus/secrets/internal.id to /opt/unibus/cluster.env
> # add `--internal-id-file ${INTERNAL_ID_FILE} \` to the unit before `--store kv`
> ssh "$h" 'sudo systemctl daemon-reload && sudo systemctl restart membershipd-cluster'
> ssh "$h" 'curl -fsS https://127.0.0.1:8470/healthz --cacert /opt/unibus/tls/ca.crt' # green before next
> done
> ```
> (`deploy-cluster.sh` + the unit template already emit `INTERNAL_ID_FILE` and the
> flag, so a fresh `./deploy-cluster.sh --yes` is correct for all three.)
## Replication: go straight to R3 (HUMAN — real HA)
> **CORRECTION (report 0012).** The original "start at R1, then scale to R3" plan
> assumed R1 is a usable interim state. **It is not, in this cluster.** At R1 all six
> control-plane buckets (`KV_UNIBUS_users/rooms/members/room_keys/rooms_by_member`
> + `KV_UNIBUS_nonces`) live on a SINGLE node — a hard **SPOF for authentication**:
> if that node dies, the nonce/KV control plane is unreachable and EVERY
> authenticated request fails closed (auth DoS). Worse, the cold multi-node start
> only converges at all because of the three cold-start fixes (see "Bring up"); the
> real deploy never ran a healthy R1 and **jumped straight to R3 once the cluster
> formed.** Treat R1 as a transient artifact of bucket creation, not a milestone.
The deployed config already sets `KV_REPLICAS=3` in `nodes.env`. If buckets were
created at R1 (e.g. only one node was up when `--store kv` first opened them), raise
every control-plane stream to R3 IN PLACE (no data loss) once all three nodes are
routed:
```bash ```bash
for s in KV_UNIBUS_users KV_UNIBUS_rooms KV_UNIBUS_members KV_UNIBUS_room_keys \ for s in KV_UNIBUS_users KV_UNIBUS_rooms KV_UNIBUS_members KV_UNIBUS_room_keys \
@@ -250,32 +151,27 @@ done
# (also OBJ_UNIBUS_blobs if the object store is in use) # (also OBJ_UNIBUS_blobs if the object store is in use)
``` ```
After this each bucket shows `followers_current=2/2` (quorum 2/3). The Until this is done, R1 means the seed node is a **single point of failure for
`user add --store kv` command prints that figure for `KV_UNIBUS_users` on every add, authentication**: if it dies, the nonce/KV control plane is unreachable and every
which is a cheap live HA check. authenticated request fails closed (auth DoS). R1 is a rollout step, not HA.
## Chaos test (HUMAN — requires the 3 live VPS) ## Chaos test (HUMAN — requires the 3 live VPS; NOT run here)
Validate quorum tolerance after R3: Validate quorum tolerance after R3:
```bash ```bash
# Kill one node; the cluster keeps serving (quorum 2/3). On ubuntu nodes use sudo. # Kill one node; the cluster keeps serving (quorum 2/3).
ssh dd 'sudo systemctl stop membershipd-cluster' ssh root@datardos 'systemctl stop membershipd-cluster'
# -> clients fail over (multiple seed URLs); reads/writes still succeed. # -> clients fail over (multiple seed URLs); reads/writes still succeed.
ssh dd 'sudo systemctl start membershipd-cluster' # rejoins, catches up ssh root@datardos 'systemctl start membershipd-cluster' # rejoins, catches up
# Kill two nodes; quorum is LOST — the control plane should fail CLOSED (deny), # Kill two nodes; quorum is LOST — the control plane should fail CLOSED (deny),
# never fail open. Verify a request is rejected, not silently served. # never fail open. Verify a request is rejected, not silently served.
``` ```
> **Validated (report 0012).** The 0011 chaos run checked only the control plane This network-level chaos test (kill 1/3, kill 2/3, partition/split-brain) is part
> (healthz + meta/stream-leader failover + KV readable with 2/3). Report 0012 added of the deploy validation (issue 0003f) and runs against the real VPS — it is
> the missing data-plane proofs against the live cluster: a real authenticated deliberately out of scope for the authoring agent.
> client (`cmd/clientcheck`, operator identity, nkey+TLS) creating an E2E room and
> publishing/subscribing — including a node stopped mid-stream, where the client
> failed over to a survivor and kept receiving with zero loss (quorum 2/3) — and
> `user add --store kv` committing with one node (the KV leader) down. The kill-2/3
> fail-closed case remains a documented manual step.
## Rollback ## Rollback
@@ -283,61 +179,3 @@ ssh dd 'sudo systemctl start membershipd-cluster' # rejoins, catches up
the unit and start it without `--store kv`/`--cluster-name`; the KV buckets remain the unit and start it without `--store kv`/`--cluster-name`; the KV buckets remain
for a later retry. To rotate the cluster CA, re-run `generate-cluster-certs.sh for a later retry. To rotate the cluster CA, re-run `generate-cluster-certs.sh
--force` and re-stage (every node must get the new `cluster-ca.crt` together). --force` and re-stage (every node must get the new `cluster-ca.crt` together).
## NATS server metrics (loopback monitoring — optional)
The embedded NATS server can expose its own monitoring HTTP endpoint so a local
scraper reads server-level metrics that `/healthz` does not surface: msgs/s,
connections, slow consumers, memory, KV bucket message counts, the RAFT leader per
stream and per-stream restarts. This feeds the `unibus-nats` dashboard in
`fleet_monitoring` (the scraper hits `127.0.0.1:8222/varz|/connz|/jsz` over
loopback and pushes to VictoriaMetrics).
The endpoint is opened by the **dedicated** environment toggle `UNIBUS_NATS_MONITOR=1`
(0.11.0+ binary). It is **decoupled** from `UNIBUS_NATS_DEBUG`: it opens the
monitoring endpoint WITHOUT enabling the verbose nats-server debug log, so no room
subjects or routing metadata leak to journald (keeps the hardened posture, issue
0007). The endpoint binds `127.0.0.1:8222` **only** — the binary hardcodes the
loopback bind, so it is never reachable from the network and needs no auth. Never
use `UNIBUS_NATS_DEBUG` in production just to get the endpoint.
### Enable it (HUMAN — requires the 0.11.0+ binary on the node)
The clean way is the additive systemd drop-in in this directory:
```bash
# On each node, AFTER the 0.11.0+ binary is in /opt/unibus/membershipd:
ssh <node> 'sudo mkdir -p /etc/systemd/system/membershipd-cluster.service.d'
scp membershipd-cluster.service.d/nats-monitor.conf <node>:/tmp/nats-monitor.conf
ssh <node> 'sudo cp /tmp/nats-monitor.conf /etc/systemd/system/membershipd-cluster.service.d/ \
&& sudo systemctl daemon-reload && sudo systemctl restart membershipd-cluster'
```
(Equivalently, add `UNIBUS_NATS_MONITOR=1` to `/opt/unibus/cluster.env`, which the
unit already sources via `EnvironmentFile`; the drop-in is preferred because it is
self-documenting and does not edit the generated env file.)
### Rolling restart with the R3 reconvergence gate (CRITICAL)
`systemctl restart membershipd-cluster` restarts that node's JetStream RAFT member.
**Never restart two nodes at once** — that would drop the cluster below quorum
(2/3) and fail the control plane closed. Roll **one node at a time**, in the order
`magnus → homer → datardos`, and between each node wait until the cluster has
reconverged to R3 (every control-plane bucket back to `followers_current=2/2`):
```bash
# After restarting ONE node, gate on R3 reconvergence before touching the next:
ssh root@magnus 'for s in KV_UNIBUS_users KV_UNIBUS_rooms KV_UNIBUS_members \
KV_UNIBUS_room_keys KV_UNIBUS_rooms_by_member KV_UNIBUS_nonces; do
nats --server nats://127.0.0.1:4250 stream info "$s" -j \
| jq -r --arg s "$s" \"\\($s): replicas=\\(.cluster.replicas|length) leader=\\(.cluster.leader)\"
done'
# Proceed to the next node ONLY when all six show 3 replicas with a leader
# (i.e. 2/2 followers current). Also confirm healthz is green on the just-restarted
# node first:
ssh <node> 'curl -fsS https://127.0.0.1:8470/healthz --cacert /opt/unibus/tls/ca.crt'
```
This restart is normally **not** done as a standalone step: the 0.11.0 binary that
carries the flag is rolled to the three nodes in the consolidated rollout, and the
drop-in is installed during that same rolling restart.
+8 -12
View File
@@ -97,7 +97,6 @@ TLS_KEY=${REMOTE_DIR}/tls/server-${name}.key
ROUTE_TLS_CERT=${REMOTE_DIR}/tls/route-${name}.crt ROUTE_TLS_CERT=${REMOTE_DIR}/tls/route-${name}.crt
ROUTE_TLS_KEY=${REMOTE_DIR}/tls/route-${name}.key ROUTE_TLS_KEY=${REMOTE_DIR}/tls/route-${name}.key
ROUTE_TLS_CA=${REMOTE_DIR}/tls/cluster-ca.crt ROUTE_TLS_CA=${REMOTE_DIR}/tls/cluster-ca.crt
INTERNAL_ID_FILE=${REMOTE_DIR}/secrets/internal.id
EOF EOF
run ssh "$target" "mkdir -p ${REMOTE_DIR}/tls ${REMOTE_DIR}/secrets" run ssh "$target" "mkdir -p ${REMOTE_DIR}/tls ${REMOTE_DIR}/secrets"
@@ -115,16 +114,13 @@ if [[ $APPLY -eq 0 ]]; then
fi fi
cat <<'NEXT' cat <<'NEXT'
HUMAN — bring up (see README "Bring up" — a LONE node has no quorum and never HUMAN — staggered start (do NOT enable all at once; see README "Bring up"):
serves healthz, so do NOT gate the next node on the previous one going green): 1. Seed node first (e.g. magnus):
1. Seed the FIRST admin into the KV via the loopback bootstrap (README ssh root@magnus 'systemctl enable --now membershipd-cluster'
"Seed the first admin"); this is needed only for the chicken-and-egg admin. ssh root@magnus '/opt/unibus/membershipd user add --admin ...' # seed admin
2. Start all three so a 2/3 quorum forms (order does not matter); healthz 2. Then the other two, one at a time, checking quorum after each:
turns ok only once the meta-group elects a leader (~10-30s cold): ssh root@homer 'systemctl enable --now membershipd-cluster'
for h in magnus homer datardos; do ssh "$h" 'sudo systemctl enable --now membershipd-cluster'; done ssh root@datardos 'systemctl enable --now membershipd-cluster'
3. Verify posture + quorum (README "Verify"). 3. Verify posture + quorum (README "Verify").
4. Ensure R3 on every control-plane stream (README "Replication: go straight to 4. Scale replicas 1 -> 3 once all three are up (README "Scale to R3").
R3"); R1 is a SPOF, not a milestone.
5. Add further users with the cluster LIVE — no restart — via
`membershipd user add --store kv` (README "Add users to the live cluster").
NEXT NEXT
@@ -33,7 +33,6 @@ ExecStart=/opt/unibus/membershipd \
--route-tls-cert ${ROUTE_TLS_CERT} \ --route-tls-cert ${ROUTE_TLS_CERT} \
--route-tls-key ${ROUTE_TLS_KEY} \ --route-tls-key ${ROUTE_TLS_KEY} \
--route-tls-ca ${ROUTE_TLS_CA} \ --route-tls-ca ${ROUTE_TLS_CA} \
--internal-id-file ${INTERNAL_ID_FILE} \
--store kv \ --store kv \
--kv-replicas ${KV_REPLICAS} --kv-replicas ${KV_REPLICAS}
# Restart=always (NOT on-failure): a clean SIGTERM exits success, and on-failure # Restart=always (NOT on-failure): a clean SIGTERM exits success, and on-failure
@@ -1,27 +0,0 @@
# Drop-in: enable the embedded NATS server monitoring HTTP endpoint so a local
# metrics scraper can read /varz, /connz and /jsz for server-level metrics
# (msgs/s, connections, KV bucket msgs, RAFT leader per stream, restarts).
#
# ADDITIVE and minimal: it only sets one environment variable; the base unit
# (membershipd-cluster.service) is otherwise unchanged.
#
# UNIBUS_NATS_MONITOR is DECOUPLED from UNIBUS_NATS_DEBUG: it opens the monitoring
# endpoint WITHOUT enabling the verbose nats-server debug log, so no room subjects
# or routing metadata are written to journald (keeps the hardened posture, issue
# 0007). Do NOT use UNIBUS_NATS_DEBUG in production just to get the endpoint.
#
# The endpoint binds 127.0.0.1:8222 ONLY — the binary hardcodes the loopback bind,
# so it is never reachable from the network and needs no auth. The scraper runs on
# the same host and reads it over loopback.
#
# Requires the 0.11.0+ membershipd binary (the one that honors UNIBUS_NATS_MONITOR).
# Install on a node:
# sudo mkdir -p /etc/systemd/system/membershipd-cluster.service.d
# sudo cp nats-monitor.conf /etc/systemd/system/membershipd-cluster.service.d/
# sudo systemctl daemon-reload && sudo systemctl restart membershipd-cluster
#
# Restarting a node restarts its JetStream RAFT member, so roll ONE node at a time
# and wait for R3 reconvergence (followers 2/2) before touching the next. See the
# "NATS server metrics" section of this directory's README for the full runbook.
[Service]
Environment=UNIBUS_NATS_MONITOR=1
+8 -21
View File
@@ -2,10 +2,10 @@
# #
# This file is SOURCED by generate-cluster-certs.sh and deploy-cluster.sh. # This file is SOURCED by generate-cluster-certs.sh and deploy-cluster.sh.
# #
# HUMAN: fill in every placeholder with the real value before running the # HUMAN: fill in every <PLACEHOLDER> with the real value before running the
# scripts. The public IPs known at authoring time are pre-filled; the WireGuard # scripts. The public IPs known at authoring time are pre-filled; the WireGuard
# mesh IPs and magnus's public IP must be supplied. The scripts refuse to run # mesh IPs and magnus's public IP must be supplied. The scripts refuse to run
# while any unfilled placeholder remains. # while any <PLACEHOLDER> remains.
# Cluster identity (must be identical on every node). # Cluster identity (must be identical on every node).
CLUSTER_NAME="unibus" CLUSTER_NAME="unibus"
@@ -16,7 +16,7 @@ CLUSTER_USER="unibus-cluster"
# KV/nonce replication factor. START AT 1 for the initial 1->3 rollout, then raise # KV/nonce replication factor. START AT 1 for the initial 1->3 rollout, then raise
# to 3 IN PLACE (see README "Scale to R3") once all three nodes have joined. Only # to 3 IN PLACE (see README "Scale to R3") once all three nodes have joined. Only
# set this to 3 here after the third node is up and you re-run the KV update. # set this to 3 here after the third node is up and you re-run the KV update.
KV_REPLICAS=3 KV_REPLICAS=1
# Ports (same on every node; the route port is server-to-server only). # Ports (same on every node; the route port is server-to-server only).
NATS_CLIENT_PORT=4250 NATS_CLIENT_PORT=4250
@@ -30,28 +30,15 @@ SSH_USER="root"
# Which address family the inter-node routes use. "wg" builds --routes from the # Which address family the inter-node routes use. "wg" builds --routes from the
# WireGuard mesh IPs (private server-to-server links, preferred); "public" uses # WireGuard mesh IPs (private server-to-server links, preferred); "public" uses
# the public IPs. The route layer is always mutual-TLS regardless. # the public IPs. The route layer is always mutual-TLS regardless.
# ROUTE_NETWORK="wg"
# DEPLOY DECISION (2026-06-07): set to "public". No WireGuard mesh exists between
# the three cluster nodes — homer and datardos do not even have the `wg` binary
# installed, and om's only WG peers are the operator's personal PCs, not the VPS.
# Rather than stand up a fresh mesh blindly, the routes go over the public IPs,
# still protected by the separate cluster route CA (mutual-TLS). On magnus (the
# only node with ufw active) the route port 6250 is restricted to the homer and
# datardos public IPs; homer/datardos run ufw inactive (Docker hosts) and rely on
# the route mutual-TLS for 6250.
ROUTE_NETWORK="public"
# One row per node: NAME SSH_HOST PUBLIC_IP WG_IP # One row per node: NAME SSH_HOST PUBLIC_IP WG_IP
# NAME -> --server-name and the per-node cert filenames (unique). # NAME -> --server-name and the per-node cert filenames (unique).
# SSH_HOST -> the `ssh ALIAS` alias (see ~/.ssh/config). # SSH_HOST -> the `ssh <SSH_HOST>` alias (see ~/.ssh/config).
# PUBLIC_IP -> public address; goes in the cert SANs (client-facing data plane). # PUBLIC_IP -> public address; goes in the cert SANs (client-facing data plane).
# WG_IP -> WireGuard mesh address; cert SAN + route target when ROUTE_NETWORK=wg. # WG_IP -> WireGuard mesh address; cert SAN + route target when ROUTE_NETWORK=wg.
# NOTE: with ROUTE_NETWORK=public and no WireGuard mesh, the WG_IP column is set to
# each node's public IP so the cert SAN covers the address actually used by the
# public routes and no unfilled placeholder remains (scripts refuse to run otherwise).
# magnus == organic-machine.com == om (135.125.201.30); SSH alias `magnus` enters as root.
CLUSTER_NODES=( CLUSTER_NODES=(
"magnus magnus 135.125.201.30 135.125.201.30" "magnus magnus <MAGNUS_PUBLIC_IP> <MAGNUS_WG_IP>"
"homer homer 141.94.69.66 141.94.69.66" "homer homer 141.94.69.66 <HOMER_WG_IP>"
"datardos dd 51.91.100.142 51.91.100.142" "datardos dd 51.91.100.142 <DATARDOS_WG_IP>"
) )
@@ -1,78 +0,0 @@
---
issue: 0007
title: Cifrado at-rest del control plane (JetStream KV / SQLite en disco)
status: spec
created: 2026-06-07
domain: security
scope: unibus (pkg/embeddednats, cmd/membershipd, deploy/cluster) + procedimiento de migración del store existente
---
# Objetivo
Cifrar en reposo el almacenamiento del plano de control para que un nodo comprometido
(root en el VPS) o un disco robado no exponga los metadatos de control en claro.
Estado actual (auditado el 07/06/2026, report 0012 y siguientes):
- **Contenido de los mensajes**: cifrado E2E por room (megolm/olm). El servidor nunca ve el
plaintext; no vive en el plano de control. **No es el objeto de este issue.**
- **Claves de room** (`UNIBUS_room_keys`): guardadas **selladas** (sealed box X25519, cifradas
para cada miembro). El servidor las almacena y reparte pero no puede abrirlas. **Ya protegidas.**
- **Metadatos de control** (`UNIBUS_rooms`, `UNIBUS_members`, `UNIBUS_rooms_by_member`,
`UNIBUS_users`): se serializan con `json.Marshal` y se escriben **en claro** en el store. En
cluster ese store es el directorio `local_files/jetstream/` de cada nodo; en single-node es el
archivo SQLite `local_files/unibus.db`. Hoy **no hay cifrado at-rest**: con root en un nodo se
pueden leer subjects de salas, la pertenencia (quién está en qué sala con qué rol), los handles
y roles de los usuarios, y las claves públicas (signPub/kexPub). No se exponen mensajes (E2E) ni
se pueden descifrar salas (claves selladas), pero sí toda la topología.
Tras este issue, los buckets/archivos del control plane quedan cifrados en disco con una clave por
nodo gestionada fuera de git. El modelo de amenaza pasa de "root del nodo ve la topología" a "root
del nodo necesita además la clave at-rest (que puede vivir en un secreto separado / TPM / variable
de entorno inyectada) para leer cualquier cosa".
# Contexto técnico
- NATS Server / JetStream soporta **encryption at-rest** nativo: se configura una cifra
(`aes` o `chacha20`) y una clave; JetStream cifra los ficheros de los streams/KV en disco. El
bus usa un NATS **embebido** (`pkg/embeddednats`), así que la activación es por opciones del
servidor embebido, no por un `nats-server.conf` externo.
- Para el backend SQLite (single-node) el equivalente sería SQLCipher o cifrado a nivel de
archivo/FS; queda como sub-tarea de menor prioridad porque el despliegue real es cluster (KV).
# Tareas
1. Confirmar la API de encryption-at-rest del NATS embebido en la versión usada (opción de
servidor para cipher + clave; cómo se pasa la clave de forma que no quede en argv ni en git).
2. Activar el cifrado en `pkg/embeddednats` detrás de una opción de configuración. La clave se
inyecta por archivo (`--jetstream-encryption-key-file`, 0600, junto a las claves TLS del nodo)
o variable de entorno desde el unit systemd; nunca en argv ni commiteada.
3. `cmd/membershipd`: flag/env para la clave + reflejar el estado en la posture publicada en
`/healthz` (p.ej. `"at_rest":true`) para que el monitor lo verifique.
4. `deploy/cluster`: provisionar la clave at-rest por nodo (generación + `pass`/secrets gitignored)
y cablearla en `cluster.env` + el unit. Documentar en el runbook.
5. **Migración del store existente** (gotcha crítico): JetStream no re-cifra retroactivamente los
datos ya escritos en claro. Diseñar y documentar el procedimiento seguro para el cluster en
producción (probable: backup → exportar snapshot del control plane → parar nodo → recrear el
store con la clave activa → re-importar; o rotación nodo a nodo aprovechando la replicación R3).
Respetar la regla de migraciones (aditivo, sin pérdida de datos).
6. Tests: arrancar un nodo con clave at-rest, escribir un user/room, y verificar que el fichero en
disco **no** contiene en claro un subject/handle conocido (grep negativo), y que el nodo sigue
leyéndolos con la clave. Verificar que sin la clave el store no se abre.
# Definition of Done
- Cifrado at-rest activo en los 3 nodos del cluster; `/healthz` lo refleja en la posture.
- Evidencia ejecutable: un valor conocido (subject de sala / handle de usuario) **no** aparece en
claro al hacer `grep` sobre `local_files/jetstream/`; el nodo lo sigue sirviendo con la clave.
- Procedimiento de migración probado sobre datos reales sin pérdida (snapshot/restore verificado).
- La clave at-rest nunca está en git ni en argv; vive en archivo 0600 / secreto inyectado.
- No baja ninguna otra capa de seguridad (enforce + ACL + TLS + E2E + sealed keys intactas).
# Notas
Aditivo y ortogonal al resto de la seguridad: TLS protege en tránsito, E2E el contenido, las claves
de room van selladas; este issue cierra el último hueco (metadatos de control en claro en disco)
para el modelo de amenaza "VPS comprometido / disco robado". Prioridad media: el despliegue ya es
seguro frente a ataques de red (enforce+TLS+ACL); esto endurece frente a compromiso físico/root del
host. Relacionado con el endurecimiento de los issues 0004/0005/0006.
+37
View File
@@ -0,0 +1,37 @@
#!/usr/bin/env bash
# Regenerates the gomobile binding (unibus.aar) from ./mobile, which is built on
# top of pkg/client.
#
# The .aar (~38 MB, with libgojni.so for 4 ABIs) is NOT versioned: it is a
# reproducible build artifact. This script regenerates it. Requirements:
# - Go with gomobile/gobind installed:
# go install golang.org/x/mobile/cmd/gomobile@latest
# go install golang.org/x/mobile/cmd/gobind@latest
# gomobile init
# - Android NDK (this repo used 26.3.11579264 inside the Android SDK).
#
# In a worktree outside the registry tree, pkg/client imports
# "fn-registry/functions/cybersecurity" through the `replace` in go.mod. If that
# relative replace does not resolve (e.g. a worktree under /tmp), create a local
# go.work (gitignored) containing: replace fn-registry => /absolute/path/to/fn_registry
# Abort on any error, on use of an unset variable, and on a failure anywhere in a pipeline.
set -euo pipefail
# Run from the parent of this script's directory so the relative paths below resolve.
cd "$(dirname "$0")/.."
# Default the SDK/NDK locations only when the caller has not already set them.
: "${ANDROID_HOME:=$HOME/android-sdk}"
: "${ANDROID_NDK_HOME:=$ANDROID_HOME/ndk/26.3.11579264}"
export ANDROID_HOME ANDROID_NDK_HOME
# Put $HOME/go/bin first on PATH (presumably where `go install` placed gomobile/gobind — verify if GOBIN/GOPATH is customized).
export PATH="$HOME/go/bin:$PATH"
# Output location of the generated archive; its directory is created if missing.
OUT="android/app/libs/unibus.aar"
mkdir -p "$(dirname "$OUT")"
echo "==> gomobile bind -> $OUT"
# Bind the ./mobile package for Android (API level 21), generating Java classes
# under com.unibus.core and writing the archive to $OUT.
gomobile bind \
-target=android \
-androidapi 21 \
-javapkg com.unibus.core \
-o "$OUT" \
./mobile
echo "==> OK: $OUT"
# Show the size of the generated archive as a quick sanity check.
ls -lh "$OUT"
+236
View File
@@ -0,0 +1,236 @@
// Package mobile exposes a flat, gomobile-friendly API over the unibus client
// so an Android app can join rooms, publish, and receive messages with the same
// end-to-end encryption as any native Go peer.
//
// gomobile only supports a limited set of types across the binding boundary
// (string, []byte, int, bool, error, named structs, and interfaces). This layer
// translates the richer client API into those primitives and delivers incoming
// frames through a Java/Kotlin-implemented FrameListener callback. No protocol
// or cryptography is reimplemented here: every call delegates to pkg/client,
// which is the single source of truth shared with every other peer on the bus.
package mobile
import (
"encoding/base64"
"encoding/json"
"fmt"
"time"
"github.com/enmanuel/unibus/pkg/client"
"github.com/enmanuel/unibus/pkg/frame"
"github.com/enmanuel/unibus/pkg/room"
)
// FrameListener is the callback through which a subscribed room's decrypted
// messages reach the app. The Android (Java/Kotlin) side supplies the
// implementation and hands it to Session.Subscribe.
//
// Threading contract: OnFrame is called on a NATS delivery goroutine, never on
// the Android main thread. An implementation MUST switch to the UI thread
// before touching Compose state or any Android view — for instance with
// `withContext(Dispatchers.Main)` inside a coroutine, or by publishing into a
// MutableStateFlow the UI collects. Touching views directly from the callback
// crashes with CalledFromWrongThreadException.
type FrameListener interface {
	// OnFrame delivers one message: the room it arrived on, the sender and
	// message id taken from the frame, and the decrypted payload as text.
	OnFrame(roomID, sender, msgID, text string)
}
// Session is a connected unibus peer. Create it with NewSession and close it
// with Close when the app stops.
//
// Every method dereferences the wrapped client, so a zero-value Session is not
// usable: always obtain one from NewSession.
type Session struct {
// c is the underlying pkg/client connection that every Session method delegates to.
c *client.Client
}
// GenerateIdentity makes sure a long-term identity exists at path, loading the
// existing file or creating a new one through client.LoadOrCreateIdentity. The
// identity itself is discarded; only success or failure is reported.
//
// Call it once on first launch. The file holds the peer's private Ed25519 and
// X25519 keys, so it must stay inside the app sandbox (on Android, a path
// under Context.getFilesDir()).
func GenerateIdentity(path string) error {
	if _, err := client.LoadOrCreateIdentity(path); err != nil {
		return err
	}
	return nil
}
// NewSession loads (or creates) the identity stored at idPath and connects to
// the bus with it.
//
//   - natsURL is the data plane, for example tls://host:4250.
//   - ctrlURL is the control-plane HTTP endpoint, for example https://host:8470.
//   - caPath is the path to the bus CA certificate (ca.crt) bundled with the
//     app. When set, the session connects securely (TLS pinned to that CA plus
//     nkey authentication on the data plane), matching a bus running with
//     auth + TLS. An empty caPath connects in plaintext to an unsecured (dev)
//     bus.
//
// Errors from loading the identity or from connecting are returned unchanged.
func NewSession(idPath, natsURL, ctrlURL, caPath string) (*Session, error) {
	identity, err := client.LoadOrCreateIdentity(idPath)
	if err != nil {
		return nil, err
	}

	conn, err := client.Connect(natsURL, ctrlURL, identity, caPath)
	if err != nil {
		return nil, err
	}

	sess := &Session{c: conn}
	return sess, nil
}
// EndpointID reports the stable identifier of this peer's endpoint, which is
// derived from its signing public key. Frames sent by this peer carry this
// value as their sender.
func (s *Session) EndpointID() string {
	self := s.c.Endpoint()
	return self.ID
}
// ConnectedServer reports the NATS URL this session is currently connected to,
// as given by the underlying client. The UI can show it as a "connected to"
// hint.
func (s *Session) ConnectedServer() string {
	serverURL := s.c.ConnectedServer()
	return serverURL
}
// IsConnected tells whether the underlying NATS connection is currently live,
// as reported by the wrapped client.
func (s *Session) IsConnected() bool {
	live := s.c.IsConnected()
	return live
}
// CreateRoom opens a room on the given subject and returns the room id used by
// Join, Publish and Subscribe.
//
// mode selects the room policy and must be exactly one of:
//   - "matrix": the encrypted, persisted and signed policy;
//   - "nats":   plain cleartext.
//
// Any other value is rejected with an error. The check fails closed on
// purpose: a misspelled or empty mode must never silently produce a cleartext
// room when the caller may have intended an encrypted one.
//
// On a secured bus, call RefreshSession after CreateRoom and before
// Subscribe/Publish so the bus re-derives this peer's per-subject permissions
// from its new membership (issue 0006e).
func (s *Session) CreateRoom(subject, mode string) (string, error) {
	switch mode {
	case "matrix":
		return s.c.CreateRoom(subject, room.ModeMatrix)
	case "nats":
		return s.c.CreateRoom(subject, room.ModeNATS)
	default:
		return "", fmt.Errorf("mobile: unknown room mode %q (want \"matrix\" or \"nats\")", mode)
	}
}
// Join prepares the session to publish to and receive from roomID, fetching
// the room key first when the room is encrypted. It delegates to the wrapped
// client and returns its error unchanged.
func (s *Session) Join(roomID string) error {
	err := s.c.Join(roomID)
	return err
}
// RefreshSession reconnects the data plane so that the bus recomputes this
// peer's per-subject permissions from its current room membership.
//
// Membership-change contract (issue 0006e): a secured bus (--bus-auth enforce)
// freezes a connection's permissions when it connects. After ANY membership
// change — creating a room, being invited to one, or joining one — call
// RefreshSession BEFORE Publish/Subscribe on that room; otherwise the bus
// denies the new room's subject. The reconnect also drops active
// subscriptions, so Subscribe again afterwards. On an unsecured bus the call is
// a harmless reconnect. Mobile/gateway callers wire it the same way cmd/chat
// and cmd/worker do: CreateRoom -> RefreshSession -> Subscribe/Publish.
func (s *Session) RefreshSession() error {
	err := s.c.RefreshSession()
	return err
}
// Publish sends text to the room as its UTF-8 bytes. The payload is handed to
// the wrapped client, whose error is returned unchanged.
func (s *Session) Publish(roomID, text string) error {
	payload := []byte(text)
	return s.c.Publish(roomID, payload)
}
// Subscribe streams the decrypted messages of roomID to l until the session is
// closed. See FrameListener for the threading contract.
//
// l must not be nil. A nil listener (for example a Java null crossing the
// binding) is rejected here, because otherwise the failure would only surface
// later as a nil-interface panic on the delivery goroutine when the first
// frame arrives.
//
// The subscription handle returned by the client is discarded, so this API has
// no per-room unsubscribe. RefreshSession drops active subscriptions, so call
// Subscribe again after it.
func (s *Session) Subscribe(roomID string, l FrameListener) error {
	if l == nil {
		return fmt.Errorf("mobile: subscribe %q: nil listener", roomID)
	}
	_, err := s.c.Subscribe(roomID, func(f frame.Frame, plaintext []byte) {
		l.OnFrame(roomID, f.Sender, f.MsgID, string(plaintext))
	})
	return err
}
// roomJSON is the flat shape returned by ListRoomsJSON for each room the peer
// belongs to. It mirrors the fields the UI needs to render a room list item.
// ListRoomsJSON fills every field from the matching field of a client room
// reference; the JSON keys below are the contract with the Kotlin decoder.
type roomJSON struct {
RoomID string `json:"room_id"`
Subject string `json:"subject"`
// Epoch is copied from the room reference; presumably the room-key epoch
// that Kick rotates — confirm against pkg/client.
Epoch int `json:"epoch"`
// Encrypted is the room policy's Encrypt flag.
Encrypted bool `json:"encrypted"`
Role string `json:"role"`
}
// ListRoomsJSON returns the rooms this peer belongs to, encoded as a JSON array
// string whose elements are roomJSON objects. gomobile does not bind slices of
// structs cleanly across the boundary, so the list travels as JSON and the
// Kotlin side decodes it (kotlinx.serialization).
//
// A peer with no rooms yields "[]" (the slice is always allocated, never nil).
// Errors from listing the rooms or from marshalling are returned with an empty
// string.
func (s *Session) ListRoomsJSON() (string, error) {
	refs, err := s.c.ListMyRooms()
	if err != nil {
		return "", err
	}

	rooms := make([]roomJSON, len(refs))
	for i, ref := range refs {
		rooms[i] = roomJSON{
			RoomID:    ref.RoomID,
			Subject:   ref.Subject,
			Epoch:     ref.Epoch,
			Encrypted: ref.Policy.Encrypt,
			Role:      ref.Role,
		}
	}

	encoded, err := json.Marshal(rooms)
	if err != nil {
		return "", err
	}
	return string(encoded), nil
}
// cardJSON is the portable, copy-pasteable public identity a peer shares so a
// room owner can invite it to an encrypted room. It carries no secret: only the
// endpoint id and the two public keys (signing + key-exchange), base64-encoded
// for transport over text or a QR code.
//
// Card produces this shape and Invite consumes it; both sides use the standard
// (padded) base64 alphabet, base64.StdEncoding.
type cardJSON struct {
ID string `json:"id"`
SignPub string `json:"sign_pub"` // base64 std of the Ed25519 public key
KexPub string `json:"kex_pub"` // base64 std of the X25519 public key
}
// Card renders this peer's public identity as a portable JSON string (the
// cardJSON shape). Hand it to a room owner (paste, QR) so they can Invite this
// peer to an encrypted room. It holds only the endpoint id and the two public
// keys, so it contains no private key and is safe to transmit in the clear.
func (s *Session) Card() string {
	self := s.c.Endpoint()
	card := cardJSON{
		ID:      self.ID,
		SignPub: base64.StdEncoding.EncodeToString(self.SignPub),
		KexPub:  base64.StdEncoding.EncodeToString(self.KexPub),
	}
	// The error is deliberately dropped: marshalling a struct made of three
	// plain strings has no failure mode, and the binding signature returns
	// only a string.
	encoded, _ := json.Marshal(card)
	return string(encoded)
}
// Invite adds the holder of peerCard to roomID. peerCard is the JSON string the
// invitee produced with Card(). For encrypted rooms this seals the current room
// key to the invitee's X25519 public key and signs the request; the caller must
// be the room owner.
//
// The card is untrusted input (pasted text or a scanned QR code), so it is
// validated before anything is sent: it must be valid JSON, carry a non-empty
// id, and both keys must be standard base64 decoding to exactly 32 bytes (the
// size of an Ed25519 public key and of an X25519 public key). Without these
// checks a card such as "{}" would decode cleanly and be forwarded as an empty
// endpoint. Validation failures are returned as "mobile: ..." errors; any other
// error comes from the underlying client.
func (s *Session) Invite(roomID, peerCard string) error {
	// Ed25519 and X25519 public keys are both 32 bytes long.
	const pubKeyLen = 32

	var card cardJSON
	if err := json.Unmarshal([]byte(peerCard), &card); err != nil {
		return fmt.Errorf("mobile: bad peer card: %w", err)
	}
	if card.ID == "" {
		return fmt.Errorf("mobile: bad peer card: missing id")
	}

	signPub, err := base64.StdEncoding.DecodeString(card.SignPub)
	if err != nil {
		return fmt.Errorf("mobile: bad sign_pub in card: %w", err)
	}
	if len(signPub) != pubKeyLen {
		return fmt.Errorf("mobile: bad sign_pub in card: got %d bytes, want %d", len(signPub), pubKeyLen)
	}

	kexPub, err := base64.StdEncoding.DecodeString(card.KexPub)
	if err != nil {
		return fmt.Errorf("mobile: bad kex_pub in card: %w", err)
	}
	if len(kexPub) != pubKeyLen {
		return fmt.Errorf("mobile: bad kex_pub in card: got %d bytes, want %d", len(kexPub), pubKeyLen)
	}

	return s.c.Invite(roomID, client.Endpoint{ID: card.ID, SignPub: signPub, KexPub: kexPub})
}
// Kick removes endpointID from roomID. For encrypted rooms it also rotates the
// room key to a new epoch, so the removed peer cannot decrypt messages
// published after the kick (forward secrecy). The caller must be the room
// owner. The wrapped client's error is returned unchanged.
func (s *Session) Kick(roomID, endpointID string) error {
	err := s.c.Kick(roomID, endpointID)
	return err
}
// Request performs an RPC request/reply against subject: text is sent as its
// UTF-8 bytes and the reply payload is returned as a string. timeoutMs bounds
// the wait, in milliseconds; it is converted to a time.Duration and passed to
// the client as-is (how a zero or negative value behaves is decided by
// pkg/client and is not visible here). On error the returned string is empty.
func (s *Session) Request(subject, text string, timeoutMs int) (string, error) {
	wait := time.Duration(timeoutMs) * time.Millisecond
	reply, err := s.c.Request(subject, []byte(text), wait)
	if err != nil {
		return "", err
	}
	return string(reply), nil
}
// Close disconnects this peer from the bus by closing the wrapped client, and
// returns that client's error unchanged.
func (s *Session) Close() error {
	err := s.c.Close()
	return err
}
-70
View File
@@ -456,23 +456,6 @@ type memberRoomJSON struct {
Role string `json:"role"` Role string `json:"role"`
} }
// userJSON mirrors the server's wire type on the admin user-management endpoints.
// ListUsers decodes the GET /users response into a slice of these and copies
// each one field-for-field into a UserInfo.
type userJSON struct {
SignPub string `json:"sign_pub"`
Handle string `json:"handle"`
Role string `json:"role"`
Status string `json:"status"`
CreatedAt string `json:"created_at"`
// RevokedAt is left out of the JSON when empty (omitempty).
RevokedAt string `json:"revoked_at,omitempty"`
}
// addUserReq is the POST /users body (mirror of the server type). AddUser
// builds one from its three arguments and sends it unchanged.
type addUserReq struct {
SignPub string `json:"sign_pub"`
Handle string `json:"handle"`
Role string `json:"role"`
}
// ---- room operations ------------------------------------------------------ // ---- room operations ------------------------------------------------------
// RoomRef is a room this peer belongs to, returned by ListMyRooms. It is the // RoomRef is a room this peer belongs to, returned by ListMyRooms. It is the
@@ -507,59 +490,6 @@ func (c *Client) ListMyRooms() ([]RoomRef, error) {
return out, nil return out, nil
} }
// ---- user administration (admin-only) ------------------------------------
// UserInfo is a bus user as returned by the admin user-management endpoints. It
// is a flat view (no nested types) for the admin panel: the signing key
// (lowercase hex), handle, role ("admin"|"member"), status ("active"|"revoked"),
// and timestamps. RevokedAt is empty for an active user.
//
// ListUsers fills every field verbatim from the wire response, so the
// timestamps stay in whatever string form the server sent (the format is not
// visible from this file).
type UserInfo struct {
SignPub string
Handle string
Role string
Status string
CreatedAt string
RevokedAt string
}
// ListUsers fetches the full bus allowlist with GET /users, revoked users
// included, and returns it as UserInfo values in the order the server sent
// them. The caller must be signing as an admin: the server rejects a non-admin
// signer with 403, which surfaces here as an error. An empty allowlist yields
// an empty, non-nil slice.
func (c *Client) ListUsers() ([]UserInfo, error) {
	var wire []userJSON
	if err := c.doJSON("GET", "/users", nil, &wire); err != nil {
		return nil, err
	}

	users := make([]UserInfo, len(wire))
	for i, w := range wire {
		users[i] = UserInfo{
			SignPub:   w.SignPub,
			Handle:    w.Handle,
			Role:      w.Role,
			Status:    w.Status,
			CreatedAt: w.CreatedAt,
			RevokedAt: w.RevokedAt,
		}
	}
	return users, nil
}
// AddUser registers a bus user, identified by their Ed25519 signing public key
// (64-hex), by POSTing an addUserReq to /users. role is "admin" or "member";
// an empty role defaults to member, matching the server. The caller must be
// signing as an admin. Re-adding an already-registered key returns an error:
// the server replies 409 and leaves the existing row untouched, so there is no
// silent role/status change.
func (c *Client) AddUser(signPub, handle, role string) error {
	body := addUserReq{
		SignPub: signPub,
		Handle:  handle,
		Role:    role,
	}
	return c.doJSON("POST", "/users", body, nil)
}
// RevokeUser revokes the bus user with the given signing public key (64-hex)
// by POSTing to /users/<signPub>/revoke. Revocation is a status flip, not a
// hard delete: the identity stays auditable and is denied on both planes
// immediately. The caller must be signing as an admin.
//
// NOTE(review): signPub is spliced into the request path without escaping, so
// callers are expected to pass a plain hex key.
func (c *Client) RevokeUser(signPub string) error {
	path := "/users/" + signPub + "/revoke"
	return c.doJSON("POST", path, nil, nil)
}
// newRoomKey returns 32 random bytes for a symmetric room key. // newRoomKey returns 32 random bytes for a symmetric room key.
func newRoomKey() ([]byte, error) { func newRoomKey() ([]byte, error) {
k := make([]byte, 32) k := make([]byte, 32)
+5 -21
View File
@@ -33,17 +33,11 @@ type identityFile struct {
KexPriv string `json:"kex_priv"` KexPriv string `json:"kex_priv"`
} }
// LoadIdentity loads an existing identity from path. Unlike LoadOrCreateIdentity // LoadOrCreateIdentity loads the identity at path, or generates and persists a
// it NEVER creates one: a missing or unreadable file is an error. It is for // new one if the file does not exist. The file is written with 0600
// callers that must consume a specific, pre-provisioned identity rather than mint // permissions because it holds private keys.
// a fresh one — for example membershipd's persisted internal service identity, func LoadOrCreateIdentity(path string) (cs.Identity, error) {
// which `membershipd user add --store kv` reads to present the privileged nkey if data, err := os.ReadFile(path); err == nil {
// the cluster authenticator recognizes.
func LoadIdentity(path string) (cs.Identity, error) {
data, err := os.ReadFile(path)
if err != nil {
return cs.Identity{}, fmt.Errorf("client: read identity %q: %w", path, err)
}
var f identityFile var f identityFile
if err := json.Unmarshal(data, &f); err != nil { if err := json.Unmarshal(data, &f); err != nil {
return cs.Identity{}, fmt.Errorf("client: parse identity %q: %w", path, err) return cs.Identity{}, fmt.Errorf("client: parse identity %q: %w", path, err)
@@ -53,16 +47,6 @@ func LoadIdentity(path string) (cs.Identity, error) {
return cs.Identity{}, fmt.Errorf("client: decode identity %q: %w", path, err) return cs.Identity{}, fmt.Errorf("client: decode identity %q: %w", path, err)
} }
return id, nil return id, nil
}
// LoadOrCreateIdentity loads the identity at path, or generates and persists a
// new one if the file does not exist. The file is written with 0600
// permissions because it holds private keys. A file that exists but is
// unreadable or corrupt is an error (NOT silently regenerated), so a damaged
// identity surfaces instead of minting a new key that cannot decrypt old data.
func LoadOrCreateIdentity(path string) (cs.Identity, error) {
if _, statErr := os.Stat(path); statErr == nil {
return LoadIdentity(path)
} }
id, err := cs.GenerateIdentity() id, err := cs.GenerateIdentity()
-99
View File
@@ -1,99 +0,0 @@
package client_test
import (
"encoding/hex"
"strings"
"testing"
"github.com/enmanuel/unibus/pkg/client"
"github.com/enmanuel/unibus/pkg/membership"
)
// findUserInfo looks up the first row whose signing key equals signPub once
// both sides are lowercased. It returns that row and true, or the zero
// UserInfo and false when no row matches.
func findUserInfo(users []client.UserInfo, signPub string) (client.UserInfo, bool) {
	needle := strings.ToLower(signPub)
	for i := range users {
		if strings.ToLower(users[i].SignPub) != needle {
			continue
		}
		return users[i], true
	}
	return client.UserInfo{}, false
}
// TestClientUsersAdminAPI drives the admin user-management API through the real
// pkg/client methods against an in-process membershipd under enforce: an admin
// client adds a user, lists it, revokes it, and sees the status flip — and a
// non-admin client is denied. This is the path the admin panel uses, so it locks
// the client/server contract the panel depends on.
//
// The steps are order-dependent: each assertion relies on the state left by the
// previous call (add -> list -> conflicting re-add -> revoke -> list -> denials).
func TestClientUsersAdminAPI(t *testing.T) {
// Harness in enforce mode, so signatures and roles are actually checked.
h := newHarnessMode(t, membership.AuthEnforce)
waitHealth(t, h.ctrlURL)
// Two connected clients with fresh identities, registered as admin and member.
admin, err := client.New(h.natsURL, h.ctrlURL, mustIdentity(t))
if err != nil {
t.Fatalf("connect admin: %v", err)
}
defer admin.Close()
registerClient(t, h, admin, "admin", membership.RoleAdmin)
member, err := client.New(h.natsURL, h.ctrlURL, mustIdentity(t))
if err != nil {
t.Fatalf("connect member: %v", err)
}
defer member.Close()
registerClient(t, h, member, "member", membership.RoleMember)
// A brand-new identity the admin will register over HTTP.
carol := mustIdentity(t)
// The API addresses users by the hex encoding of their signing public key.
carolPub := hex.EncodeToString(carol.SignPub)
// Admin adds carol as a member.
if err := admin.AddUser(carolPub, "carol", membership.RoleMember); err != nil {
t.Fatalf("admin AddUser: %v", err)
}
// Admin lists: carol present and active.
users, err := admin.ListUsers()
if err != nil {
t.Fatalf("admin ListUsers: %v", err)
}
row, ok := findUserInfo(users, carolPub)
if !ok {
t.Fatalf("carol missing from list after add: %+v", users)
}
if row.Status != membership.StatusActive || row.Role != membership.RoleMember {
t.Fatalf("carol row wrong after add: %+v", row)
}
// Re-adding the same key is a conflict surfaced as an error (no silent upsert).
if err := admin.AddUser(carolPub, "carol-again", membership.RoleAdmin); err == nil {
t.Fatalf("re-adding carol should error (409), got nil")
}
// Admin revokes carol; list shows the status flip (no hard delete).
if err := admin.RevokeUser(carolPub); err != nil {
t.Fatalf("admin RevokeUser: %v", err)
}
users, err = admin.ListUsers()
if err != nil {
t.Fatalf("admin ListUsers after revoke: %v", err)
}
row, ok = findUserInfo(users, carolPub)
if !ok {
t.Fatalf("carol vanished after revoke (should be a status flip): %+v", users)
}
if row.Status != membership.StatusRevoked {
t.Fatalf("carol should be revoked, got status %q", row.Status)
}
// A non-admin (member) is denied on every user-management method.
if _, err := member.ListUsers(); err == nil {
t.Fatalf("non-admin ListUsers should error (403), got nil")
}
if err := member.AddUser(carolPub, "x", membership.RoleMember); err == nil {
t.Fatalf("non-admin AddUser should error (403), got nil")
}
if err := member.RevokeUser(carolPub); err == nil {
t.Fatalf("non-admin RevokeUser should error (403), got nil")
}
}
+1 -60
View File
@@ -9,7 +9,6 @@ import (
"crypto/tls" "crypto/tls"
"fmt" "fmt"
"net/url" "net/url"
"os"
"time" "time"
server "github.com/nats-io/nats-server/v2/server" server "github.com/nats-io/nats-server/v2/server"
@@ -103,38 +102,10 @@ func StartHostAuth(storeDir, host string, port int, auth server.Authentication)
return StartServer(ServerConfig{StoreDir: storeDir, Host: host, Port: port, Auth: auth}) return StartServer(ServerConfig{StoreDir: storeDir, Host: host, Port: port, Auth: auth})
} }
// natsLogOpts turns the values of the two environment toggles into the
// embedded nats-server logging and monitoring flags. It performs no I/O (the
// caller reads the environment), so the relationship between the toggles can be
// unit-tested directly.
//
//   - debugEnv (UNIBUS_NATS_DEBUG): "1" enables the nats-server logger
//     (route/RAFT/JetStream errors); "2" additionally enables protocol tracing.
//     Any other value leaves the server silent (noLog), so production behavior
//     is unchanged by default.
//   - monitorEnv (UNIBUS_NATS_MONITOR): "1" opens the monitoring HTTP endpoint
//     (loopback only) so a local metrics scraper can read /varz, /connz and /jsz.
//
// The toggles are deliberately DECOUPLED in one direction: enabling monitoring
// never turns on the verbose debug log, which would write room subjects and
// routing metadata to journald in clear and regress the hardened posture
// (issue 0007). The reverse coupling is kept for backward compatibility: debug
// mode also exposes the monitoring endpoint (debug implies monitor), so
// existing debugging workflows keep working.
func natsLogOpts(debugEnv, monitorEnv string) (noLog, debug, trace, monitor bool) {
	switch debugEnv {
	case "2":
		trace = true
		debug = true
	case "1":
		debug = true
	}
	noLog = !debug
	monitor = debug || monitorEnv == "1"
	return noLog, debug, trace, monitor
}
// StartServer launches an embedded nats-server with JetStream from cfg. It // StartServer launches an embedded nats-server with JetStream from cfg. It
// blocks until the server is ready to accept connections (up to 5s) and returns // blocks until the server is ready to accept connections (up to 5s) and returns
// the running server; the caller must Shutdown it. // the running server; the caller must Shutdown it.
func StartServer(cfg ServerConfig) (*server.Server, error) { func StartServer(cfg ServerConfig) (*server.Server, error) {
// Map the two independent env toggles to the nats-server logging + monitoring
// flags. See natsLogOpts for the decoupling rationale (issue 0007).
noLog, debugNATS, traceNATS, monitorNATS := natsLogOpts(
os.Getenv("UNIBUS_NATS_DEBUG"), os.Getenv("UNIBUS_NATS_MONITOR"))
opts := &server.Options{ opts := &server.Options{
JetStream: true, JetStream: true,
StoreDir: cfg.StoreDir, StoreDir: cfg.StoreDir,
@@ -143,20 +114,9 @@ func StartServer(cfg ServerConfig) (*server.Server, error) {
ServerName: cfg.ServerName, ServerName: cfg.ServerName,
DontListen: false, DontListen: false,
// Keep the embedded server quiet by default; the host app logs the URLs. // Keep the embedded server quiet by default; the host app logs the URLs.
NoLog: noLog, NoLog: true,
Debug: debugNATS,
Trace: traceNATS,
Logtime: true,
NoSigs: true, NoSigs: true,
} }
if monitorNATS {
// Expose the nats-server monitoring endpoint on LOOPBACK ONLY (never public):
// the operator (or a local metrics scraper) inspects /varz, /connz, /jsz,
// /routez. The 127.0.0.1 bind is mandatory because this endpoint has no auth;
// it must stay unreachable from the network.
opts.HTTPHost = "127.0.0.1"
opts.HTTPPort = 8222
}
if cfg.Auth != nil { if cfg.Auth != nil {
opts.CustomClientAuthentication = cfg.Auth opts.CustomClientAuthentication = cfg.Auth
// A CustomClientAuthentication alone does not make the server advertise a // A CustomClientAuthentication alone does not make the server advertise a
@@ -181,10 +141,6 @@ func StartServer(cfg ServerConfig) (*server.Server, error) {
return nil, fmt.Errorf("embeddednats: new server: %w", err) return nil, fmt.Errorf("embeddednats: new server: %w", err)
} }
if debugNATS {
ns.ConfigureLogger()
}
go ns.Start() go ns.Start()
if !ns.ReadyForConnections(5 * time.Second) { if !ns.ReadyForConnections(5 * time.Second) {
@@ -206,21 +162,6 @@ func applyClusterOpts(opts *server.Options, c *ClusterConfig) error {
Port: c.Port, Port: c.Port,
Username: c.Username, Username: c.Username,
Password: c.Password, Password: c.Password,
// Disable route connection pooling (nats-server 2.10+ defaults to a pool of
// 3 connections per peer). On a small cluster the pool churns with
// "duplicate route"/"client closed" reconnects that interrupt the meta-group
// RAFT heartbeats, causing perpetual leader re-elections so the JetStream
// meta never becomes current and stream/KV creation hangs (issue 0006g).
// PoolSize=-1 forces the classic single route per peer, which is stable for
// the 3-node unibus cluster.
PoolSize: -1,
// NoAdvertise stops the server from gossiping its locally-discovered IPs to
// peers. The cluster nodes are Docker hosts, so without this NATS advertises
// the docker bridge addresses (172.x / 10.0.x) as reachable routes; peers
// then try to dial those private, mutually-unreachable IPs, churning the
// route layer and destabilizing the JetStream meta-group. With NoAdvertise
// the nodes use ONLY the explicit public-IP routes we configure (issue 0006g).
NoAdvertise: true,
} }
if c.TLS != nil { if c.TLS != nil {
opts.Cluster.TLSConfig = c.TLS opts.Cluster.TLSConfig = c.TLS
-134
View File
@@ -1,134 +0,0 @@
package embeddednats
import (
"io"
"net"
"net/http"
"testing"
"time"
)
// TestNatsLogOptsDecoupled is the main regression guard for issue 0007:
// switching on the monitoring endpoint must NEVER switch on the verbose
// nats-server debug log (which would leak room subjects/routing metadata to
// journald). The table also pins the backward-compatible coupling (debug still
// implies monitoring), the quiet default, and that unrecognized values are
// ignored.
func TestNatsLogOptsDecoupled(t *testing.T) {
	type flags struct{ noLog, debug, trace, monitor bool }
	table := []struct {
		name       string
		debugEnv   string
		monitorEnv string
		want       flags
	}{
		{"default off — quiet, no monitor", "", "", flags{true, false, false, false}},
		{"monitor only — endpoint on, log stays quiet", "", "1", flags{true, false, false, true}},
		{"debug implies monitor", "1", "", flags{false, true, false, true}},
		{"trace implies debug+monitor", "2", "", flags{false, true, true, true}},
		{"both set", "1", "1", flags{false, true, false, true}},
		{"monitor garbage value ignored", "", "yes", flags{true, false, false, false}},
		{"debug garbage value ignored", "true", "", flags{true, false, false, false}},
	}
	for _, tc := range table {
		t.Run(tc.name, func(t *testing.T) {
			var got flags
			got.noLog, got.debug, got.trace, got.monitor = natsLogOpts(tc.debugEnv, tc.monitorEnv)
			if got == tc.want {
				return
			}
			t.Fatalf("natsLogOpts(%q,%q) = (noLog=%v debug=%v trace=%v monitor=%v), want (noLog=%v debug=%v trace=%v monitor=%v)",
				tc.debugEnv, tc.monitorEnv, got.noLog, got.debug, got.trace, got.monitor,
				tc.want.noLog, tc.want.debug, tc.want.trace, tc.want.monitor)
		})
	}

	// Golden assertion of the security property itself: monitor on, log off.
	quiet, verbose, _, monitoring := natsLogOpts("", "1")
	if !monitoring {
		t.Fatal("UNIBUS_NATS_MONITOR=1 must open the monitoring endpoint")
	}
	if verbose || !quiet {
		t.Fatalf("UNIBUS_NATS_MONITOR=1 must NOT enable the debug log (got debug=%v noLog=%v)", verbose, quiet)
	}
}
// TestMonitorEndpointLoopback boots a real embedded server with
// UNIBUS_NATS_MONITOR=1 (and DEBUG explicitly off) and proves the monitoring
// HTTP endpoint answers on loopback only, on the fixed port 8222 — the contract
// the metrics scraper relies on. TestNatsLogOptsDecoupled already guarantees
// the log stays out of debug mode for this same env combination.
//
// Because the port is fixed, this test cannot run concurrently with another
// process (or test) that holds 127.0.0.1:8222.
func TestMonitorEndpointLoopback(t *testing.T) {
	t.Setenv("UNIBUS_NATS_DEBUG", "")
	t.Setenv("UNIBUS_NATS_MONITOR", "1")

	ns, err := StartServer(ServerConfig{
		StoreDir: t.TempDir(),
		Host:     "127.0.0.1",
		Port:     freeLoopbackPort(t),
	})
	if err != nil {
		t.Fatalf("start server with monitoring: %v", err)
	}
	defer func() { ns.Shutdown(); ns.WaitForShutdown() }()

	addr := ns.MonitorAddr()
	if addr == nil {
		t.Fatal("monitoring endpoint not open with UNIBUS_NATS_MONITOR=1 (MonitorAddr is nil)")
	}
	if !addr.IP.IsLoopback() {
		t.Fatalf("monitoring endpoint bound to %s, must be loopback only", addr.IP)
	}
	if addr.Port != 8222 {
		t.Fatalf("monitoring endpoint on port %d, want the fixed loopback port 8222", addr.Port)
	}

	// /varz must answer 200 with a non-empty body on loopback. Each attempt is
	// bounded by a client timeout so a hung endpoint cannot stall the test past
	// the retry deadline (the default http client has no timeout at all).
	url := "http://" + addr.String() + "/varz"
	httpc := &http.Client{Timeout: time.Second}
	var resp *http.Response
	deadline := time.Now().Add(3 * time.Second)
	for time.Now().Before(deadline) {
		resp, err = httpc.Get(url) //nolint:gosec // loopback monitoring endpoint, no auth by design
		if err == nil {
			break
		}
		time.Sleep(50 * time.Millisecond)
	}
	if err != nil {
		t.Fatalf("GET %s: %v", url, err)
	}
	defer resp.Body.Close()

	if resp.StatusCode != http.StatusOK {
		t.Fatalf("GET %s -> %d, want 200", url, resp.StatusCode)
	}
	// Report a read failure as such instead of as a misleading "empty body".
	body, err := io.ReadAll(resp.Body)
	if err != nil {
		t.Fatalf("GET %s: read body: %v", url, err)
	}
	if len(body) == 0 {
		t.Fatalf("GET %s returned an empty body", url)
	}
}
// TestMonitorDisabledByDefault proves a server started without either toggle does
// NOT open the monitoring endpoint, so production stays closed unless opted in.
func TestMonitorDisabledByDefault(t *testing.T) {
t.Setenv("UNIBUS_NATS_DEBUG", "")
t.Setenv("UNIBUS_NATS_MONITOR", "")
ns, err := StartServer(ServerConfig{
StoreDir: t.TempDir(),
Host: "127.0.0.1",
Port: freeLoopbackPort(t),
})
if err != nil {
t.Fatalf("start server: %v", err)
}
defer func() { ns.Shutdown(); ns.WaitForShutdown() }()
if addr := ns.MonitorAddr(); addr != nil {
t.Fatalf("monitoring endpoint open (%s) without UNIBUS_NATS_MONITOR — must stay closed by default", addr)
}
}
// freeLoopbackPort asks the kernel for an unused TCP port on 127.0.0.1 by
// listening on port 0, then releases the listener and returns the port number.
// The port is only free at the moment of return; nothing reserves it
// afterwards. A listen failure aborts the test.
func freeLoopbackPort(t *testing.T) int {
	t.Helper()

	ln, err := net.Listen("tcp", "127.0.0.1:0")
	if err != nil {
		t.Fatalf("free port: %v", err)
	}
	defer ln.Close()

	tcpAddr := ln.Addr().(*net.TCPAddr)
	return tcpAddr.Port
}
+5 -28
View File
@@ -85,18 +85,8 @@ func OpenJetStream(js jetstream.JetStream, cfg JetStreamConfig) (Store, error) {
if opTimeout <= 0 { if opTimeout <= 0 {
opTimeout = defaultKVOpTime opTimeout = defaultKVOpTime
} }
// Bootstrap budget for creating/opening the buckets. On a single node JetStream ctx, cancel := context.WithTimeout(context.Background(), 15*time.Second)
// is ready the instant the server starts, so the first attempt succeeds. On a defer cancel()
// COLD multi-node cluster the JetStream meta-group must first elect a leader and
// each node must establish contact with it before its $JS.API responds. A KV
// op is a NATS request/reply: if it is published before the node's JetStream is
// ready the request is dropped (not queued), and a single long-context call then
// just blocks until it times out (issue 0006g). So we RETRY each bucket op with
// short per-attempt contexts until it succeeds or the overall bootstrap budget
// is exhausted; once the cluster is ready the next retry lands and the buckets
// are created, after which they persist and every node opens them quickly.
bootstrapBudget := 120 * time.Second
deadline := time.Now().Add(bootstrapBudget)
s := &jetstreamStore{opTimeout: opTimeout} s := &jetstreamStore{opTimeout: opTimeout}
for _, b := range []struct { for _, b := range []struct {
@@ -109,27 +99,14 @@ func OpenJetStream(js jetstream.JetStream, cfg JetStreamConfig) (Store, error) {
{bucketRoomKeys, &s.keys}, {bucketRoomKeys, &s.keys},
{bucketUsers, &s.users}, {bucketUsers, &s.users},
} { } {
var kv jetstream.KeyValue kv, err := js.CreateOrUpdateKeyValue(ctx, jetstream.KeyValueConfig{
var lastErr error
for {
opCtx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
kv, lastErr = js.CreateOrUpdateKeyValue(opCtx, jetstream.KeyValueConfig{
Bucket: b.name, Bucket: b.name,
Replicas: cfg.Replicas, Replicas: cfg.Replicas,
History: 1, History: 1,
Storage: jetstream.FileStorage, Storage: jetstream.FileStorage,
}) })
cancel() if err != nil {
if lastErr == nil { return nil, fmt.Errorf("membership: open KV bucket %q (replicas=%d): %w", b.name, cfg.Replicas, err)
break
}
if time.Now().After(deadline) {
return nil, fmt.Errorf("membership: open KV bucket %q (replicas=%d) after %s: %w", b.name, cfg.Replicas, bootstrapBudget, lastErr)
}
// JetStream not ready yet (no meta leader / request dropped). Wait and
// re-publish the op; in a cluster cold start this lands once the meta
// group settles.
time.Sleep(1 * time.Second)
} }
*b.dst = kv *b.dst = kv
} }
+7 -173
View File
@@ -213,12 +213,9 @@ func (s *Server) ServeHTTP(w http.ResponseWriter, r *http.Request) {
writeErr(w, http.StatusUnauthorized, "unauthorized: "+err.Error()) writeErr(w, http.StatusUnauthorized, "unauthorized: "+err.Error())
return return
} }
// Carry the authenticated signer's endpoint AND signing key into the handler. // Carry the authenticated signer's endpoint into the handler so room handlers
// Room handlers authorize by membership via the endpoint (audit H3); the // can authorize by membership (audit H3). Only set on a verified identity.
// user-management handlers authorize by role via the signing key (the endpoint s.mux.ServeHTTP(w, r.WithContext(withSigner(r.Context(), res.endpoint)))
// id is a one-way hash of the key, so it cannot be reversed to look the signer
// up in the user allowlist). Both are set only on a verified identity.
s.mux.ServeHTTP(w, r.WithContext(withSigner(r.Context(), res.endpoint, res.pubHex)))
} }
// isBodyTooLarge reports whether err is the sentinel returned by MaxBytesReader // isBodyTooLarge reports whether err is the sentinel returned by MaxBytesReader
@@ -232,19 +229,11 @@ func isBodyTooLarge(err error) bool {
// values cannot collide with keys set by other packages. // values cannot collide with keys set by other packages.
type ctxKey int type ctxKey int
const ( const ctxSignerEndpoint ctxKey = iota
ctxSignerEndpoint ctxKey = iota
ctxSignerPub
)
// withSigner returns a context carrying the authenticated signer's endpoint id // withSigner returns a context carrying the authenticated signer's endpoint id.
// and signing public key (lowercase hex). The endpoint authorizes room func withSigner(ctx context.Context, endpoint string) context.Context {
// membership; the signing key authorizes user-management by role, because the return context.WithValue(ctx, ctxSignerEndpoint, endpoint)
// endpoint id is a one-way hash of the key (base64url(sha256(signPub))) and so
// cannot be reversed to look the signer up in the user allowlist.
func withSigner(ctx context.Context, endpoint, pubHex string) context.Context {
ctx = context.WithValue(ctx, ctxSignerEndpoint, endpoint)
return context.WithValue(ctx, ctxSignerPub, pubHex)
} }
// signerEndpoint returns the authenticated signer's endpoint id and whether one // signerEndpoint returns the authenticated signer's endpoint id and whether one
@@ -256,16 +245,6 @@ func signerEndpoint(r *http.Request) (string, bool) {
return v, ok && v != "" return v, ok && v != ""
} }
// signerPubHex reads the authenticated signer's signing public key (lowercase
// hex) from the request context and reports whether a non-empty one is
// present. As with signerEndpoint, it is absent under AuthOff and on a
// soft-mode pass-through; the user-management handlers treat that absence as
// "no admin identity" and deny (default-deny), because a privilege-granting
// operation must never run without a verified admin.
func signerPubHex(r *http.Request) (string, bool) {
	pubHex, isString := r.Context().Value(ctxSignerPub).(string)
	if !isString || pubHex == "" {
		// pubHex is "" on both paths: a failed assertion yields the zero value.
		return pubHex, false
	}
	return pubHex, true
}
// requireMember authorizes a room request by membership (audit H3): it returns // requireMember authorizes a room request by membership (audit H3): it returns
// the signer endpoint and true when the request may proceed, or writes 403 and // the signer endpoint and true when the request may proceed, or writes 403 and
// returns false when an authenticated signer is not a member of roomID. When no // returns false when an authenticated signer is not a member of roomID. When no
@@ -283,31 +262,6 @@ func (s *Server) requireMember(w http.ResponseWriter, r *http.Request, roomID st
return signer, true return signer, true
} }
// requireAdmin gates a user-management request on the signer's role. It returns
// the signer's signing-key hex and true ONLY when the request carries an
// authenticated signer whose user record has role admin and status active. In
// every other case it writes a 403 and returns ("", false).
//
// This is default-deny with no dev relaxation: requireMember lets a request
// through when no authenticated signer is present (keeping AuthOff/dev behavior
// for room reads), whereas this helper refuses whenever the signer is missing
// or is not a verified active admin. The user-management endpoints grant and
// revoke bus access, so they must never be reachable without a verified admin
// identity. The store is queried on each call, so a just-revoked admin is
// rejected immediately, and any store error fails closed.
func (s *Server) requireAdmin(w http.ResponseWriter, r *http.Request) (string, bool) {
	signer, present := signerPubHex(r)
	if present {
		// Short-circuit on err first so the record is only inspected when the
		// lookup succeeded.
		if user, err := s.store.GetUser(signer); err == nil && user.Role == RoleAdmin && user.Status == StatusActive {
			return signer, true
		}
	}
	// Single deny path: missing signer, lookup failure, wrong role and
	// non-active status are indistinguishable to the caller.
	writeErr(w, http.StatusForbidden, "forbidden: admin role required")
	return "", false
}
// isAuthExempt lists requests that bypass control-plane auth even under enforce. // isAuthExempt lists requests that bypass control-plane auth even under enforce.
// Only the unauthenticated health probe qualifies: it carries no data and is // Only the unauthenticated health probe qualifies: it carries no data and is
// needed by load balancers / smoke checks / systemd before any identity exists. // needed by load balancers / smoke checks / systemd before any identity exists.
@@ -326,13 +280,6 @@ func (s *Server) routes() {
s.mux.HandleFunc("GET /rooms/{id}", s.handleGetRoom) s.mux.HandleFunc("GET /rooms/{id}", s.handleGetRoom)
s.mux.HandleFunc("POST /blobs", s.handlePutBlob) s.mux.HandleFunc("POST /blobs", s.handlePutBlob)
s.mux.HandleFunc("GET /blobs/{hash}", s.handleGetBlob) s.mux.HandleFunc("GET /blobs/{hash}", s.handleGetBlob)
// User-management (admin-only) — the HTTP-signed equivalent of the local
// `membershipd user` CLI, so the admin panel manages the bus allowlist by
// signing as an admin instead of needing direct store/KV access. All three
// pass through requireAdmin; they hit the same store the room handlers do.
s.mux.HandleFunc("GET /users", s.handleListUsers)
s.mux.HandleFunc("POST /users", s.handleAddUser)
s.mux.HandleFunc("POST /users/{signpub}/revoke", s.handleRevokeUser)
} }
// ---- wire types ----------------------------------------------------------- // ---- wire types -----------------------------------------------------------
@@ -410,27 +357,6 @@ type blobResp struct {
Hash string `json:"hash"` Hash string `json:"hash"`
} }
// userJSON is the wire representation of a bus user on the admin endpoints. It
// carries the full record the panel needs to render the allowlist, including
// status (so revoked users are visible) and the timestamps. revoked_at is
// omitted for an active user.
//
// handleListUsers fills every field by copying the same-named field of the
// store's user record, so the encoding of each value is whatever the store
// keeps.
type userJSON struct {
// SignPub is the user's signing key as held by the store (presumably
// lowercase hex, given the store normalizes keys — confirm).
SignPub string `json:"sign_pub"`
// Handle is the human-readable name given when the user was added.
Handle string `json:"handle"`
// Role is the user's role; handleAddUser only accepts RoleAdmin or RoleMember.
Role string `json:"role"`
// Status is the user's allowlist state (StatusActive or StatusRevoked).
Status string `json:"status"`
// CreatedAt is the creation timestamp as a string; its format is set by the
// store and is not visible here.
CreatedAt string `json:"created_at"`
// RevokedAt is the revocation timestamp; omitempty drops it from the JSON
// when it is empty.
RevokedAt string `json:"revoked_at,omitempty"`
}
// addUserReq is the POST /users body: the new user's Ed25519 signing key
// (64-hex), human handle, and role. role is optional and defaults to member.
type addUserReq struct {
// SignPub is required and must pass ValidateSignPubHex (32 bytes as 64 hex
// characters).
SignPub string `json:"sign_pub"`
// Handle is required; handleAddUser rejects an empty value with 400.
Handle string `json:"handle"`
// Role may be empty (treated as RoleMember); any other value must be
// RoleAdmin or RoleMember or the request is rejected with 400.
Role string `json:"role"`
}
// ---- helpers -------------------------------------------------------------- // ---- helpers --------------------------------------------------------------
func writeJSON(w http.ResponseWriter, code int, v any) { func writeJSON(w http.ResponseWriter, code int, v any) {
@@ -748,95 +674,3 @@ func (s *Server) handleGetBlob(w http.ResponseWriter, r *http.Request) {
w.WriteHeader(http.StatusOK) w.WriteHeader(http.StatusOK)
_, _ = w.Write(data) _, _ = w.Write(data)
} }
// ---- user-management handlers (admin-only) --------------------------------
// handleListUsers serves GET /users: it writes the complete bus allowlist as a
// JSON array of userJSON rows. Revoked users are included so a revoked identity
// remains auditable. Admin-only: the request is rejected by requireAdmin unless
// it is signed by an active admin. A store failure is reported as a 500.
func (s *Server) handleListUsers(w http.ResponseWriter, r *http.Request) {
	if _, isAdmin := s.requireAdmin(w, r); !isAdmin {
		return
	}

	records, err := s.store.ListUsers()
	if err != nil {
		writeServerErr(w, r, http.StatusInternalServerError, "internal error", err)
		return
	}

	// Start from a non-nil, pre-sized slice so an empty allowlist encodes as
	// [] rather than null.
	rows := make([]userJSON, 0, len(records))
	for _, rec := range records {
		row := userJSON{
			SignPub:   rec.SignPub,
			Handle:    rec.Handle,
			Role:      rec.Role,
			Status:    rec.Status,
			CreatedAt: rec.CreatedAt,
			RevokedAt: rec.RevokedAt,
		}
		rows = append(rows, row)
	}
	writeJSON(w, http.StatusOK, rows)
}
// handleAddUser serves POST /users: it registers a new bus user from an
// admin-supplied Ed25519 signing key, mirroring the `membershipd user add` CLI.
//
// Validation, in order: the body must be valid JSON (400), sign_pub and handle
// must both be non-empty (400), sign_pub must pass ValidateSignPubHex (400), and
// role must be admin or member, with an empty role defaulting to member (400).
// Re-adding an already-registered key yields 409 and leaves the existing row
// untouched — there is no silent upsert that could flip a role or clobber
// status. Any other store failure is a 500; success is 201. Admin-only.
func (s *Server) handleAddUser(w http.ResponseWriter, r *http.Request) {
	if _, isAdmin := s.requireAdmin(w, r); !isAdmin {
		return
	}

	var in addUserReq
	if decodeErr := json.NewDecoder(r.Body).Decode(&in); decodeErr != nil {
		writeErr(w, http.StatusBadRequest, "bad json: "+decodeErr.Error())
		return
	}
	if missing := in.SignPub == "" || in.Handle == ""; missing {
		writeErr(w, http.StatusBadRequest, "sign_pub and handle required")
		return
	}
	if keyErr := ValidateSignPubHex(in.SignPub); keyErr != nil {
		writeErr(w, http.StatusBadRequest, keyErr.Error())
		return
	}

	// An omitted role means "member"; anything else must be a known role.
	role := in.Role
	if role == "" {
		role = RoleMember
	}
	if known := role == RoleAdmin || role == RoleMember; !known {
		msg := fmt.Sprintf("invalid role %q (want %q or %q)", role, RoleAdmin, RoleMember)
		writeErr(w, http.StatusBadRequest, msg)
		return
	}

	err := s.store.AddUser(in.SignPub, in.Handle, role)
	switch {
	case err == nil:
		writeJSON(w, http.StatusCreated, map[string]string{"status": "added"})
	case errors.Is(err, ErrUserExists):
		// Idempotency contract (mirrors the CLI): re-adding a key is an
		// explicit, non-destructive conflict. To replace a user, revoke it and
		// then add it again.
		writeErr(w, http.StatusConflict,
			"user already registered (unchanged); revoke it first to replace")
	default:
		writeServerErr(w, r, http.StatusInternalServerError, "internal error", err)
	}
}
// handleRevokeUser serves POST /users/{signpub}/revoke: it revokes the bus user
// identified by the signing key in the path. Revocation is a status flip, not a
// hard delete, so the identity stays auditable and IsAuthorized denies it on
// both planes immediately. A malformed key is a 400; revoking an unknown or
// already-revoked key is a 404; success is 200. Admin-only.
//
// NOTE(review): every error from the store is answered with 404, so a storage
// failure would also surface as "no active user" — confirm RevokeUser can only
// fail for unknown/already-revoked keys, or distinguish the cases.
func (s *Server) handleRevokeUser(w http.ResponseWriter, r *http.Request) {
	if _, isAdmin := s.requireAdmin(w, r); !isAdmin {
		return
	}

	target := r.PathValue("signpub")
	if keyErr := ValidateSignPubHex(target); keyErr != nil {
		writeErr(w, http.StatusBadRequest, keyErr.Error())
		return
	}

	revokeErr := s.store.RevokeUser(target)
	if revokeErr != nil {
		writeServerErr(w, r, http.StatusNotFound, "no active user with that key", revokeErr)
		return
	}
	writeJSON(w, http.StatusOK, map[string]string{"status": "revoked"})
}
-18
View File
@@ -2,7 +2,6 @@ package membership
import ( import (
"database/sql" "database/sql"
"encoding/hex"
"errors" "errors"
"fmt" "fmt"
"strings" "strings"
@@ -36,23 +35,6 @@ type User struct {
RevokedAt string // empty unless revoked RevokedAt string // empty unless revoked
} }
// ValidateSignPubHex reports whether signPub is a 32-byte Ed25519 public key
// written as hex (64 hex characters). It returns nil when the key is
// well-formed, an error wrapping the hex decoding failure when signPub is not
// valid hex, and a length error when it decodes to anything other than 32 bytes.
//
// This is the single source of truth for the check, shared by the local admin
// CLI (which validates before seeding the first admin) and the HTTP
// user-management handlers (which validate an admin-supplied key before it
// reaches the store). Rejecting a malformed key at the boundary turns a silent
// "authorized nobody" into an explicit error.
func ValidateSignPubHex(signPub string) error {
	const wantBytes = 32

	raw, decodeErr := hex.DecodeString(signPub)
	switch {
	case decodeErr != nil:
		// Decoding failures take precedence over the length check.
		return fmt.Errorf("sign-pub is not valid hex: %w", decodeErr)
	case len(raw) != wantBytes:
		return fmt.Errorf("sign-pub must be a 32-byte Ed25519 public key (64 hex chars), got %d bytes", len(raw))
	default:
		return nil
	}
}
// normalizeSignPub lowercases the hex key so lookups are case-insensitive: the // normalizeSignPub lowercases the hex key so lookups are case-insensitive: the
// primary key is stored lowercase and every query normalizes its input the same // primary key is stored lowercase and every query normalizes its input the same
// way, so a caller passing uppercase hex still matches. // way, so a caller passing uppercase hex still matches.
-164
View File
@@ -1,164 +0,0 @@
package membership
import (
"encoding/hex"
"encoding/json"
"net/http"
"testing"
"time"
cs "fn-registry/functions/cybersecurity"
)
// signedJSON sends a signed request whose body is the JSON encoding of v (no
// body at all when v is nil). The request is signed as id, stamped with the
// current Unix time and the n-th test nonce, and goes through the same
// signedReq/do helpers as the auth tests, so it exercises the real signed wire
// contract. It returns the response status code and body.
func signedJSON(t *testing.T, h *authHarness, method, path string, v any, id cs.Identity, n int) (int, string) {
	t.Helper()

	var payload []byte
	if v != nil {
		encoded, err := json.Marshal(v)
		if err != nil {
			t.Fatalf("marshal body: %v", err)
		}
		payload = encoded
	}

	req := signedReq(t, h.ts.URL, method, path, payload, id, time.Now().Unix(), nonceN(n))
	return do(t, req)
}
// TestUsersHTTP_NonAdminForbidden is the security spine of the user-management
// API: a signer who IS registered but only has the member role (bob) must get a
// 403 from every user-management endpoint. His signature passes authentication
// because he is in the allowlist, so each request reaches its handler and is
// turned away there by requireAdmin — default-deny by role.
func TestUsersHTTP_NonAdminForbidden(t *testing.T) {
	h := newAuthHarness(t, AuthEnforce)

	// NOTE(review): the second result of GenerateIdentity is discarded here —
	// presumably an error; confirm and consider failing the test on it.
	member, _ := cs.GenerateIdentity()
	register(t, h, member, "bob") // role member (see register in authz_test.go)
	memberPub := hex.EncodeToString(member.SignPub)

	target, _ := cs.GenerateIdentity()
	targetPub := hex.EncodeToString(target.SignPub)

	type attempt struct {
		name, method, path string
		body               any
	}
	attempts := []attempt{
		{name: "list users", method: "GET", path: "/users"},
		{name: "add user", method: "POST", path: "/users",
			body: addUserReq{SignPub: targetPub, Handle: "mallory", Role: RoleMember}},
		{name: "revoke user", method: "POST", path: "/users/" + memberPub + "/revoke"},
	}

	for idx := range attempts {
		a := attempts[idx]
		// idx+1 gives every request its own nonce.
		status, resp := signedJSON(t, h, a.method, a.path, a.body, member, idx+1)
		if status == http.StatusForbidden {
			continue
		}
		t.Fatalf("non-admin %s should be 403, got %d (%s)", a.name, status, resp)
	}
}
// TestUsersHTTP_AdminRoundtrip walks the golden path end to end. Alice, the
// seeded admin, adds carol as a member; the listing then shows carol as active
// alongside alice; alice revokes carol; the next listing still contains carol,
// now with status revoked (revocation is a status flip, never a hard delete).
func TestUsersHTTP_AdminRoundtrip(t *testing.T) {
	h := newAuthHarness(t, AuthEnforce)

	// NOTE(review): the second result of GenerateIdentity is discarded —
	// presumably an error; confirm.
	newcomer, _ := cs.GenerateIdentity()
	newcomerPub := hex.EncodeToString(newcomer.SignPub)

	// Step 1: add carol as a member.
	addBody := addUserReq{SignPub: newcomerPub, Handle: "carol", Role: RoleMember}
	status, resp := signedJSON(t, h, "POST", "/users", addBody, h.alice, 1)
	if status != http.StatusCreated {
		t.Fatalf("admin add carol should be 201, got %d (%s)", status, resp)
	}

	// Step 2: the listing contains an active carol and the seed admin alice.
	roster := listUsers(t, h, 2)
	row, found := findUser(roster, newcomerPub)
	if !found {
		t.Fatalf("carol missing from list after add: %+v", roster)
	}
	if asAdded := row.Status == StatusActive && row.Role == RoleMember && row.Handle == "carol"; !asAdded {
		t.Fatalf("carol row wrong after add: %+v", row)
	}
	if _, found := findUser(roster, h.alicePub); !found {
		t.Fatalf("seeded admin alice missing from list: %+v", roster)
	}

	// Step 3: revoke carol.
	status, resp = signedJSON(t, h, "POST", "/users/"+newcomerPub+"/revoke", nil, h.alice, 3)
	if status != http.StatusOK {
		t.Fatalf("admin revoke carol should be 200, got %d (%s)", status, resp)
	}

	// Step 4: carol is still listed, but her status has flipped to revoked.
	roster = listUsers(t, h, 4)
	row, found = findUser(roster, newcomerPub)
	if !found {
		t.Fatalf("carol vanished from list after revoke (should be a status flip): %+v", roster)
	}
	if row.Status != StatusRevoked {
		t.Fatalf("carol should be revoked, got status %q", row.Status)
	}
}
// TestUsersHTTP_Validation pins the input-validation contract of POST /users: a
// malformed hex key is rejected with 400, an unknown role with 400, and an
// already-registered key with 409. After the conflict the existing row must be
// exactly as before — no silent upsert.
func TestUsersHTTP_Validation(t *testing.T) {
	h := newAuthHarness(t, AuthEnforce)

	// NOTE(review): the second result of GenerateIdentity is discarded —
	// presumably an error; confirm.
	fresh, _ := cs.GenerateIdentity()
	freshPub := hex.EncodeToString(fresh.SignPub)

	// A key that is valid hex but far too short -> 400.
	shortKey := addUserReq{SignPub: "abcd", Handle: "shorty", Role: RoleMember}
	status, resp := signedJSON(t, h, "POST", "/users", shortKey, h.alice, 1)
	if status != http.StatusBadRequest {
		t.Fatalf("malformed sign_pub should be 400, got %d (%s)", status, resp)
	}

	// A well-formed key with a role the server does not know -> 400.
	badRole := addUserReq{SignPub: freshPub, Handle: "weirdrole", Role: "superuser"}
	status, resp = signedJSON(t, h, "POST", "/users", badRole, h.alice, 2)
	if status != http.StatusBadRequest {
		t.Fatalf("invalid role should be 400, got %d (%s)", status, resp)
	}

	// Re-adding the seeded admin's own key -> 409 (idempotency, no overwrite).
	duplicate := addUserReq{SignPub: h.alicePub, Handle: "alice-again", Role: RoleMember}
	status, resp = signedJSON(t, h, "POST", "/users", duplicate, h.alice, 3)
	if status != http.StatusConflict {
		t.Fatalf("re-adding an existing key should be 409, got %d (%s)", status, resp)
	}

	// The conflicting request must not have touched the stored row: alice is
	// still an active admin with her original handle.
	stored, err := h.store.GetUser(h.alicePub)
	if err != nil {
		t.Fatalf("get alice after conflicting re-add: %v", err)
	}
	if intact := stored.Role == RoleAdmin && stored.Status == StatusActive && stored.Handle == "alice"; !intact {
		t.Fatalf("conflicting re-add mutated the existing row: %+v", stored)
	}
}
// listUsers issues GET /users signed as alice (using the n-th test nonce),
// requires a 200 response, and returns the decoded JSON array of users. Any
// other status or a body that does not decode fails the test immediately.
func listUsers(t *testing.T, h *authHarness, n int) []userJSON {
	t.Helper()

	status, raw := signedJSON(t, h, "GET", "/users", nil, h.alice, n)
	if status != http.StatusOK {
		t.Fatalf("admin list users should be 200, got %d (%s)", status, raw)
	}

	var decoded []userJSON
	if err := json.Unmarshal([]byte(raw), &decoded); err != nil {
		t.Fatalf("decode users: %v (%s)", err, raw)
	}
	return decoded
}
// findUser looks up the first row in users whose signing key equals signPub.
// Both sides are passed through normalizeSignPub before comparing, so the match
// is case-insensitive. It returns the row and true on a hit, or the zero
// userJSON and false when no row matches.
func findUser(users []userJSON, signPub string) (userJSON, bool) {
	target := normalizeSignPub(signPub)
	for i := range users {
		if normalizeSignPub(users[i].SignPub) != target {
			continue
		}
		return users[i], true
	}
	return userJSON{}, false
}