1c9325104c
Add a dedicated UNIBUS_NATS_MONITOR=1 toggle that opens the embedded nats-server monitoring HTTP endpoint (127.0.0.1:8222, loopback only) so a local metrics scraper can read /varz, /connz and /jsz for server-level metrics (msgs/s, connections, KV bucket msgs, RAFT leader per stream, restarts). Previously the monitoring endpoint was only reachable via UNIBUS_NATS_DEBUG=1, which is coupled to the verbose nats-server debug log: enabling the endpoint also wrote routes/RAFT/room subjects to journald in clear, which regresses the hardened posture (issue 0007). The two concerns are now decoupled. The toggle computation is extracted to a pure function natsLogOpts(debugEnv, monitorEnv) (noLog, debug, trace, monitor): MONITOR=1 opens the endpoint while keeping the log quiet (NoLog true / Debug false). The inverse coupling is preserved for backward compatibility (DEBUG still implies MONITOR). The 127.0.0.1 bind stays hardcoded — the monitoring endpoint has no auth and must never be reachable from the network. Deploy wiring versioned: additive systemd drop-in membershipd-cluster.service.d/nats-monitor.conf (Environment=UNIBUS_NATS_MONITOR=1) plus a "NATS server metrics" section in the cluster README with the rolling activation runbook (magnus -> homer -> datardos) gated on R3 reconvergence (followers 2/2) between nodes. Tests: pure decoupling table (monitor on => log NOT debug; debug => monitor; default closed) + a real embedded server with MONITOR=1 asserting /varz answers 200 on loopback:8222, and a server without the flag with the endpoint closed. 100% additive: behavior is identical without the flag. Bump app.md 0.10.0 -> 0.11.0. Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
135 lines
4.5 KiB
Go
135 lines
4.5 KiB
Go
package embeddednats
|
|
|
|
import (
|
|
"io"
|
|
"net"
|
|
"net/http"
|
|
"testing"
|
|
"time"
|
|
)
|
|
|
|
// TestNatsLogOptsDecoupled is the core regression guard for issue 0007: turning
|
|
// on the monitoring endpoint must NEVER turn on the verbose nats-server debug log
|
|
// (which would leak room subjects/routing metadata to journald). It also checks
|
|
// the backward-compatible coupling (debug still implies monitoring) and the quiet
|
|
// default.
|
|
func TestNatsLogOptsDecoupled(t *testing.T) {
|
|
cases := []struct {
|
|
name string
|
|
debugEnv, monitorEnv string
|
|
noLog, debug, trace, monitor bool
|
|
}{
|
|
{"default off — quiet, no monitor", "", "", true, false, false, false},
|
|
{"monitor only — endpoint on, log stays quiet", "", "1", true, false, false, true},
|
|
{"debug implies monitor", "1", "", false, true, false, true},
|
|
{"trace implies debug+monitor", "2", "", false, true, true, true},
|
|
{"both set", "1", "1", false, true, false, true},
|
|
{"monitor garbage value ignored", "", "yes", true, false, false, false},
|
|
{"debug garbage value ignored", "true", "", true, false, false, false},
|
|
}
|
|
for _, c := range cases {
|
|
t.Run(c.name, func(t *testing.T) {
|
|
noLog, debug, trace, monitor := natsLogOpts(c.debugEnv, c.monitorEnv)
|
|
if noLog != c.noLog || debug != c.debug || trace != c.trace || monitor != c.monitor {
|
|
t.Fatalf("natsLogOpts(%q,%q) = (noLog=%v debug=%v trace=%v monitor=%v), want (noLog=%v debug=%v trace=%v monitor=%v)",
|
|
c.debugEnv, c.monitorEnv, noLog, debug, trace, monitor,
|
|
c.noLog, c.debug, c.trace, c.monitor)
|
|
}
|
|
})
|
|
}
|
|
|
|
// Explicit golden assertion of the security property: monitor on, log off.
|
|
noLog, debug, _, monitor := natsLogOpts("", "1")
|
|
if !monitor {
|
|
t.Fatal("UNIBUS_NATS_MONITOR=1 must open the monitoring endpoint")
|
|
}
|
|
if debug || !noLog {
|
|
t.Fatalf("UNIBUS_NATS_MONITOR=1 must NOT enable the debug log (got debug=%v noLog=%v)", debug, noLog)
|
|
}
|
|
}
|
|
|
|
// TestMonitorEndpointLoopback boots a real embedded server with
|
|
// UNIBUS_NATS_MONITOR=1 (and DEBUG explicitly off) and proves the monitoring HTTP
|
|
// endpoint answers on loopback only — the exact contract the metrics scraper
|
|
// relies on. The pure decoupling check above already guarantees the log stays out
|
|
// of debug mode for this same env combination.
|
|
func TestMonitorEndpointLoopback(t *testing.T) {
|
|
t.Setenv("UNIBUS_NATS_DEBUG", "")
|
|
t.Setenv("UNIBUS_NATS_MONITOR", "1")
|
|
|
|
ns, err := StartServer(ServerConfig{
|
|
StoreDir: t.TempDir(),
|
|
Host: "127.0.0.1",
|
|
Port: freeLoopbackPort(t),
|
|
})
|
|
if err != nil {
|
|
t.Fatalf("start server with monitoring: %v", err)
|
|
}
|
|
defer func() { ns.Shutdown(); ns.WaitForShutdown() }()
|
|
|
|
addr := ns.MonitorAddr()
|
|
if addr == nil {
|
|
t.Fatal("monitoring endpoint not open with UNIBUS_NATS_MONITOR=1 (MonitorAddr is nil)")
|
|
}
|
|
if !addr.IP.IsLoopback() {
|
|
t.Fatalf("monitoring endpoint bound to %s, must be loopback only", addr.IP)
|
|
}
|
|
if addr.Port != 8222 {
|
|
t.Fatalf("monitoring endpoint on port %d, want the fixed loopback port 8222", addr.Port)
|
|
}
|
|
|
|
// /varz must answer 200 with a non-empty body on loopback.
|
|
url := "http://" + addr.String() + "/varz"
|
|
var resp *http.Response
|
|
deadline := time.Now().Add(3 * time.Second)
|
|
for time.Now().Before(deadline) {
|
|
resp, err = http.Get(url) //nolint:gosec // loopback monitoring endpoint, no auth by design
|
|
if err == nil {
|
|
break
|
|
}
|
|
time.Sleep(50 * time.Millisecond)
|
|
}
|
|
if err != nil {
|
|
t.Fatalf("GET %s: %v", url, err)
|
|
}
|
|
defer resp.Body.Close()
|
|
if resp.StatusCode != http.StatusOK {
|
|
t.Fatalf("GET %s -> %d, want 200", url, resp.StatusCode)
|
|
}
|
|
body, _ := io.ReadAll(resp.Body)
|
|
if len(body) == 0 {
|
|
t.Fatalf("GET %s returned an empty body", url)
|
|
}
|
|
}
|
|
|
|
// TestMonitorDisabledByDefault proves a server started without either toggle does
|
|
// NOT open the monitoring endpoint, so production stays closed unless opted in.
|
|
func TestMonitorDisabledByDefault(t *testing.T) {
|
|
t.Setenv("UNIBUS_NATS_DEBUG", "")
|
|
t.Setenv("UNIBUS_NATS_MONITOR", "")
|
|
|
|
ns, err := StartServer(ServerConfig{
|
|
StoreDir: t.TempDir(),
|
|
Host: "127.0.0.1",
|
|
Port: freeLoopbackPort(t),
|
|
})
|
|
if err != nil {
|
|
t.Fatalf("start server: %v", err)
|
|
}
|
|
defer func() { ns.Shutdown(); ns.WaitForShutdown() }()
|
|
|
|
if addr := ns.MonitorAddr(); addr != nil {
|
|
t.Fatalf("monitoring endpoint open (%s) without UNIBUS_NATS_MONITOR — must stay closed by default", addr)
|
|
}
|
|
}
|
|
|
|
// freeLoopbackPort returns a TCP port number that was free on 127.0.0.1 at the
// time of the call. It listens on port 0 so a port is chosen automatically,
// reads the chosen port back, and closes the listener before returning. The
// port is not reserved for the caller afterwards. Any listen error fails the
// test immediately.
func freeLoopbackPort(t *testing.T) int {
	t.Helper()

	ln, listenErr := net.Listen("tcp", "127.0.0.1:0")
	if listenErr != nil {
		t.Fatalf("free port: %v", listenErr)
	}
	defer ln.Close()

	tcpAddr := ln.Addr().(*net.TCPAddr)
	return tcpAddr.Port
}
|