e7d59fd01d
The H1 fix bounds each request (1 MiB control / 16 MiB blob) and the per-IP rate
limiter throttles a single source, but neither bounds the AGGREGATE memory across
concurrent requests. The re-audit (report 0006, N2) drove RSS to ~1.42 GB with 40
concurrent 16 MiB uploads, and noted that a multi-IP (botnet) flood scales without
a ceiling because the rate limit is per-IP.
Fix: a global, non-blocking, byte-counting limiter (pkg/membership/inflight.go).
ServeHTTP reserves a POST's worst-case buffered size (its route ceiling) from the
limiter before reading the body, and releases it when the request finishes. When
the global cap (maxInflightBytes = 128 MiB) is reached, further POSTs are shed
with 503 (backpressure) rather than parking goroutines, so total bytes buffered
in flight stays bounded regardless of connection count or source-IP spread. GETs
carry no body and do not consume the budget.
The limiter is implemented inside unibus (not delegated to the fn-registry, where
a generic concurrency primitive would normally live) because functions/core pulls
transitive deps requiring CGO (mattn/go-sqlite3) and external modules that are
incompatible with unibus's CGO_ENABLED=0 build, and because this work is scoped
to the unibus sub-repo. The type/method comments document this.
Verification:
- pkg/membership/inflight_test.go: TestInflightLimiter{Basics,Disabled,Concurrent}
cover golden/edge/error/disabled/over-release and a -race concurrency invariant
(inFlight returns to 0, never exceeds cap).
- pkg/membership/dos_concurrency_test.go: TestReaudit_DoSConcurrency fires 40
concurrent 16 MiB uploads from distinct IPs (the multi-IP shape) against a 48 MiB
test cap -> 200=3 503=37, RSS delta ~93 MiB (bound 256 MiB), inFlight()==0, and a
fresh upload still 200. With the limiter disabled the test fails (200=40 503=0),
confirming it is a real regression guard.
- CGO_ENABLED=0 go build ./... && go vet ./... && go test -count=1 ./... green;
CGO_ENABLED=1 go test -race ./pkg/membership/ green.
Residual (documented): under enforce the body is buffered twice (auth verify +
handler), so real RSS is ~2x the reserved bytes; closing that fully means
streaming blobs to disk (overlaps H9 / issue 0002).
Refs: report 0006 N2, issue 0005c.
Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
149 lines
4.9 KiB
Go
149 lines
4.9 KiB
Go
package membership
|
|
|
|
import (
|
|
"net/http"
|
|
"net/http/httptest"
|
|
"os"
|
|
"runtime"
|
|
"strconv"
|
|
"strings"
|
|
"sync"
|
|
"sync/atomic"
|
|
"testing"
|
|
"time"
|
|
)
|
|
|
|
// readRSSkBRaw reads VmRSS (kB) from /proc without a *testing.T, so it is safe to
|
|
// call from a sampling goroutine (vmRSSkB calls t.Skip, which may only run on the
|
|
// test's own goroutine). Returns 0 when unavailable.
|
|
func readRSSkBRaw() int64 {
|
|
b, err := os.ReadFile("/proc/self/status")
|
|
if err != nil {
|
|
return 0
|
|
}
|
|
for _, line := range strings.Split(string(b), "\n") {
|
|
if strings.HasPrefix(line, "VmRSS:") {
|
|
f := strings.Fields(line)
|
|
if len(f) >= 2 {
|
|
v, _ := strconv.ParseInt(f[1], 10, 64)
|
|
return v
|
|
}
|
|
}
|
|
}
|
|
return 0
|
|
}
|
|
|
|
// TestReaudit_DoSConcurrency ports the re-auditor's N2 (Medio-Alto) finding: the
|
|
// per-request body ceiling and the per-IP rate limit do not bound the AGGREGATE
|
|
// memory of many concurrent uploads. The auditor drove RSS to ~1.42 GB with 40
|
|
// concurrent 16 MiB blob uploads. With the global in-flight byte limiter, the
|
|
// number of simultaneously-buffered uploads is capped, so the resident set stays
|
|
// bounded regardless of how many connections arrive at once.
|
|
//
|
|
// Coverage:
|
|
// - golden: a normal upload succeeds, and the server is still healthy after the
|
|
// storm (the limiter did not wedge it);
|
|
// - edge : concurrency right at the cap is admitted;
|
|
// - error : a concurrent flood far past the cap sheds the excess with 503
|
|
// (backpressure) instead of buffering it all, and the RSS spike stays bounded
|
|
// and does NOT scale with the number of requests.
|
|
func TestReaudit_DoSConcurrency(t *testing.T) {
|
|
if runtime.GOOS != "linux" {
|
|
t.Skip("RSS probe is Linux-only")
|
|
}
|
|
srv := dosServer(t, AuthOff)
|
|
// Force a small aggregate cap so the bound is observable in a unit test: with
|
|
// a 16 MiB blob ceiling, 48 MiB admits ~3 concurrent uploads. Production uses
|
|
// maxInflightBytes (128 MiB); the mechanism under test is identical.
|
|
const cap = int64(48) << 20
|
|
srv.inflight = newInflightLimiter(cap)
|
|
|
|
const blob = maxBlobBytes // 16 MiB, the per-request ceiling
|
|
const n = 40 // the auditor's figure
|
|
|
|
// A spike bound: with the cap admitting ~3 concurrent 16 MiB uploads and a
|
|
// ~2x copy factor (auth buffer + handler buffer) plus Go runtime slack, the
|
|
// delta should stay well under this. Without the limiter, 40 concurrent
|
|
// uploads admitted at once would add hundreds of MB (the auditor saw ~1.4 GB).
|
|
const maxSpikeKB = int64(256) << 10 // 256 MiB
|
|
|
|
runtime.GC()
|
|
before := readRSSkBRaw()
|
|
|
|
// Sample peak RSS while the storm runs.
|
|
var peak int64
|
|
atomic.StoreInt64(&peak, before)
|
|
stop := make(chan struct{})
|
|
var sampler sync.WaitGroup
|
|
sampler.Add(1)
|
|
go func() {
|
|
defer sampler.Done()
|
|
for {
|
|
select {
|
|
case <-stop:
|
|
return
|
|
default:
|
|
if v := readRSSkBRaw(); v > atomic.LoadInt64(&peak) {
|
|
atomic.StoreInt64(&peak, v)
|
|
}
|
|
time.Sleep(2 * time.Millisecond)
|
|
}
|
|
}
|
|
}()
|
|
|
|
var got503, got200 int64
|
|
var wg sync.WaitGroup
|
|
for i := 0; i < n; i++ {
|
|
wg.Add(1)
|
|
go func() {
|
|
defer wg.Done()
|
|
req := httptest.NewRequest(http.MethodPost, "/blobs", &zeroReader{remaining: blob})
|
|
req.ContentLength = blob
|
|
// Distinct source IP per request: this is the multi-IP (botnet) shape the
|
|
// auditor flagged, where the per-IP rate limit gives no aggregate defense.
|
|
// The in-flight byte limiter is the global bound that must hold here.
|
|
req.RemoteAddr = "198.51.100." + strconv.Itoa(i%254+1) + ":1234"
|
|
rec := httptest.NewRecorder()
|
|
srv.ServeHTTP(rec, req)
|
|
switch rec.Code {
|
|
case http.StatusServiceUnavailable:
|
|
atomic.AddInt64(&got503, 1)
|
|
case http.StatusOK:
|
|
atomic.AddInt64(&got200, 1)
|
|
}
|
|
}()
|
|
}
|
|
wg.Wait()
|
|
close(stop)
|
|
sampler.Wait()
|
|
|
|
runtime.GC()
|
|
delta := atomic.LoadInt64(&peak) - before
|
|
|
|
// Error path: the flood must have hit the cap and shed the excess with 503.
|
|
if got503 == 0 {
|
|
t.Fatalf("a concurrent flood of %d uploads past the cap should shed some with 503; got 200=%d 503=%d", n, got200, got503)
|
|
}
|
|
// The aggregate memory must stay bounded — not scale with n.
|
|
if delta > maxSpikeKB {
|
|
t.Fatalf("aggregate RSS spiked %d kB under %d concurrent uploads (bound %d kB): in-flight limiter not bounding memory", delta, n, maxSpikeKB)
|
|
}
|
|
// All reservations released after the storm.
|
|
if f := srv.inflight.inFlight(); f != 0 {
|
|
t.Fatalf("after the storm inFlight = %d, want 0 (reservations leaked)", f)
|
|
}
|
|
|
|
// Golden: the server is still healthy and serves a normal upload (from a fresh
|
|
// IP so the per-IP rate limiter, untouched here, is not what we measure).
|
|
rec := httptest.NewRecorder()
|
|
gReq := httptest.NewRequest(http.MethodPost, "/blobs", strings.NewReader("hello after storm"))
|
|
gReq.RemoteAddr = "203.0.113.9:9999"
|
|
srv.ServeHTTP(rec, gReq)
|
|
if rec.Code != http.StatusOK {
|
|
t.Fatalf("a normal upload after the storm should be 200, got %d (%s)", rec.Code, rec.Body.String())
|
|
}
|
|
|
|
t.Logf("N2 bound: %d uploads -> 200=%d 503=%d, RSS delta %d kB (bound %d kB), cap %d MiB",
|
|
n, got200, got503, delta, maxSpikeKB, cap>>20)
|
|
}
|