fix(crash): init data_table State.col_visible + col_order before render
Pressing Refresh Agents (or Test Connection — both trigger a fetch and a table re-render) crashed the app with Windows exit code 5 (access violation).

Root cause: agents_tbl_state was default-constructed, so State.col_visible (std::vector<bool>) and State.col_order (std::vector<int>) were empty. render_grid_stage0 indexes them by column index up to N_COLS=11 without bounds checking → undefined behaviour → segfault on the first render after the agents data was populated.

Fix: at first render of the agents panel, fill col_visible with N_COLS `true` entries, fill col_order with [0..N_COLS), and ensure stages.size() >= 1. This is the same pattern tql_apply.cpp uses (col_visible.assign(eff_cols, true)).

Diagnostic infra added (kept in place — minimal overhead):
- FN_DBG macro: fprintf(stderr, ...) + fflush. Survives crashes that fn_log's buffered file output doesn't.
- --auto-refresh CLI flag: triggers fetch_agents_async at frame 30, auto-exits at frame 180 (~3s @ 60Hz). Headless smoke test for CI.
- DBG breadcrumbs through main → load_apikey → fn::run_app → render → fetch_agents_async (thread enter/request/response/parse/exit) → render table (pre/post). Each step flushes stderr immediately.

E2E regression guard: test_app_survives_auto_refresh_cycle. Runs the .exe with --auto-refresh, asserts exit 0, and asserts that the breadcrumb chain reaches both "fetch thread parsed" and "agents_panel POST-render" in stderr.

25 tests passing.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -373,29 +373,47 @@ static std::vector<AgentRow> parse_agents(const std::string& body) {
|
||||
return rows;
|
||||
}
|
||||
|
||||
// FN_DBG: stderr + flush. Survives crashes (fn_log buffers).
|
||||
// Usage: FN_DBG("fmt %d", value). The first argument must be a printf-style
// string literal, because it is pasted directly after the "[DBG] " prefix.
// Each call writes the formatted text plus a trailing newline to stderr and
// flushes stderr before returning.
#define FN_DBG(...) do { fprintf(stderr, "[DBG] " __VA_ARGS__); fputc('\n', stderr); fflush(stderr); } while(0)
|
||||
|
||||
// Fetch agents in background thread
|
||||
static void fetch_agents_async(AppState& s) {
|
||||
if (s.fetching) return;
|
||||
FN_DBG("fetch_agents_async ENTER s.fetching=%d apikey_len=%zu",
|
||||
(int)s.fetching, strlen(s.apikey_buf));
|
||||
if (s.fetching) {
|
||||
FN_DBG("fetch_agents_async SKIP already fetching");
|
||||
return;
|
||||
}
|
||||
s.fetching = true;
|
||||
std::thread([&s]() {
|
||||
FN_DBG("fetch thread STARTED");
|
||||
fn_http::Request req;
|
||||
req.method = "GET";
|
||||
req.url = make_url(s, "/agents");
|
||||
req.bearer_token = s.apikey_buf;
|
||||
req.timeout_ms = 8000;
|
||||
FN_DBG("fetch thread requesting url=%s bearer_len=%zu", req.url.c_str(), req.bearer_token.size());
|
||||
auto res = fn_http::request(req);
|
||||
std::lock_guard<std::mutex> lk(s.agents_mu);
|
||||
if (!res.error.empty()) {
|
||||
s.agents_error = "Transport error: " + res.error;
|
||||
} else if (res.status != 200) {
|
||||
s.agents_error = "HTTP " + std::to_string(res.status);
|
||||
} else {
|
||||
s.agents = parse_agents(res.body);
|
||||
s.agents_error.clear();
|
||||
s.agents_fetched_ms = now_ms();
|
||||
FN_DBG("fetch thread response status=%d err=[%s] body_len=%zu",
|
||||
res.status, res.error.c_str(), res.body.size());
|
||||
{
|
||||
std::lock_guard<std::mutex> lk(s.agents_mu);
|
||||
if (!res.error.empty()) {
|
||||
s.agents_error = "Transport error: " + res.error;
|
||||
} else if (res.status != 200) {
|
||||
s.agents_error = "HTTP " + std::to_string(res.status);
|
||||
} else {
|
||||
FN_DBG("fetch thread parsing body...");
|
||||
s.agents = parse_agents(res.body);
|
||||
s.agents_error.clear();
|
||||
s.agents_fetched_ms = now_ms();
|
||||
FN_DBG("fetch thread parsed %zu rows", s.agents.size());
|
||||
}
|
||||
}
|
||||
s.fetching = false;
|
||||
FN_DBG("fetch thread DONE");
|
||||
}).detach();
|
||||
FN_DBG("fetch_agents_async EXIT (thread detached)");
|
||||
}
|
||||
|
||||
// POST action to /agents/{id}/{action}
|
||||
@@ -484,6 +502,9 @@ static void start_status_sse(AppState& s) {
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
// File-scope switches for the self-test / headless modes, plus the frame
// counter that render() compares them against.
// NOTE(review): g_self_test is presumably set by the --self-test flag in
// main(); the assignment is outside the visible hunk — confirm.
static bool g_self_test = false;
|
||||
static int g_auto_refresh_after_frames = 0; // >0: trigger fetch_agents_async after N frames
|
||||
static int g_auto_exit_after_frames = 0; // >0: exit after N frames (for headless test)
|
||||
// Number of render() calls so far; render() increments it on entry.
static int g_frame_count = 0;
|
||||
|
||||
static bool run_self_test() {
|
||||
fn_log::log_info("[self-test] checking subsystems...");
|
||||
@@ -754,6 +775,20 @@ static void draw_agents_panel(AppState& s) {
|
||||
};
|
||||
static const int kSrcForEff[N_COLS] = { 0,1,2,3,4, 5,6,7,8,9,10 };
|
||||
|
||||
// First-render init for State: col_visible + col_order sized to N_COLS.
|
||||
// Without this, render_grid_stage0 indexes into empty std::vector<bool>
|
||||
// -> undefined behaviour -> Windows access-violation (exit 5).
|
||||
if ((int)s.agents_tbl_state.col_visible.size() != N_COLS) {
|
||||
s.agents_tbl_state.col_visible.assign(N_COLS, true);
|
||||
}
|
||||
if ((int)s.agents_tbl_state.col_order.size() != N_COLS) {
|
||||
s.agents_tbl_state.col_order.resize(N_COLS);
|
||||
for (int c = 0; c < N_COLS; ++c) s.agents_tbl_state.col_order[c] = c;
|
||||
}
|
||||
if ((int)s.agents_tbl_state.stages.size() < 1) {
|
||||
s.agents_tbl_state.stages.resize(1);
|
||||
}
|
||||
|
||||
// Build column specs (Badge for Status, Button for action columns)
|
||||
static data_table::TableInput main_t;
|
||||
if (main_t.column_specs.empty()) {
|
||||
@@ -851,12 +886,27 @@ static void draw_agents_panel(AppState& s) {
|
||||
|
||||
// Need at least 1 row for the API to be happy
|
||||
if (n_rows > 0) {
|
||||
static int dbg_first = 1;
|
||||
if (dbg_first) {
|
||||
FN_DBG("agents_panel PRE-render n_rows=%d cells=%zu specs=%zu eff_h=%p eff_t=%p src=%p vis_sz=%zu",
|
||||
n_rows, cells_ptr.size(), main_t.column_specs.size(),
|
||||
(void*)kHeaders, (void*)kTypes, (void*)kSrcForEff, visible_rows.size());
|
||||
for (int r = 0; r < n_rows && r < 2; ++r) {
|
||||
for (int c = 0; c < N_COLS; ++c) {
|
||||
const char* p = cells_ptr[r * N_COLS + c];
|
||||
FN_DBG(" cell[%d][%d]=%s", r, c, p ? p : "(null)");
|
||||
}
|
||||
}
|
||||
dbg_first = 0;
|
||||
}
|
||||
render_grid_stage0("##agents_tbl",
|
||||
s.agents_tbl_state,
|
||||
cells_ptr.empty() ? nullptr : cells_ptr.data(),
|
||||
n_rows, N_COLS, N_COLS,
|
||||
kHeaders, kTypes, kSrcForEff,
|
||||
visible_rows, main_t, &events);
|
||||
static int dbg_post = 1;
|
||||
if (dbg_post) { FN_DBG("agents_panel POST-render events=%zu", events.size()); dbg_post = 0; }
|
||||
} else {
|
||||
ImGui::TextDisabled("(no agents match filter)");
|
||||
}
|
||||
@@ -1040,6 +1090,20 @@ static void draw_status_feed_panel(AppState& s) {
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
static void render() {
|
||||
g_frame_count++;
|
||||
if (g_frame_count <= 3) {
|
||||
FN_DBG("render frame=%d", g_frame_count);
|
||||
}
|
||||
|
||||
// Headless test: simulate Refresh button click after N frames
|
||||
if (g_auto_refresh_after_frames > 0 && g_frame_count == g_auto_refresh_after_frames) {
|
||||
FN_DBG("AUTO-REFRESH triggered at frame %d", g_frame_count);
|
||||
// Mark connected so the agents panel renders
|
||||
g_state.connected = true;
|
||||
// Simulate Connect-then-Refresh: populate base_url default, kick fetch.
|
||||
fetch_agents_async(g_state);
|
||||
}
|
||||
|
||||
draw_connection_panel(g_state);
|
||||
if (g_show_agents) draw_agents_panel(g_state);
|
||||
if (g_show_logs) draw_logs_panel(g_state);
|
||||
@@ -1052,6 +1116,12 @@ static void render() {
|
||||
fetch_agents_async(g_state);
|
||||
}
|
||||
}
|
||||
|
||||
// Headless test: exit after N frames
|
||||
if (g_auto_exit_after_frames > 0 && g_frame_count >= g_auto_exit_after_frames) {
|
||||
FN_DBG("AUTO-EXIT at frame %d", g_frame_count);
|
||||
std::exit(0);
|
||||
}
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
@@ -1149,6 +1219,7 @@ static int run_connect_test(const std::string& base_url) {
|
||||
}
|
||||
|
||||
int main(int argc, char** argv) {
|
||||
FN_DBG("main ENTER argc=%d", argc);
|
||||
// Self-test mode
|
||||
for (int i = 1; i < argc; i++) {
|
||||
if (strcmp(argv[i], "--self-test") == 0) {
|
||||
@@ -1158,6 +1229,11 @@ int main(int argc, char** argv) {
|
||||
if (strcmp(argv[i], "--connect-test") == 0 && i + 1 < argc) {
|
||||
return run_connect_test(argv[i + 1]);
|
||||
}
|
||||
if (strcmp(argv[i], "--auto-refresh") == 0) {
|
||||
g_auto_refresh_after_frames = 30; // ~0.5s @ 60Hz
|
||||
g_auto_exit_after_frames = 180; // ~3s
|
||||
FN_DBG("auto-refresh mode enabled");
|
||||
}
|
||||
}
|
||||
|
||||
if (g_self_test) {
|
||||
@@ -1186,16 +1262,15 @@ int main(int argc, char** argv) {
|
||||
// Init DB and load saved base_url + apikey (env first, fallback to `pass agentes/api-key`).
|
||||
db_open(g_state);
|
||||
db_load_connection(g_state);
|
||||
FN_DBG("startup: db loaded base_url=%s", g_state.base_url);
|
||||
load_apikey(g_state);
|
||||
if (g_state.apikey_source == "missing") {
|
||||
fn_log::log_warn("[startup] apikey not found: AGENTS_API_KEY env empty and "
|
||||
"`pass agentes/api-key` failed. Check GPG agent is unlocked.");
|
||||
} else {
|
||||
fn_log::log_info("[startup] apikey loaded from %s", g_state.apikey_source.c_str());
|
||||
}
|
||||
FN_DBG("startup: apikey_source=%s apikey_len=%zu",
|
||||
g_state.apikey_source.c_str(), strlen(g_state.apikey_buf));
|
||||
|
||||
// Cleanup on exit
|
||||
FN_DBG("startup: calling fn::run_app");
|
||||
int ret = fn::run_app(cfg, render);
|
||||
FN_DBG("fn::run_app returned %d", ret);
|
||||
|
||||
// Persist state
|
||||
db_save_state(g_state, "log_autoscroll", g_state.log_autoscroll ? "1" : "0");
|
||||
|
||||
@@ -82,6 +82,41 @@ def test_connect_succeeds_with_valid_apikey():
|
||||
assert n > 0, f"expected at least 1 agent, got {n}"
|
||||
|
||||
|
||||
def test_app_survives_auto_refresh_cycle():
    """Regression: app must NOT crash on Refresh Agents button click.

    Bug history: v0.2 migration to data_table_cpp_viz left State.col_visible
    and State.col_order uninitialized — render_grid_stage0 indexed into empty
    std::vector<bool>, causing an access violation (Windows exit code 5).

    The --auto-refresh CLI flag triggers fetch_agents_async + a full render
    cycle from a headless GLFW window, then exits at frame 180 (~3s @ 60Hz).
    Exit 0 means the agents panel rendered the live data without crashing.

    The test is skipped (not failed) when the API key cannot be read via
    `pass`, including when `pass` is not installed or the probe times out.
    """
    # Probe the key store first. A missing `pass` binary (OSError) or a GPG
    # prompt that never returns (TimeoutExpired) means "no key available",
    # which is a skip condition rather than a test error.
    try:
        pass_check = subprocess.run(["pass", "agentes/api-key"],
                                    capture_output=True, text=True, timeout=5)
    except (OSError, subprocess.TimeoutExpired) as exc:
        pytest.skip(f"pass agentes/api-key not readable ({exc!r})")
    if pass_check.returncode != 0 or not pass_check.stdout.strip():
        pytest.skip("pass agentes/api-key not readable (GPG locked?)")

    # WSL → Windows: launch the .exe and let it self-exit after 180 frames.
    # errors="replace" keeps a stray non-UTF-8 byte in the breadcrumb output
    # from aborting the test with a decode error.
    r = subprocess.run(
        [str(_exe()), "--auto-refresh"],
        capture_output=True, text=True, errors="replace", timeout=30,
    )
    assert r.returncode == 0, (
        f"app crashed (exit={r.returncode}); last stderr:\n"
        + "\n".join(r.stderr.splitlines()[-20:])
    )
    # Sanity: stderr must show that fetch_agents reached the parse step.
    assert "fetch thread parsed" in r.stderr, (
        f"fetch never reached parse; stderr:\n{r.stderr[-1000:]}"
    )
    # Sanity: render must have completed at least once (POST-render logged).
    assert "agents_panel POST-render" in r.stderr, (
        f"render_grid_stage0 crashed before completing; stderr:\n{r.stderr[-1000:]}"
    )
|
||||
|
||||
|
||||
def test_connect_falls_back_to_pass_when_env_empty():
|
||||
"""When AGENTS_API_KEY env is empty, the .exe must fetch apikey via
|
||||
`wsl.exe pass agentes/api-key` (or `pass` on Linux). This is what makes
|
||||
|
||||
Reference in New Issue
Block a user