feat(viz): graph_force_layout_gpu compute + spatial hash (issue 0049h)

Layout force-directed en GPU usando 5 compute shaders 4.3 + spatial hash
grid 64x64. API simetrica con graph_force_layout (CPU) para que el consumer
pueda swappear sin cambios. atomicCompSwap loop para float-add portable.

- cpp/functions/viz/graph_force_layout_gpu.{h,cpp,md}: nuevo modulo
- cpp/functions/gfx/gl_loader: anade glDispatchCompute, glMemoryBarrier,
  glBindBufferBase, glGetBufferSubData (Windows wgl)
- cpp/tests/test_graph_force_layout_gpu.cpp: smoke + pinned + CPU vs GPU.
  Crea ventana GLFW oculta GL 4.3; SKIP si headless o sin compute.
- demos_graph: checkbox "GPU layout" para swappear CPU/GPU en runtime
- issue movido a dev/issues/completed/
This commit is contained in:
2026-04-29 23:29:16 +02:00
parent 982a9f9a2b
commit 35312ea66e
11 changed files with 1096 additions and 2 deletions
+45 -1
View File
@@ -4,6 +4,7 @@
#include "viz/graph_types.h"
#include "viz/graph_viewport.h"
#include "viz/graph_force_layout.h"
#include "viz/graph_force_layout_gpu.h"
#include "core/button.h"
#include "core/tokens.h"
@@ -137,6 +138,16 @@ void demo_graph() {
static bool s_initialized = false;
static bool s_needs_regen = true;
// GPU layout (issue 0049h): toggle CPU/GPU. ctx se crea perezosamente al
// primer frame en GPU mode; max_nodes/max_edges se dimensionan al maximo
// que ofrece el slider (1M nodos x 10 edges/nodo = 10M edges) — los SSBOs
// ocupan ~80 MB en ese tope, suficientemente barato para no
// recrear el ctx cada Regenerate. Si compute no esta disponible, el
// toggle queda deshabilitado.
static bool s_use_gpu = false;
static ForceLayoutGPU* s_gpu_ctx = nullptr;
static bool s_gpu_dirty = true; // re-upload tras regen / cambio
if (s_needs_regen) {
init_demo_types();
generate_synthetic_graph(s_n_nodes, s_n_clusters,
@@ -157,6 +168,7 @@ void demo_graph() {
s_state.layout_energy = 0.0f;
s_needs_regen = false;
s_initialized = true;
s_gpu_dirty = true;
}
section("Controls");
@@ -189,6 +201,18 @@ void demo_graph() {
if (button("Fit view", ButtonVariant::Subtle)) {
graph_viewport_fit(s_graph, s_state);
}
ImGui::SameLine();
// Toggle GPU layout. Si compute no esta disponible (Mesa software o
// driver < 4.3), deshabilitamos visualmente el checkbox.
bool prev_gpu = s_use_gpu;
if (s_gpu_ctx == nullptr && s_use_gpu == false) {
// primera oportunidad: intentar crear el ctx para detectar soporte.
// Lazy init solo si el usuario lo activa.
}
ImGui::Checkbox("GPU layout", &s_use_gpu);
if (s_use_gpu != prev_gpu) {
s_gpu_dirty = true; // re-upload al cambiar de modo
}
}
section("Stats");
@@ -234,7 +258,27 @@ void demo_graph() {
cfg.attraction = s_attraction;
cfg.gravity = s_gravity;
cfg.iterations = 1;
s_state.layout_energy = graph_force_layout_step(s_graph, cfg);
if (s_use_gpu) {
if (!s_gpu_ctx) {
s_gpu_ctx = graph_force_layout_gpu_create(s_graph.node_count + 1024,
s_graph.edge_count + 1024);
s_gpu_dirty = true;
}
if (s_gpu_ctx) {
if (s_gpu_dirty) {
graph_force_layout_gpu_upload(s_gpu_ctx, s_graph);
s_gpu_dirty = false;
}
s_state.layout_energy = graph_force_layout_gpu_step(s_gpu_ctx, cfg);
graph_force_layout_gpu_readback(s_gpu_ctx, s_graph, /*include_velocities=*/true);
} else {
// GPU no disponible: caer a CPU silenciosamente.
s_use_gpu = false;
s_state.layout_energy = graph_force_layout_step(s_graph, cfg);
}
} else {
s_state.layout_energy = graph_force_layout_step(s_graph, cfg);
}
const float per_node = s_graph.node_count > 0
? s_state.layout_energy / (float)s_graph.node_count