feat(viz): graph_force_layout_gpu compute + spatial hash (issue 0049h)

Layout force-directed en GPU usando 5 compute shaders (GL 4.3) + spatial hash grid 64x64. API simetrica con graph_force_layout (CPU) para que el consumer pueda swappear sin cambios. Loop de atomicCompSwap para float-add portable.

- cpp/functions/viz/graph_force_layout_gpu.{h,cpp,md}: nuevo modulo
- cpp/functions/gfx/gl_loader: anade glDispatchCompute, glMemoryBarrier, glBindBufferBase, glGetBufferSubData (Windows wgl)
- cpp/tests/test_graph_force_layout_gpu.cpp: smoke + pinned + CPU vs GPU. Crea ventana GLFW oculta GL 4.3; SKIP si headless o sin compute.
- demos_graph: checkbox "GPU layout" para swappear CPU/GPU en runtime
- issue movido a dev/issues/completed/
2026-04-29 23:29:16 +02:00
parent 982a9f9a2b
commit 35312ea66e
11 changed files with 1096 additions and 2 deletions
@@ -80,6 +80,22 @@ add_fn_test(test_graph_sources test_graph_sources.cpp
    ${CMAKE_CURRENT_SOURCE_DIR}/../functions/viz/graph_types.cpp)
 target_link_libraries(test_graph_sources PRIVATE SQLite::SQLite3)

+# --- Issue 0049h — graph_force_layout_gpu (compute + spatial hash) ----------
+# The test creates a hidden GLFW window requesting a 4.3 core context; if
+# glfwInit/window/context creation fails (CI without DISPLAY, Mesa without
+# compute), the test SKIPs. We link against glfw + OpenGL so the symbols
+# resolve in either case.
+add_fn_test(test_graph_force_layout_gpu test_graph_force_layout_gpu.cpp
+    ${CMAKE_CURRENT_SOURCE_DIR}/../functions/viz/graph_force_layout_gpu.cpp
+    ${CMAKE_CURRENT_SOURCE_DIR}/../functions/viz/graph_force_layout.cpp
+    ${CMAKE_CURRENT_SOURCE_DIR}/../functions/viz/graph_types.cpp
+    ${CMAKE_CURRENT_SOURCE_DIR}/../functions/gfx/gl_loader.cpp)
+# Windows links the system opengl32 import library directly; other platforms
+# locate OpenGL through find_package and use the imported OpenGL::GL target.
+if(WIN32)
+    target_link_libraries(test_graph_force_layout_gpu PRIVATE glfw opengl32)
+else()
+    find_package(OpenGL REQUIRED)
+    target_link_libraries(test_graph_force_layout_gpu PRIVATE glfw OpenGL::GL)
+endif()
+
+
 # --- Issue 0049f — atlas de iconos Tabler para graph_renderer ---------------
 # graph_icons.cpp incluye gl_loader.h y referencia gl* — el atlas se puede
 # construir sin contexto via FN_GRAPH_ICONS_SKIP_GL=1 (set por el test), pero
@@ -0,0 +1,238 @@
+// Tests para `graph_force_layout_gpu` (issue 0049h).
+// El layout en GPU vive 100% en compute shaders, asi que no hay logica pura
+// que testear sin un contexto GL real. El test crea una ventana GLFW oculta a
+// 4.3 core; si la creacion falla (CI sin display, Mesa sin compute, etc.)
+// el test SKIPea con WARN para no bloquear CI.
+
+#define CATCH_CONFIG_MAIN
+#include "catch_amalgamated.hpp"
+
+#include "viz/graph_types.h"
+#include "viz/graph_force_layout.h"
+#include "viz/graph_force_layout_gpu.h"
+#include "gfx/gl_loader.h"
+
+#include <GLFW/glfw3.h>
+#include <cmath>
+#include <cstdio>
+#include <cstdlib>
+#include <vector>
+
+namespace {
+
+// RAII wrapper that initializes GLFW and creates a hidden 64x64 window
+// requesting an OpenGL 4.3 core, forward-compatible context. `ok` becomes true
+// only when glfwInit(), window creation and fn::gfx::gl_loader_init() all
+// succeed; the tests skip when it is false.
+//
+// All teardown lives in the destructor. The constructor never throws, so the
+// destructor always runs; cleaning up in the constructor's failure paths as
+// well would call glfwDestroyWindow() a second time on an already destroyed
+// window.
+struct GLContext {
+    GLFWwindow* win = nullptr;
+    bool ok = false;
+
+    GLContext() {
+        if (!glfwInit()) return;
+        glfwWindowHint(GLFW_VISIBLE,                GLFW_FALSE);
+        glfwWindowHint(GLFW_CONTEXT_VERSION_MAJOR,  4);
+        glfwWindowHint(GLFW_CONTEXT_VERSION_MINOR,  3);
+        glfwWindowHint(GLFW_OPENGL_PROFILE,         GLFW_OPENGL_CORE_PROFILE);
+        glfwWindowHint(GLFW_OPENGL_FORWARD_COMPAT,  GLFW_TRUE);
+        win = glfwCreateWindow(64, 64, "fl_gpu_test", nullptr, nullptr);
+        if (!win) return;                           // destructor terminates GLFW
+        glfwMakeContextCurrent(win);
+        if (!fn::gfx::gl_loader_init()) return;     // destructor destroys the window
+        ok = true;
+    }
+
+    // Sole owner of the window and of the GLFW initialization: a copy would
+    // destroy the window and terminate GLFW twice.
+    GLContext(const GLContext&) = delete;
+    GLContext& operator=(const GLContext&) = delete;
+
+    ~GLContext() {
+        if (win) glfwDestroyWindow(win);
+        // Also reached when glfwInit() failed; GLFW documents glfwTerminate()
+        // as callable before a successful glfwInit().
+        glfwTerminate();
+    }
+};
+
+// Owning storage for a synthetic test graph (built by make_graph as a ring
+// plus random chords; used by all three tests) together with the GraphData
+// view that is handed to the layout functions.
+//
+// `data` stores raw pointers into `nodes` / `edges`. Moving a std::vector
+// transfers its element buffer, so those pointers stay valid across a move;
+// a copy would leave `data` pointing into the source object's vectors, so
+// copying is disabled.
+struct Graph {
+    std::vector<GraphNode> nodes;
+    std::vector<GraphEdge> edges;
+    GraphData data{};
+
+    Graph() = default;
+    Graph(const Graph&) = delete;
+    Graph& operator=(const Graph&) = delete;
+    Graph(Graph&&) = default;
+    Graph& operator=(Graph&&) = default;
+};
+
+// Builds a deterministic synthetic graph for the tests:
+//   - N nodes on a ring of radius 80 with up to +/-2.5 of jitter per axis,
+//   - N ring edges i -> (i + 1) % N with weight 1.0,
+//   - `extra_chords` pseudo-random chords with weight 0.5.
+// The returned Graph owns the storage; `data` points into its vectors and has
+// its bounds refreshed via update_bounds().
+//
+// N <= 0 yields an empty graph (no nodes, no edges) instead of dividing by
+// zero in the `% N` expressions; a negative `extra_chords` adds no chords.
+// With N == 1 every edge is necessarily a self-loop.
+Graph make_graph(int N, int extra_chords) {
+    Graph g;
+
+    // Fixed-seed LCG returning values in [0, 1), so every run builds the same
+    // graph.
+    unsigned seed = 0xC0FFEEu;
+    auto rnd = [&seed]() {
+        seed = seed * 1664525u + 1013904223u;
+        return float((seed >> 8) & 0xFFFFFF) / float(1 << 24);
+    };
+
+    if (N > 0) {
+        const int chords = extra_chords > 0 ? extra_chords : 0;
+        g.nodes.reserve(static_cast<std::size_t>(N));
+        g.edges.reserve(static_cast<std::size_t>(N) + static_cast<std::size_t>(chords));
+
+        for (int i = 0; i < N; ++i) {
+            const float angle = 6.2831853f * i / N;
+            // Draw the jitter in separate statements: two rnd() calls inside
+            // one argument list are evaluated in unspecified order, which
+            // would make the fixture differ between compilers.
+            const float jitter_x = (rnd() - 0.5f) * 5.0f;
+            const float jitter_y = (rnd() - 0.5f) * 5.0f;
+            g.nodes.push_back(graph_node(80.0f * std::cos(angle) + jitter_x,
+                                         80.0f * std::sin(angle) + jitter_y));
+        }
+
+        // Ring: each node is connected to its successor, wrapping at N.
+        for (int i = 0; i < N; ++i) {
+            g.edges.push_back(graph_edge(static_cast<uint32_t>(i),
+                                         static_cast<uint32_t>((i + 1) % N),
+                                         1.0f));
+        }
+
+        // Chords: random endpoint pairs; a coincident pair is nudged to the
+        // next node so that, for N > 1, no chord is a self-loop.
+        for (int k = 0; k < chords; ++k) {
+            const uint32_t a = static_cast<uint32_t>(rnd() * N);
+            uint32_t b = static_cast<uint32_t>(rnd() * N);
+            if (a == b) b = (b + 1) % static_cast<uint32_t>(N);
+            g.edges.push_back(graph_edge(a, b, 0.5f));
+        }
+    }
+
+    // Wire the GraphData view to the storage owned by `g`.
+    g.data.nodes         = g.nodes.data();
+    g.data.node_count    = static_cast<int>(g.nodes.size());
+    g.data.node_capacity = static_cast<int>(g.nodes.capacity());
+    g.data.edges         = g.edges.data();
+    g.data.edge_count    = static_cast<int>(g.edges.size());
+    g.data.edge_capacity = static_cast<int>(g.edges.capacity());
+    g.data.update_bounds();
+    return g;
+}
+
+// Average energy per node. Returns 0 for a non-positive node count so the
+// division is never performed with a zero (or negative) divisor.
+float per_node_energy(float total, int N) {
+    if (N <= 0) {
+        return 0.0f;
+    }
+    return total / static_cast<float>(N);
+}
+
+} // namespace
+
+// Smoke test: runs the GPU layout on a 100-node / 200-edge synthetic graph and
+// checks that (a) the per-node value returned by the step function has not
+// grown after 100 further steps and (b) the positions read back are finite.
+// Passes with a WARN (effectively a skip) when no GL context or no GPU layout
+// context can be created.
+TEST_CASE("graph_force_layout_gpu — smoke + decreasing energy", "[graph_force_layout_gpu]") {
+    GLContext gl;
+    if (!gl.ok) {
+        WARN("No GL 4.3 context (CI/headless?). Skipping GPU layout test.");
+        SUCCEED("no GL context");
+        return;
+    }
+
+    auto g = make_graph(100, 100);
+
+    // NOTE(review): the arguments look like node/edge capacities with 16
+    // slots of headroom — confirm against graph_force_layout_gpu.h.
+    auto* ctx = graph_force_layout_gpu_create(g.data.node_count + 16,
+                                              g.data.edge_count + 16);
+    if (!ctx) {
+        WARN("Compute shaders no soportados por el driver. Skipping.");
+        SUCCEED("no compute support");
+        return;
+    }
+    REQUIRE(graph_force_layout_gpu_available());
+
+    graph_force_layout_gpu_upload(ctx, g.data);
+
+    ForceLayoutConfig cfg;
+    cfg.repulsion    = 200.0f;
+    cfg.attraction   = 0.05f;
+    cfg.damping      = 0.85f;
+    cfg.gravity      = 0.01f;
+    cfg.max_velocity = 20.0f;
+    cfg.iterations   = 1;
+
+    // Warm-up: 5 steps, keeping the value returned by the last one.
+    float e_warm = 0.0f;
+    for (int i = 0; i < 5; ++i) e_warm = graph_force_layout_gpu_step(ctx, cfg);
+
+    // Measurement window: 100 more steps, again keeping the last value.
+    float e_after = e_warm;
+    for (int i = 0; i < 100; ++i) e_after = graph_force_layout_gpu_step(ctx, cfg);
+
+    // After 100 steps the per-node energy should be <= the warm-up value,
+    // with 1.0 of absolute slack. Comparing per node keeps the check robust
+    // to the concrete N.
+    INFO("warm=" << e_warm << " after=" << e_after);
+    REQUIRE(per_node_energy(e_after, g.data.node_count) <=
+            per_node_energy(e_warm, g.data.node_count) + 1.0f);
+
+    graph_force_layout_gpu_readback(ctx, g.data);
+    // No NaN or infinity in the positions after readback.
+    for (const auto& n : g.nodes) {
+        REQUIRE(std::isfinite(n.x));
+        REQUIRE(std::isfinite(n.y));
+    }
+
+    graph_force_layout_gpu_destroy(ctx);
+}
+
+// Pinned-node test: flags node 0 with NF_PINNED at the origin with zero
+// velocity, runs 100 GPU steps and checks that its position and velocity are
+// still (0, 0) within 1e-4 after readback. Passes with a WARN (effectively a
+// skip) when no GL context or no GPU layout context can be created.
+TEST_CASE("graph_force_layout_gpu — pinned nodes no se mueven", "[graph_force_layout_gpu]") {
+    GLContext gl;
+    if (!gl.ok) {
+        WARN("No GL 4.3 context. Skipping.");
+        SUCCEED("no GL context");
+        return;
+    }
+
+    auto g = make_graph(50, 30);
+    // Pin node 0 at (0, 0). g.data points into g.nodes, so editing the vector
+    // element here is what gets uploaded below.
+    g.nodes[0].x = 0.0f;
+    g.nodes[0].y = 0.0f;
+    g.nodes[0].vx = 0.0f;
+    g.nodes[0].vy = 0.0f;
+    g.nodes[0].flags |= NF_PINNED;
+
+    auto* ctx = graph_force_layout_gpu_create(g.data.node_count + 16,
+                                              g.data.edge_count + 16);
+    if (!ctx) {
+        WARN("No compute support. Skipping.");
+        SUCCEED("no compute");
+        return;
+    }
+    graph_force_layout_gpu_upload(ctx, g.data);
+
+    // Only repulsion, attraction and iterations are overridden; the remaining
+    // fields keep whatever defaults ForceLayoutConfig declares.
+    ForceLayoutConfig cfg;
+    cfg.repulsion = 500.0f;
+    cfg.attraction = 0.05f;
+    cfg.iterations = 1;
+
+    for (int i = 0; i < 100; ++i) graph_force_layout_gpu_step(ctx, cfg);
+    // Velocities are requested as well so vx/vy can be asserted below.
+    graph_force_layout_gpu_readback(ctx, g.data, /*include_velocities=*/true);
+
+    REQUIRE(g.nodes[0].x == Catch::Approx(0.0f).margin(1e-4));
+    REQUIRE(g.nodes[0].y == Catch::Approx(0.0f).margin(1e-4));
+    REQUIRE(g.nodes[0].vx == Catch::Approx(0.0f).margin(1e-4));
+    REQUIRE(g.nodes[0].vy == Catch::Approx(0.0f).margin(1e-4));
+
+    graph_force_layout_gpu_destroy(ctx);
+}
+
+// CPU vs GPU test: steps the CPU layout and the GPU layout 80 times each on
+// identical synthetic graphs with the same config, then checks that the last
+// values returned by the two step functions are within a wide ratio band.
+// Passes with a WARN (effectively a skip) when no GL context or no GPU layout
+// context can be created.
+TEST_CASE("graph_force_layout_gpu — CPU vs GPU (energia comparable)",
+          "[graph_force_layout_gpu]") {
+    GLContext gl;
+    if (!gl.ok) {
+        WARN("No GL 4.3 context. Skipping.");
+        SUCCEED("no GL context");
+        return;
+    }
+
+    // Same graph in two copies: one for the CPU, one for the GPU.
+    auto g_cpu = make_graph(50, 60);
+    auto g_gpu = make_graph(50, 60);
+
+    auto* ctx = graph_force_layout_gpu_create(g_gpu.data.node_count + 16,
+                                              g_gpu.data.edge_count + 16);
+    if (!ctx) {
+        WARN("No compute support. Skipping.");
+        SUCCEED("no compute");
+        return;
+    }
+    graph_force_layout_gpu_upload(ctx, g_gpu.data);
+
+    ForceLayoutConfig cfg;
+    cfg.repulsion    = 300.0f;
+    cfg.attraction   = 0.03f;
+    cfg.damping      = 0.85f;
+    cfg.gravity      = 0.005f;
+    cfg.max_velocity = 20.0f;
+    cfg.iterations   = 1;
+
+    // Advance both layouts in lockstep; only the last returned value of each
+    // is kept for the comparison.
+    float e_cpu = 0.0f, e_gpu = 0.0f;
+    for (int i = 0; i < 80; ++i) {
+        e_cpu = graph_force_layout_step(g_cpu.data, cfg);
+        e_gpu = graph_force_layout_gpu_step(ctx, cfg);
+    }
+
+    INFO("e_cpu=" << e_cpu << "  e_gpu=" << e_gpu);
+
+    // Equality is not required — Barnes-Hut (CPU) and spatial hash (GPU) are
+    // different approximations. We only check that the two energies are
+    // roughly comparable: the GPU/CPU ratio must lie strictly inside
+    // (0.001, 50). The band is asymmetric (50x above, 1000x below) to leave
+    // margin for small graphs where the variance is high. The check is
+    // skipped when the CPU energy is <= 1e-3, where the ratio would be
+    // dominated by a near-zero divisor.
+    if (e_cpu > 1e-3f) {
+        float ratio = e_gpu / e_cpu;
+        REQUIRE(ratio > 0.001f);
+        REQUIRE(ratio < 50.0f);
+    }
+
+    graph_force_layout_gpu_destroy(ctx);
+}