From daf491cd993be0a3ec317e7c04f5256e68a4490a Mon Sep 17 00:00:00 2001 From: Egutierrez Date: Wed, 29 Apr 2026 22:32:38 +0200 Subject: [PATCH] perf(viz): graph_renderer edges via TBO + vertex pulling (issue 0049d) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit El buffer de aristas pasa a estatico (16B/arista: source, target, color, flags) y solo se reupload cuando cambia el grafo. Las posiciones de los nodos viven en un Texture Buffer Object (RG32F) actualizado por frame; el vertex shader hace texelFetch con gl_VertexID & 1 para elegir endpoint. Draw call: glDrawArraysInstanced(GL_LINES, 0, 2, edge_count) con divisor=1. Para 100k aristas: el upload de 4.8 MB/frame baja a 0 en regimen estable. edge_alpha pasa a uniform; la pre-multiplicacion en CPU desaparece. GLSL sigue en 330 core (samplerBuffer/texelFetch estan en 1.40+). gl_loader gana glBufferSubData, glVertexAttribIPointer y glTexBuffer (en Linux ya estaban via GL_GLEXT_PROTOTYPES; ahora estan disponibles tambien en MinGW/Windows). Tests: nuevo test_graph_edge_static valida el layout de 16B y el packing RGBA8 del fallback. test_visual sigue verde — render visualmente identico. Bump graph_renderer 1.2.0 -> 1.3.0. 
--- cpp/functions/gfx/gl_loader.cpp | 6 + cpp/functions/gfx/gl_loader.h | 7 + cpp/functions/viz/graph_renderer.cpp | 293 +++++++++++++----- cpp/functions/viz/graph_renderer.md | 12 +- cpp/tests/CMakeLists.txt | 4 + cpp/tests/test_graph_edge_static.cpp | 60 ++++ dev/issues/README.md | 2 +- .../0049d-graph-edges-vertex-pulling.md | 0 8 files changed, 298 insertions(+), 86 deletions(-) create mode 100644 cpp/tests/test_graph_edge_static.cpp rename dev/issues/{ => completed}/0049d-graph-edges-vertex-pulling.md (100%) diff --git a/cpp/functions/gfx/gl_loader.cpp b/cpp/functions/gfx/gl_loader.cpp index 2fc0ccca..f0055bef 100644 --- a/cpp/functions/gfx/gl_loader.cpp +++ b/cpp/functions/gfx/gl_loader.cpp @@ -46,6 +46,9 @@ PFNGLFRAMEBUFFERRENDERBUFFERPROC fn_glFramebufferRenderbuffer = nullptr; PFNGLGENRENDERBUFFERSPROC fn_glGenRenderbuffers = nullptr; PFNGLRENDERBUFFERSTORAGEPROC fn_glRenderbufferStorage = nullptr; PFNGLFRAMEBUFFERTEXTUREPROC fn_glFramebufferTexture = nullptr; +PFNGLBUFFERSUBDATAPROC fn_glBufferSubData = nullptr; +PFNGLVERTEXATTRIBIPOINTERPROC fn_glVertexAttribIPointer = nullptr; +PFNGLTEXBUFFERPROC fn_glTexBuffer = nullptr; namespace fn::gfx { @@ -98,6 +101,9 @@ bool gl_loader_init() { LOAD(glGenRenderbuffers); LOAD(glRenderbufferStorage); LOAD(glFramebufferTexture); + LOAD(glBufferSubData); + LOAD(glVertexAttribIPointer); + LOAD(glTexBuffer); #undef LOAD return true; diff --git a/cpp/functions/gfx/gl_loader.h b/cpp/functions/gfx/gl_loader.h index eeebda9a..e682aa3d 100644 --- a/cpp/functions/gfx/gl_loader.h +++ b/cpp/functions/gfx/gl_loader.h @@ -55,6 +55,10 @@ extern PFNGLGENRENDERBUFFERSPROC fn_glGenRenderbuffers; extern PFNGLRENDERBUFFERSTORAGEPROC fn_glRenderbufferStorage; extern PFNGLFRAMEBUFFERTEXTUREPROC fn_glFramebufferTexture; // sin "2D" + // Vertex pulling — issue 0049d (TBO de posiciones + sub-data streaming) + extern PFNGLBUFFERSUBDATAPROC fn_glBufferSubData; + extern PFNGLVERTEXATTRIBIPOINTERPROC fn_glVertexAttribIPointer; + extern 
PFNGLTEXBUFFERPROC fn_glTexBuffer; #define glAttachShader fn_glAttachShader #define glBindBuffer fn_glBindBuffer @@ -100,6 +104,9 @@ #define glGenRenderbuffers fn_glGenRenderbuffers #define glRenderbufferStorage fn_glRenderbufferStorage #define glFramebufferTexture fn_glFramebufferTexture + #define glBufferSubData fn_glBufferSubData + #define glVertexAttribIPointer fn_glVertexAttribIPointer + #define glTexBuffer fn_glTexBuffer #else #define GL_GLEXT_PROTOTYPES #include diff --git a/cpp/functions/viz/graph_renderer.cpp b/cpp/functions/viz/graph_renderer.cpp index 225a2278..5a62eee6 100644 --- a/cpp/functions/viz/graph_renderer.cpp +++ b/cpp/functions/viz/graph_renderer.cpp @@ -41,9 +41,15 @@ struct NodeInstance { // 16 bytes uint32_t color; // packed RGBA8 }; -struct EdgeVertex { // 12 bytes - float x, y; // world position - uint32_t color; // packed RGBA8 (alpha ya pre-multiplicada por edge_alpha) +// Tier 2 (issue 0049d): aristas via vertex pulling. El buffer es estatico — +// solo `(source_idx, target_idx, color, flags)` por arista, 16 bytes — y +// se reuploads solo cuando cambia el grafo. El vertex shader hace fetch de +// las posiciones desde un TBO RG32F que SI se actualiza por frame. 
+struct EdgeStatic { // 16 bytes + uint32_t source; // index into nodes + uint32_t target; // index into nodes + uint32_t color; // packed RGBA8 (sin pre-multiplicar — el shader aplica edge_alpha) + uint32_t flags; // reservado para flechas/styles futuros }; // --------------------------------------------------------------------------- @@ -59,22 +65,44 @@ struct GraphRenderer { unsigned int node_vao, node_quad_vbo, node_instance_vbo; unsigned int node_shader; - // Edge rendering (lines) + // Edge rendering (vertex pulling — issue 0049d) + // edge_vao : VAO con atributos por-instancia (divisor=1) leyendo de edge_vbo + // edge_vbo : buffer estatico (uno por grafo) con (source, target, color, flags) + // node_pos_buf / node_pos_tex : TBO RG32F que el vertex shader muestrea via texelFetch unsigned int edge_vao, edge_vbo; unsigned int edge_shader; + unsigned int node_pos_buf; + unsigned int node_pos_tex; + int edge_u_viewport_loc; + int edge_u_scale_loc; + int edge_u_translate_loc; + int edge_u_alpha_loc; + int edge_u_node_pos_loc; // Streaming buffer capacities (in bytes). Grow x2 cuando used > capacity. // Mantenemos el VBO orphaned con glBufferData(NULL, capacity) y luego // hacemos glBufferSubData con los bytes realmente usados — evita el // sync stall del driver y reduce las reallocaciones a O(log N). size_t node_vbo_capacity; - size_t edge_vbo_capacity; + size_t node_pos_capacity; // bytes del TBO RG32F + size_t edge_static_capacity; // bytes del buffer estatico de aristas // CPU staging buffers — se reusan entre frames; crecen igual que el VBO. 
NodeInstance* node_staging; size_t node_staging_cap; // en NodeInstances, no bytes - EdgeVertex* edge_staging; - size_t edge_staging_cap; // en EdgeVertex + float* node_pos_staging; // 2 floats (x,y) por nodo + size_t node_pos_staging_cap; // en floats + EdgeStatic* edge_static_staging; + size_t edge_static_staging_cap; // en EdgeStatic + + // Cache para detectar cambios del grafo y reuploadear el edge_vbo + // estatico solo entonces. Identificamos el grafo por (puntero, count); + // basta para los flujos actuales (graph_viewport recrea el array al + // recargar). Cuando GraphData gane un campo `revision` se sustituira. + const void* cached_edges_ptr; + int cached_edge_count; // edges del grafo en el ultimo upload + int cached_edges_drawn; // edges realmente subidos (post-filtro) + bool edges_uploaded; GraphRendererConfig config; }; @@ -151,17 +179,33 @@ void main() { } )"; -// Edge vertex shader (RGBA8 packed) +// Edge vertex shader — vertex pulling (issue 0049d). +// El buffer de aristas es estatico: solo indices y color. Las posiciones +// vienen del TBO `u_node_pos` (RG32F, vec2 por nodo). gl_VertexID indica si +// dibujamos el endpoint source (0) o target (1). Asi eliminamos el upload +// de `2 × 12 bytes × E` por frame que dominaba el coste de aristas. +// +// Nota: usamos divisor=1 en los 3 atributos activos y `glDrawArraysInstanced(LINES, +// 0, 2, edge_count)` — cada instancia dibuja una linea de 2 vertices, los +// atributos se mantienen constantes en la instancia y `gl_VertexID` cicla +// 0..1 dentro de ella. +// +// `samplerBuffer` y `texelFetch(samplerBuffer, int)` estan en GLSL 1.40+; +// 330 core nos vale (no necesitamos 4.30 — el issue exageraba). 
static const char* k_edge_vert = R"( #version 330 core -layout(location = 0) in vec2 a_pos; -layout(location = 1) in uint a_color; - -out vec4 v_color; +layout(location = 0) in uint a_source; +layout(location = 1) in uint a_target; +layout(location = 2) in uint a_color; +// location 3 (flags) reservado en el buffer (16B alignment) pero no leido aqui. +uniform samplerBuffer u_node_pos; uniform vec2 u_viewport; uniform float u_scale; uniform vec2 u_translate; +uniform float u_alpha; // edge_alpha + +out vec4 v_color; vec4 unpack_rgba8(uint c) { return vec4( @@ -173,11 +217,16 @@ vec4 unpack_rgba8(uint c) { } void main() { - vec2 screen = a_pos * u_scale + u_translate; + int idx = (gl_VertexID & 1) == 0 ? int(a_source) : int(a_target); + vec2 wpos = texelFetch(u_node_pos, idx).xy; + vec2 screen = wpos * u_scale + u_translate; vec2 ndc = (screen / u_viewport) * 2.0 - 1.0; ndc.y = -ndc.y; gl_Position = vec4(ndc, 0.0, 1.0); - v_color = unpack_rgba8(a_color); + + vec4 c = unpack_rgba8(a_color); + c.a *= u_alpha; + v_color = c; } )"; @@ -281,12 +330,19 @@ GraphRenderer* graph_renderer_create(int width, int height, const GraphRendererC r->height = height; r->config = config; - r->node_vbo_capacity = 0; - r->edge_vbo_capacity = 0; - r->node_staging = nullptr; - r->node_staging_cap = 0; - r->edge_staging = nullptr; - r->edge_staging_cap = 0; + r->node_vbo_capacity = 0; + r->node_pos_capacity = 0; + r->edge_static_capacity = 0; + r->node_staging = nullptr; + r->node_staging_cap = 0; + r->node_pos_staging = nullptr; + r->node_pos_staging_cap = 0; + r->edge_static_staging = nullptr; + r->edge_static_staging_cap = 0; + r->cached_edges_ptr = nullptr; + r->cached_edge_count = 0; + r->cached_edges_drawn = 0; + r->edges_uploaded = false; // --- FBO --- create_fbo(r); @@ -330,27 +386,56 @@ GraphRenderer* graph_renderer_create(int width, int height, const GraphRendererC glBindVertexArray(0); - // --- Edge VAO --- + // --- Edge VAO (vertex pulling, divisor=1 sobre el buffer estatico) 
--- glGenVertexArrays(1, &r->edge_vao); glBindVertexArray(r->edge_vao); glGenBuffers(1, &r->edge_vbo); glBindBuffer(GL_ARRAY_BUFFER, r->edge_vbo); - glEnableVertexAttribArray(0); // pos - glVertexAttribPointer(0, 2, GL_FLOAT, GL_FALSE, - sizeof(EdgeVertex), - (void*)offsetof(EdgeVertex, x)); - glEnableVertexAttribArray(1); // color (uint32) - glVertexAttribIPointer(1, 1, GL_UNSIGNED_INT, - sizeof(EdgeVertex), - (void*)offsetof(EdgeVertex, color)); + + // (source, target, color) — los 3 con divisor=1; flags queda sin habilitar. + glEnableVertexAttribArray(0); + glVertexAttribIPointer(0, 1, GL_UNSIGNED_INT, sizeof(EdgeStatic), + (void*)offsetof(EdgeStatic, source)); + glVertexAttribDivisor(0, 1); + glEnableVertexAttribArray(1); + glVertexAttribIPointer(1, 1, GL_UNSIGNED_INT, sizeof(EdgeStatic), + (void*)offsetof(EdgeStatic, target)); + glVertexAttribDivisor(1, 1); + glEnableVertexAttribArray(2); + glVertexAttribIPointer(2, 1, GL_UNSIGNED_INT, sizeof(EdgeStatic), + (void*)offsetof(EdgeStatic, color)); + glVertexAttribDivisor(2, 1); + // location 3 reservado en el buffer pero no enabled — el shader actual + // no lo lee. Mantenemos el slot para futuros estilos/flechas. glBindVertexArray(0); + // --- TBO de posiciones de nodos (RG32F, vec2 por nodo) --- + glGenBuffers(1, &r->node_pos_buf); + glBindBuffer(GL_TEXTURE_BUFFER, r->node_pos_buf); + // Reservamos capacidad inicial; se redimensiona en draw segun N. 
+ glBufferData(GL_TEXTURE_BUFFER, 4096 * 2 * sizeof(float), nullptr, GL_STREAM_DRAW); + r->node_pos_capacity = 4096 * 2 * sizeof(float); + + glGenTextures(1, &r->node_pos_tex); + glBindTexture(GL_TEXTURE_BUFFER, r->node_pos_tex); + glTexBuffer(GL_TEXTURE_BUFFER, GL_RG32F, r->node_pos_buf); + glBindTexture(GL_TEXTURE_BUFFER, 0); + glBindBuffer(GL_TEXTURE_BUFFER, 0); + // --- Shaders --- r->node_shader = link_program(k_node_vert, k_node_frag); r->edge_shader = link_program(k_edge_vert, k_edge_frag); + // Cachear locations de uniforms del edge shader (issue 0049d): se + // resuelven una vez en lugar de glGetUniformLocation cada frame. + r->edge_u_viewport_loc = glGetUniformLocation(r->edge_shader, "u_viewport"); + r->edge_u_scale_loc = glGetUniformLocation(r->edge_shader, "u_scale"); + r->edge_u_translate_loc = glGetUniformLocation(r->edge_shader, "u_translate"); + r->edge_u_alpha_loc = glGetUniformLocation(r->edge_shader, "u_alpha"); + r->edge_u_node_pos_loc = glGetUniformLocation(r->edge_shader, "u_node_pos"); + return r; } @@ -362,10 +447,13 @@ void graph_renderer_destroy(GraphRenderer* r) { glDeleteBuffers(1, &r->node_instance_vbo); glDeleteVertexArrays(1, &r->edge_vao); glDeleteBuffers(1, &r->edge_vbo); + glDeleteBuffers(1, &r->node_pos_buf); + glDeleteTextures(1, &r->node_pos_tex); glDeleteProgram(r->node_shader); glDeleteProgram(r->edge_shader); free(r->node_staging); - free(r->edge_staging); + free(r->node_pos_staging); + free(r->edge_static_staging); delete r; } @@ -417,72 +505,111 @@ unsigned int graph_renderer_draw(GraphRenderer* r, const GraphData& graph, float vy1 = cam_y + half_h * (1.0f + margin); // ---------------------------------------------------------------- - // Draw edges (frustum-culled) + // Subir posiciones de nodos al TBO (vec2 por nodo). Lo necesitamos + // tanto si dibujamos aristas (vertex pulling) como antes de dibujar + // nodos — pero se calcula una sola vez por frame. 
// ---------------------------------------------------------------- - if (graph.edge_count > 0 && graph.edges && graph.nodes) { - // Asegurar staging — capacidad maxima posible en este frame es - // edge_count * 2 vertices. La realidad post-cull suele ser mucho - // menor, pero reservamos para el peor caso y no realocamos por - // frame. - size_t need_verts = (size_t)graph.edge_count * 2; - if (need_verts > r->edge_staging_cap) { - size_t new_cap = grow_capacity(r->edge_staging_cap, need_verts, 8192); - r->edge_staging = (EdgeVertex*)realloc(r->edge_staging, new_cap * sizeof(EdgeVertex)); - r->edge_staging_cap = new_cap; + bool tbo_ready = false; + if (graph.node_count > 0 && graph.nodes) { + size_t need_floats = (size_t)graph.node_count * 2; + if (need_floats > r->node_pos_staging_cap) { + size_t new_cap = grow_capacity(r->node_pos_staging_cap, need_floats, 8192); + r->node_pos_staging = (float*)realloc(r->node_pos_staging, new_cap * sizeof(float)); + r->node_pos_staging_cap = new_cap; } - - size_t out = 0; - for (int i = 0; i < graph.edge_count; ++i) { - const GraphEdge& e = graph.edges[i]; - if (e.source >= (uint32_t)graph.node_count) continue; - if (e.target >= (uint32_t)graph.node_count) continue; - - const GraphNode& ns = graph.nodes[e.source]; - const GraphNode& nt = graph.nodes[e.target]; - - // Frustum cull: AABB del segmento (con margen para edges casi - // tangentes al viewport). Si el AABB no intersecta el viewport, - // skip — la arista no contribuye a ningun pixel visible. - float ex0 = std::min(ns.x, nt.x); - float ex1 = std::max(ns.x, nt.x); - float ey0 = std::min(ns.y, nt.y); - float ey1 = std::max(ns.y, nt.y); - if (ex1 < vx0 || ex0 > vx1 || ey1 < vy0 || ey0 > vy1) continue; - - uint32_t ecol = e.color != 0 ? 
e.color : pack_rgba8(0x88, 0x88, 0x88, 0xFF); - uint32_t col = modulate_alpha_rgba8(ecol, r->config.edge_alpha); - - r->edge_staging[out++] = { ns.x, ns.y, col }; - r->edge_staging[out++] = { nt.x, nt.y, col }; + for (int i = 0; i < graph.node_count; ++i) { + r->node_pos_staging[i * 2 + 0] = graph.nodes[i].x; + r->node_pos_staging[i * 2 + 1] = graph.nodes[i].y; } + const size_t used_bytes = need_floats * sizeof(float); + if (used_bytes > r->node_pos_capacity) { + r->node_pos_capacity = grow_capacity(r->node_pos_capacity, used_bytes, + 4096 * 2 * sizeof(float)); + } + glBindBuffer(GL_TEXTURE_BUFFER, r->node_pos_buf); + // Orphan + subdata: misma estrategia que en 0049c, evita stall. + glBufferData(GL_TEXTURE_BUFFER, (GLsizeiptr)r->node_pos_capacity, nullptr, GL_STREAM_DRAW); + glBufferSubData(GL_TEXTURE_BUFFER, 0, (GLsizeiptr)used_bytes, r->node_pos_staging); + // glTexBuffer ya esta vinculado al buffer en create — el view sigue + // valido tras orphan: GL_TEXTURE_BUFFER referencia al BO por nombre. + glBindBuffer(GL_TEXTURE_BUFFER, 0); + tbo_ready = true; + } - if (out > 0) { - const size_t used_bytes = out * sizeof(EdgeVertex); - if (used_bytes > r->edge_vbo_capacity) { - r->edge_vbo_capacity = grow_capacity(r->edge_vbo_capacity, used_bytes, - 8192 * sizeof(EdgeVertex)); + // ---------------------------------------------------------------- + // Aristas via vertex pulling. El buffer estatico solo se reupload + // cuando el grafo cambia — detectamos con (puntero, count). + // ---------------------------------------------------------------- + if (tbo_ready && graph.edge_count > 0 && graph.edges) { + const bool graph_changed = + !r->edges_uploaded + || r->cached_edges_ptr != (const void*)graph.edges + || r->cached_edge_count != graph.edge_count; + + if (graph_changed) { + // (Re)build el buffer estatico. Skipeamos aristas con indices + // fuera de rango — pueden aparecer durante una recarga parcial + // del grafo y no queremos que el GPU lea fuera del TBO. 
+ if ((size_t)graph.edge_count > r->edge_static_staging_cap) { + size_t new_cap = grow_capacity(r->edge_static_staging_cap, + (size_t)graph.edge_count, 8192); + r->edge_static_staging = (EdgeStatic*)realloc(r->edge_static_staging, + new_cap * sizeof(EdgeStatic)); + r->edge_static_staging_cap = new_cap; } + size_t out = 0; + for (int i = 0; i < graph.edge_count; ++i) { + const GraphEdge& e = graph.edges[i]; + if (e.source >= (uint32_t)graph.node_count) continue; + if (e.target >= (uint32_t)graph.node_count) continue; + uint32_t col = e.color != 0 ? e.color + : pack_rgba8(0x88, 0x88, 0x88, 0xFF); + r->edge_static_staging[out++] = { e.source, e.target, col, 0u }; + } + if (out > 0) { + const size_t used_bytes = out * sizeof(EdgeStatic); + if (used_bytes > r->edge_static_capacity) { + r->edge_static_capacity = grow_capacity(r->edge_static_capacity, + used_bytes, + 8192 * sizeof(EdgeStatic)); + } + glBindBuffer(GL_ARRAY_BUFFER, r->edge_vbo); + glBufferData(GL_ARRAY_BUFFER, (GLsizeiptr)r->edge_static_capacity, + nullptr, GL_STATIC_DRAW); + glBufferSubData(GL_ARRAY_BUFFER, 0, (GLsizeiptr)used_bytes, + r->edge_static_staging); + } + r->cached_edges_ptr = (const void*)graph.edges; + r->cached_edge_count = graph.edge_count; + r->cached_edges_drawn = (int)out; + r->edges_uploaded = (out > 0); + } + if (r->edges_uploaded) { glUseProgram(r->edge_shader); - glUniform2f(glGetUniformLocation(r->edge_shader, "u_viewport"), - (float)r->width, (float)r->height); - glUniform1f(glGetUniformLocation(r->edge_shader, "u_scale"), scale); - glUniform2f(glGetUniformLocation(r->edge_shader, "u_translate"), tx, ty); + glUniform2f(r->edge_u_viewport_loc, (float)r->width, (float)r->height); + glUniform1f(r->edge_u_scale_loc, scale); + glUniform2f(r->edge_u_translate_loc, tx, ty); + glUniform1f(r->edge_u_alpha_loc, r->config.edge_alpha); + + // Bind TBO al sampler u_node_pos en la texture unit 0. 
+ glActiveTexture(GL_TEXTURE0); + glBindTexture(GL_TEXTURE_BUFFER, r->node_pos_tex); + glUniform1i(r->edge_u_node_pos_loc, 0); glLineWidth(r->config.edge_width); glBindVertexArray(r->edge_vao); - glBindBuffer(GL_ARRAY_BUFFER, r->edge_vbo); - // Orphan: descarta el contenido previo y damos al driver un - // buffer fresco con la capacidad reservada. Despues subimos - // solo los bytes realmente usados con BufferSubData — evitamos - // el sync stall y reutilizamos la asignacion entre frames - // mientras no crezca. - glBufferData(GL_ARRAY_BUFFER, (GLsizeiptr)r->edge_vbo_capacity, nullptr, GL_STREAM_DRAW); - glBufferSubData(GL_ARRAY_BUFFER, 0, (GLsizeiptr)used_bytes, r->edge_staging); - glDrawArrays(GL_LINES, 0, (GLsizei)out); + // Una "instancia" = 1 linea (2 vertices). gl_VertexID dentro + // de la instancia es 0 o 1 → elige endpoint source o target. + glDrawArraysInstanced(GL_LINES, 0, 2, (GLsizei)r->cached_edges_drawn); glBindVertexArray(0); + glBindTexture(GL_TEXTURE_BUFFER, 0); } + } else if (graph.edge_count == 0) { + // Si el caller borra todas las aristas, invalidamos el cache para + // que el siguiente upload reconstruya el buffer. 
+ r->edges_uploaded = false; } // ---------------------------------------------------------------- diff --git a/cpp/functions/viz/graph_renderer.md b/cpp/functions/viz/graph_renderer.md index a245d694..4ddea3dd 100644 --- a/cpp/functions/viz/graph_renderer.md +++ b/cpp/functions/viz/graph_renderer.md @@ -3,11 +3,11 @@ name: graph_renderer kind: function lang: cpp domain: viz -version: "1.2.0" +version: "1.3.0" purity: impure signature: "GraphRenderer* graph_renderer_create(int width, int height, const GraphRendererConfig& config)" description: "Renderer GPU de grafos con instanced rendering a FBO, compatible con ImGui::Image para visualizacion de grafos grandes" -tags: [graph, renderer, opengl, gpu, instanced, fbo, visualization, frustum-cull, rgba8] +tags: [graph, renderer, opengl, gpu, instanced, fbo, visualization, frustum-cull, rgba8, vertex-pulling, tbo] uses_functions: ["gl_loader_cpp_gfx"] uses_types: ["GraphData_cpp_viz"] returns: [] @@ -88,6 +88,14 @@ ndc = (screen / viewport) * 2 - 1 ## Notas +- **v1.3** (2026-04-29, issue 0049d): aristas via vertex pulling. API publica intacta. + - El buffer de aristas pasa a ser estatico (`source_idx, target_idx, color, flags` × E, 16 bytes/arista) y solo se reupload cuando el grafo cambia (detectado por `(edges_ptr, edge_count)` — heuristica suficiente mientras `GraphData` no tenga `revision`). Para 100k aristas: 1.6 MB iniciales vs 4.8 MB/frame del esquema anterior — el upload baja a cero en regimen estable. + - Las posiciones de los nodos se suben cada frame a un Texture Buffer Object `RG32F` (`vec2[]`, 8 bytes/nodo). El vertex shader de aristas hace `texelFetch(u_node_pos, idx)` con `idx` derivado de `gl_VertexID & 1` (0=source, 1=target). + - Draw call: `glDrawArraysInstanced(GL_LINES, 0, 2, edge_count)` — 1 instancia por arista, 2 vertices por linea, todos los atributos con `divisor=1`. + - Frustum cull de aristas eliminado en CPU (la GPU clipea fuera de viewport por su cuenta). Se mantiene para nodos. 
+ - `edge_alpha` pasa a uniform en el shader; la pre-multiplicacion en CPU desaparece y permite que el buffer estatico no dependa del config. + - GLSL: 330 core (con `samplerBuffer`/`texelFetch` que estan en 1.40+). `gl_loader` gana `glBufferSubData`, `glVertexAttribIPointer` y `glTexBuffer` (Linux ya los tenia via `GL_GLEXT_PROTOTYPES`). + - **v1.2** (2026-04-29, issue 0049c): tres optimizaciones internas, API publica intacta. 1. **RGBA8 packing**: el buffer de instancia/vertice usa `uint32` por color en lugar de 4 floats. Nodo: 28 → 16 bytes/instance (-43%). Edge: 24 → 12 bytes/vertex (-50%). Los shaders desempaquetan con bit shifts (compatible GL 3.30+, sin necesidad de `unpackUnorm4x8` que es 4.20+). Helpers expuestos en el .h: `pack_rgba8`, `unpack_rgba8`, `modulate_alpha_rgba8` (testeados en `test_graph_pack_rgba8.cpp`). 2. **Capacity-tracked streaming buffers**: el VBO se mantiene orphaned con `glBufferData(NULL, capacity)` y se actualiza con `glBufferSubData` solo los bytes usados. La capacidad crece x2 cuando hace falta (inicial: 4096 nodos / 8192 vertices de aristas) → reallocaciones en O(log N). Staging CPU reutilizado entre frames. diff --git a/cpp/tests/CMakeLists.txt b/cpp/tests/CMakeLists.txt index 851ce823..d341e287 100644 --- a/cpp/tests/CMakeLists.txt +++ b/cpp/tests/CMakeLists.txt @@ -66,6 +66,10 @@ add_fn_test(test_graph_should_pause test_graph_should_pause.cpp ${CMAKE_CURRENT_SOURCE_DIR}/../functions/viz/graph_force_layout.cpp ${CMAKE_CURRENT_SOURCE_DIR}/../functions/viz/graph_types.cpp) +# --- Issue 0049d — vertex pulling edge buffer (logica solo, sin GL) -------- +add_fn_test(test_graph_edge_static test_graph_edge_static.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/../functions/viz/graph_types.cpp) + # --- Visual golden-image diff (issue 0048) --------------------------------- # El binario primitives_gallery se compila con --capture; el test compara los # PNGs generados con los goldens en cpp/tests/golden/. 
Si no hay goldens o el diff --git a/cpp/tests/test_graph_edge_static.cpp b/cpp/tests/test_graph_edge_static.cpp new file mode 100644 index 00000000..16247196 --- /dev/null +++ b/cpp/tests/test_graph_edge_static.cpp @@ -0,0 +1,60 @@ +// Smoke test for graph_renderer's vertex-pulling edge buffer layout (issue +// 0049d). El test no toca GL — solo verifica las garantias estructurales que +// el renderer asume: +// 1. EdgeStatic mide 16 bytes (alineacion natural sin padding sorpresa). +// 2. Los offsets de source/target/color/flags son 0/4/8/12. +// 3. La paleta de colores por defecto del fallback (gris 0x88) tiene la +// forma RGBA8 esperada por el shader (R en byte 0). +// +// Render-a-FBO-y-readback queda fuera porque exigiria un contexto GL en +// el runner de tests; ya hay un golden image gate via test_visual + capture +// en primitives_gallery. + +#define CATCH_CONFIG_MAIN +#include "catch_amalgamated.hpp" + +#include "viz/graph_renderer.h" +#include "viz/graph_types.h" + +#include <cstddef> +#include <cstdint> + +// Re-declaramos la struct local a graph_renderer.cpp para chequear su layout +// "en espejo". Si cambia el layout en el .cpp, este test debe actualizarse — +// es intencional: el shader y el VAO setup dependen de estos offsets. +namespace test_layout { +struct EdgeStatic { + uint32_t source; + uint32_t target; + uint32_t color; + uint32_t flags; +}; +} // namespace test_layout + +TEST_CASE("EdgeStatic mide 16 bytes y tiene offsets contiguos", "[viz][edge_static]") { + using test_layout::EdgeStatic; + REQUIRE(sizeof(EdgeStatic) == 16); + REQUIRE(offsetof(EdgeStatic, source) == 0); + REQUIRE(offsetof(EdgeStatic, target) == 4); + REQUIRE(offsetof(EdgeStatic, color) == 8); + REQUIRE(offsetof(EdgeStatic, flags) == 12); +} + +TEST_CASE("Fallback gris 0x88 tiene R en el byte LSB", "[viz][edge_static]") { + // El renderer construye `pack_rgba8(0x88, 0x88, 0x88, 0xFF)` cuando + // `e.color == 0`. El shader hace unpack manual asumiendo R en LSB. 
+ uint32_t gray = pack_rgba8(0x88, 0x88, 0x88, 0xFF); + REQUIRE((gray & 0xFFu) == 0x88u); // R + REQUIRE(((gray >> 8) & 0xFFu) == 0x88u); // G + REQUIRE(((gray >> 16) & 0xFFu) == 0x88u); // B + REQUIRE(((gray >> 24) & 0xFFu) == 0xFFu); // A +} + +TEST_CASE("GraphEdge.color = 0 indica fallback al gris por defecto", + "[viz][edge_static]") { + GraphEdge e = graph_edge(0, 1, 1.0f); + REQUIRE(e.color == 0u); + // El renderer interpreta esto como "usa el gris 0x88,0x88,0x88,0xFF". + // Un agente que precarga colores debe usar `pack_rgba8` para evitar + // colisionar con el sentinel 0. +} diff --git a/dev/issues/README.md b/dev/issues/README.md index 83a152e2..980fec4c 100644 --- a/dev/issues/README.md +++ b/dev/issues/README.md @@ -58,7 +58,7 @@ | [0049a](completed/0049a-osint-graph-setup.md) | Setup proyecto osint_graph + sub-repo graph_explorer | completado | alta | infra | parte de 0049 | | [0049b](completed/0049b-cpp-bump-gl-43.md) | Bump OpenGL 3.3 → 4.3 core en cpp/framework | completado | alta | infra | parte de 0049 | | [0049c](completed/0049c-graph-renderer-tier1.md) | graph_renderer Tier 1: RGBA8, orphan, frustum cull, auto-pause | completado | alta | perf | parte de 0049 | -| [0049d](0049d-graph-edges-vertex-pulling.md) | Aristas via vertex pulling con TBO | pendiente | alta | perf | parte de 0049 | +| [0049d](completed/0049d-graph-edges-vertex-pulling.md) | Aristas via vertex pulling con TBO | completado | alta | perf | parte de 0049 | | [0049e](0049e-graph-types-extended.md) | graph_types modelo extendido + EntityType/RelationType | pendiente | alta | feature | parte de 0049 | | [0049f](0049f-graph-renderer-symbols.md) | Renderer extendido: shapes SDF, icon atlas, flechas, edge styles | pendiente | alta | feature | parte de 0049 | | [0049g](0049g-graph-source-operations.md) | graph_sources: lector operations.db + abstraccion funcional | pendiente | alta | feature | parte de 0049 | diff --git a/dev/issues/0049d-graph-edges-vertex-pulling.md 
b/dev/issues/completed/0049d-graph-edges-vertex-pulling.md similarity index 100% rename from dev/issues/0049d-graph-edges-vertex-pulling.md rename to dev/issues/completed/0049d-graph-edges-vertex-pulling.md