perf(viz): graph_renderer edges via TBO + vertex pulling (issue 0049d)
El buffer de aristas pasa a ser estatico (16B/arista: source, target, color, flags) y solo se resube cuando cambia el grafo. Las posiciones de los nodos viven en un Texture Buffer Object (RG32F) actualizado por frame; el vertex shader hace texelFetch con gl_VertexID & 1 para elegir endpoint. Draw call: glDrawArraysInstanced(GL_LINES, 0, 2, edge_count) con divisor=1. Para 100k aristas: el upload de 4.8 MB/frame baja a 0 en regimen estable. edge_alpha pasa a uniform; la pre-multiplicacion en CPU desaparece. GLSL sigue en 330 core (samplerBuffer/texelFetch estan en 1.40+). gl_loader gana glBufferSubData, glVertexAttribIPointer y glTexBuffer (en Linux ya estaban via GL_GLEXT_PROTOTYPES; ahora estan disponibles tambien en MinGW/Windows). Tests: nuevo test_graph_edge_static valida el layout de 16B y el packing RGBA8 del fallback. test_visual sigue verde — render visualmente identico. Bump graph_renderer 1.2.0 -> 1.3.0.
This commit is contained in:
@@ -46,6 +46,9 @@ PFNGLFRAMEBUFFERRENDERBUFFERPROC fn_glFramebufferRenderbuffer = nullptr;
|
|||||||
PFNGLGENRENDERBUFFERSPROC fn_glGenRenderbuffers = nullptr;
|
PFNGLGENRENDERBUFFERSPROC fn_glGenRenderbuffers = nullptr;
|
||||||
PFNGLRENDERBUFFERSTORAGEPROC fn_glRenderbufferStorage = nullptr;
|
PFNGLRENDERBUFFERSTORAGEPROC fn_glRenderbufferStorage = nullptr;
|
||||||
PFNGLFRAMEBUFFERTEXTUREPROC fn_glFramebufferTexture = nullptr;
|
PFNGLFRAMEBUFFERTEXTUREPROC fn_glFramebufferTexture = nullptr;
|
||||||
|
PFNGLBUFFERSUBDATAPROC fn_glBufferSubData = nullptr;
|
||||||
|
PFNGLVERTEXATTRIBIPOINTERPROC fn_glVertexAttribIPointer = nullptr;
|
||||||
|
PFNGLTEXBUFFERPROC fn_glTexBuffer = nullptr;
|
||||||
|
|
||||||
namespace fn::gfx {
|
namespace fn::gfx {
|
||||||
|
|
||||||
@@ -98,6 +101,9 @@ bool gl_loader_init() {
|
|||||||
LOAD(glGenRenderbuffers);
|
LOAD(glGenRenderbuffers);
|
||||||
LOAD(glRenderbufferStorage);
|
LOAD(glRenderbufferStorage);
|
||||||
LOAD(glFramebufferTexture);
|
LOAD(glFramebufferTexture);
|
||||||
|
LOAD(glBufferSubData);
|
||||||
|
LOAD(glVertexAttribIPointer);
|
||||||
|
LOAD(glTexBuffer);
|
||||||
|
|
||||||
#undef LOAD
|
#undef LOAD
|
||||||
return true;
|
return true;
|
||||||
|
|||||||
@@ -55,6 +55,10 @@
|
|||||||
extern PFNGLGENRENDERBUFFERSPROC fn_glGenRenderbuffers;
|
extern PFNGLGENRENDERBUFFERSPROC fn_glGenRenderbuffers;
|
||||||
extern PFNGLRENDERBUFFERSTORAGEPROC fn_glRenderbufferStorage;
|
extern PFNGLRENDERBUFFERSTORAGEPROC fn_glRenderbufferStorage;
|
||||||
extern PFNGLFRAMEBUFFERTEXTUREPROC fn_glFramebufferTexture; // sin "2D"
|
extern PFNGLFRAMEBUFFERTEXTUREPROC fn_glFramebufferTexture; // sin "2D"
|
||||||
|
// Vertex pulling — issue 0049d (TBO de posiciones + sub-data streaming)
|
||||||
|
extern PFNGLBUFFERSUBDATAPROC fn_glBufferSubData;
|
||||||
|
extern PFNGLVERTEXATTRIBIPOINTERPROC fn_glVertexAttribIPointer;
|
||||||
|
extern PFNGLTEXBUFFERPROC fn_glTexBuffer;
|
||||||
|
|
||||||
#define glAttachShader fn_glAttachShader
|
#define glAttachShader fn_glAttachShader
|
||||||
#define glBindBuffer fn_glBindBuffer
|
#define glBindBuffer fn_glBindBuffer
|
||||||
@@ -100,6 +104,9 @@
|
|||||||
#define glGenRenderbuffers fn_glGenRenderbuffers
|
#define glGenRenderbuffers fn_glGenRenderbuffers
|
||||||
#define glRenderbufferStorage fn_glRenderbufferStorage
|
#define glRenderbufferStorage fn_glRenderbufferStorage
|
||||||
#define glFramebufferTexture fn_glFramebufferTexture
|
#define glFramebufferTexture fn_glFramebufferTexture
|
||||||
|
#define glBufferSubData fn_glBufferSubData
|
||||||
|
#define glVertexAttribIPointer fn_glVertexAttribIPointer
|
||||||
|
#define glTexBuffer fn_glTexBuffer
|
||||||
#else
|
#else
|
||||||
#define GL_GLEXT_PROTOTYPES
|
#define GL_GLEXT_PROTOTYPES
|
||||||
#include <GL/gl.h>
|
#include <GL/gl.h>
|
||||||
|
|||||||
@@ -41,9 +41,15 @@ struct NodeInstance { // 16 bytes
|
|||||||
uint32_t color; // packed RGBA8
|
uint32_t color; // packed RGBA8
|
||||||
};
|
};
|
||||||
|
|
||||||
struct EdgeVertex { // 12 bytes
|
// Tier 2 (issue 0049d): aristas via vertex pulling. El buffer es estatico —
|
||||||
float x, y; // world position
|
// solo `(source_idx, target_idx, color, flags)` por arista, 16 bytes — y
|
||||||
uint32_t color; // packed RGBA8 (alpha ya pre-multiplicada por edge_alpha)
|
// se resube solo cuando cambia el grafo. El vertex shader hace fetch de
|
||||||
|
// las posiciones desde un TBO RG32F que SI se actualiza por frame.
|
||||||
|
struct EdgeStatic { // 16 bytes
|
||||||
|
uint32_t source; // index into nodes
|
||||||
|
uint32_t target; // index into nodes
|
||||||
|
uint32_t color; // packed RGBA8 (sin pre-multiplicar — el shader aplica edge_alpha)
|
||||||
|
uint32_t flags; // reservado para flechas/styles futuros
|
||||||
};
|
};
|
||||||
|
|
||||||
// ---------------------------------------------------------------------------
|
// ---------------------------------------------------------------------------
|
||||||
@@ -59,22 +65,44 @@ struct GraphRenderer {
|
|||||||
unsigned int node_vao, node_quad_vbo, node_instance_vbo;
|
unsigned int node_vao, node_quad_vbo, node_instance_vbo;
|
||||||
unsigned int node_shader;
|
unsigned int node_shader;
|
||||||
|
|
||||||
// Edge rendering (lines)
|
// Edge rendering (vertex pulling — issue 0049d)
|
||||||
|
// edge_vao : VAO con atributos por-instancia (divisor=1) leyendo de edge_vbo
|
||||||
|
// edge_vbo : buffer estatico (uno por grafo) con (source, target, color, flags)
|
||||||
|
// node_pos_buf / node_pos_tex : TBO RG32F que el vertex shader muestrea via texelFetch
|
||||||
unsigned int edge_vao, edge_vbo;
|
unsigned int edge_vao, edge_vbo;
|
||||||
unsigned int edge_shader;
|
unsigned int edge_shader;
|
||||||
|
unsigned int node_pos_buf;
|
||||||
|
unsigned int node_pos_tex;
|
||||||
|
int edge_u_viewport_loc;
|
||||||
|
int edge_u_scale_loc;
|
||||||
|
int edge_u_translate_loc;
|
||||||
|
int edge_u_alpha_loc;
|
||||||
|
int edge_u_node_pos_loc;
|
||||||
|
|
||||||
// Streaming buffer capacities (in bytes). Grow x2 cuando used > capacity.
|
// Streaming buffer capacities (in bytes). Grow x2 cuando used > capacity.
|
||||||
// Mantenemos el VBO orphaned con glBufferData(NULL, capacity) y luego
|
// Mantenemos el VBO orphaned con glBufferData(NULL, capacity) y luego
|
||||||
// hacemos glBufferSubData con los bytes realmente usados — evita el
|
// hacemos glBufferSubData con los bytes realmente usados — evita el
|
||||||
// sync stall del driver y reduce las reallocaciones a O(log N).
|
// sync stall del driver y reduce las reallocaciones a O(log N).
|
||||||
size_t node_vbo_capacity;
|
size_t node_vbo_capacity;
|
||||||
size_t edge_vbo_capacity;
|
size_t node_pos_capacity; // bytes del TBO RG32F
|
||||||
|
size_t edge_static_capacity; // bytes del buffer estatico de aristas
|
||||||
|
|
||||||
// CPU staging buffers — se reusan entre frames; crecen igual que el VBO.
|
// CPU staging buffers — se reusan entre frames; crecen igual que el VBO.
|
||||||
NodeInstance* node_staging;
|
NodeInstance* node_staging;
|
||||||
size_t node_staging_cap; // en NodeInstances, no bytes
|
size_t node_staging_cap; // en NodeInstances, no bytes
|
||||||
EdgeVertex* edge_staging;
|
float* node_pos_staging; // 2 floats (x,y) por nodo
|
||||||
size_t edge_staging_cap; // en EdgeVertex
|
size_t node_pos_staging_cap; // en floats
|
||||||
|
EdgeStatic* edge_static_staging;
|
||||||
|
size_t edge_static_staging_cap; // en EdgeStatic
|
||||||
|
|
||||||
|
// Cache para detectar cambios del grafo y reuploadear el edge_vbo
|
||||||
|
// estatico solo entonces. Identificamos el grafo por (puntero, count);
|
||||||
|
// basta para los flujos actuales (graph_viewport recrea el array al
|
||||||
|
// recargar). Cuando GraphData gane un campo `revision` se sustituira.
|
||||||
|
const void* cached_edges_ptr;
|
||||||
|
int cached_edge_count; // edges del grafo en el ultimo upload
|
||||||
|
int cached_edges_drawn; // edges realmente subidos (post-filtro)
|
||||||
|
bool edges_uploaded;
|
||||||
|
|
||||||
GraphRendererConfig config;
|
GraphRendererConfig config;
|
||||||
};
|
};
|
||||||
@@ -151,17 +179,33 @@ void main() {
|
|||||||
}
|
}
|
||||||
)";
|
)";
|
||||||
|
|
||||||
// Edge vertex shader (RGBA8 packed)
|
// Edge vertex shader — vertex pulling (issue 0049d).
|
||||||
|
// El buffer de aristas es estatico: solo indices y color. Las posiciones
|
||||||
|
// vienen del TBO `u_node_pos` (RG32F, vec2 por nodo). gl_VertexID indica si
|
||||||
|
// dibujamos el endpoint source (0) o target (1). Asi eliminamos el upload
|
||||||
|
// de `12 floats × E` por frame que dominaba el coste de aristas.
|
||||||
|
//
|
||||||
|
// Nota: usamos divisor=1 en los 3 atributos activos y `glDrawArraysInstanced(LINES,
|
||||||
|
// 0, 2, edge_count)` — cada instancia rinde una linea de 2 vertices, los
|
||||||
|
// atributos se mantienen constantes en la instancia y `gl_VertexID` cicla
|
||||||
|
// 0..1 dentro de ella.
|
||||||
|
//
|
||||||
|
// `samplerBuffer` y `texelFetch(samplerBuffer, int)` estan en GLSL 1.40+;
|
||||||
|
// 330 core nos vale (no necesitamos 4.30 — el issue exageraba).
|
||||||
static const char* k_edge_vert = R"(
|
static const char* k_edge_vert = R"(
|
||||||
#version 330 core
|
#version 330 core
|
||||||
layout(location = 0) in vec2 a_pos;
|
layout(location = 0) in uint a_source;
|
||||||
layout(location = 1) in uint a_color;
|
layout(location = 1) in uint a_target;
|
||||||
|
layout(location = 2) in uint a_color;
|
||||||
out vec4 v_color;
|
// location 3 (flags) reservado en el buffer (16B alignment) pero no leido aqui.
|
||||||
|
|
||||||
|
uniform samplerBuffer u_node_pos;
|
||||||
uniform vec2 u_viewport;
|
uniform vec2 u_viewport;
|
||||||
uniform float u_scale;
|
uniform float u_scale;
|
||||||
uniform vec2 u_translate;
|
uniform vec2 u_translate;
|
||||||
|
uniform float u_alpha; // edge_alpha
|
||||||
|
|
||||||
|
out vec4 v_color;
|
||||||
|
|
||||||
vec4 unpack_rgba8(uint c) {
|
vec4 unpack_rgba8(uint c) {
|
||||||
return vec4(
|
return vec4(
|
||||||
@@ -173,11 +217,16 @@ vec4 unpack_rgba8(uint c) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
void main() {
|
void main() {
|
||||||
vec2 screen = a_pos * u_scale + u_translate;
|
int idx = (gl_VertexID & 1) == 0 ? int(a_source) : int(a_target);
|
||||||
|
vec2 wpos = texelFetch(u_node_pos, idx).xy;
|
||||||
|
vec2 screen = wpos * u_scale + u_translate;
|
||||||
vec2 ndc = (screen / u_viewport) * 2.0 - 1.0;
|
vec2 ndc = (screen / u_viewport) * 2.0 - 1.0;
|
||||||
ndc.y = -ndc.y;
|
ndc.y = -ndc.y;
|
||||||
gl_Position = vec4(ndc, 0.0, 1.0);
|
gl_Position = vec4(ndc, 0.0, 1.0);
|
||||||
v_color = unpack_rgba8(a_color);
|
|
||||||
|
vec4 c = unpack_rgba8(a_color);
|
||||||
|
c.a *= u_alpha;
|
||||||
|
v_color = c;
|
||||||
}
|
}
|
||||||
)";
|
)";
|
||||||
|
|
||||||
@@ -281,12 +330,19 @@ GraphRenderer* graph_renderer_create(int width, int height, const GraphRendererC
|
|||||||
r->height = height;
|
r->height = height;
|
||||||
r->config = config;
|
r->config = config;
|
||||||
|
|
||||||
r->node_vbo_capacity = 0;
|
r->node_vbo_capacity = 0;
|
||||||
r->edge_vbo_capacity = 0;
|
r->node_pos_capacity = 0;
|
||||||
r->node_staging = nullptr;
|
r->edge_static_capacity = 0;
|
||||||
r->node_staging_cap = 0;
|
r->node_staging = nullptr;
|
||||||
r->edge_staging = nullptr;
|
r->node_staging_cap = 0;
|
||||||
r->edge_staging_cap = 0;
|
r->node_pos_staging = nullptr;
|
||||||
|
r->node_pos_staging_cap = 0;
|
||||||
|
r->edge_static_staging = nullptr;
|
||||||
|
r->edge_static_staging_cap = 0;
|
||||||
|
r->cached_edges_ptr = nullptr;
|
||||||
|
r->cached_edge_count = 0;
|
||||||
|
r->cached_edges_drawn = 0;
|
||||||
|
r->edges_uploaded = false;
|
||||||
|
|
||||||
// --- FBO ---
|
// --- FBO ---
|
||||||
create_fbo(r);
|
create_fbo(r);
|
||||||
@@ -330,27 +386,56 @@ GraphRenderer* graph_renderer_create(int width, int height, const GraphRendererC
|
|||||||
|
|
||||||
glBindVertexArray(0);
|
glBindVertexArray(0);
|
||||||
|
|
||||||
// --- Edge VAO ---
|
// --- Edge VAO (vertex pulling, divisor=1 sobre el buffer estatico) ---
|
||||||
glGenVertexArrays(1, &r->edge_vao);
|
glGenVertexArrays(1, &r->edge_vao);
|
||||||
glBindVertexArray(r->edge_vao);
|
glBindVertexArray(r->edge_vao);
|
||||||
|
|
||||||
glGenBuffers(1, &r->edge_vbo);
|
glGenBuffers(1, &r->edge_vbo);
|
||||||
glBindBuffer(GL_ARRAY_BUFFER, r->edge_vbo);
|
glBindBuffer(GL_ARRAY_BUFFER, r->edge_vbo);
|
||||||
glEnableVertexAttribArray(0); // pos
|
|
||||||
glVertexAttribPointer(0, 2, GL_FLOAT, GL_FALSE,
|
// (source, target, color) con divisor=1; flags queda reservado sin habilitar.
|
||||||
sizeof(EdgeVertex),
|
glEnableVertexAttribArray(0);
|
||||||
(void*)offsetof(EdgeVertex, x));
|
glVertexAttribIPointer(0, 1, GL_UNSIGNED_INT, sizeof(EdgeStatic),
|
||||||
glEnableVertexAttribArray(1); // color (uint32)
|
(void*)offsetof(EdgeStatic, source));
|
||||||
glVertexAttribIPointer(1, 1, GL_UNSIGNED_INT,
|
glVertexAttribDivisor(0, 1);
|
||||||
sizeof(EdgeVertex),
|
glEnableVertexAttribArray(1);
|
||||||
(void*)offsetof(EdgeVertex, color));
|
glVertexAttribIPointer(1, 1, GL_UNSIGNED_INT, sizeof(EdgeStatic),
|
||||||
|
(void*)offsetof(EdgeStatic, target));
|
||||||
|
glVertexAttribDivisor(1, 1);
|
||||||
|
glEnableVertexAttribArray(2);
|
||||||
|
glVertexAttribIPointer(2, 1, GL_UNSIGNED_INT, sizeof(EdgeStatic),
|
||||||
|
(void*)offsetof(EdgeStatic, color));
|
||||||
|
glVertexAttribDivisor(2, 1);
|
||||||
|
// location 3 reservado en el buffer pero no enabled — el shader actual
|
||||||
|
// no lo lee. Mantenemos el slot para futuros estilos/flechas.
|
||||||
|
|
||||||
glBindVertexArray(0);
|
glBindVertexArray(0);
|
||||||
|
|
||||||
|
// --- TBO de posiciones de nodos (RG32F, vec2 por nodo) ---
|
||||||
|
glGenBuffers(1, &r->node_pos_buf);
|
||||||
|
glBindBuffer(GL_TEXTURE_BUFFER, r->node_pos_buf);
|
||||||
|
// Reservamos capacidad inicial; se redimensiona en draw segun N.
|
||||||
|
glBufferData(GL_TEXTURE_BUFFER, 4096 * 2 * sizeof(float), nullptr, GL_STREAM_DRAW);
|
||||||
|
r->node_pos_capacity = 4096 * 2 * sizeof(float);
|
||||||
|
|
||||||
|
glGenTextures(1, &r->node_pos_tex);
|
||||||
|
glBindTexture(GL_TEXTURE_BUFFER, r->node_pos_tex);
|
||||||
|
glTexBuffer(GL_TEXTURE_BUFFER, GL_RG32F, r->node_pos_buf);
|
||||||
|
glBindTexture(GL_TEXTURE_BUFFER, 0);
|
||||||
|
glBindBuffer(GL_TEXTURE_BUFFER, 0);
|
||||||
|
|
||||||
// --- Shaders ---
|
// --- Shaders ---
|
||||||
r->node_shader = link_program(k_node_vert, k_node_frag);
|
r->node_shader = link_program(k_node_vert, k_node_frag);
|
||||||
r->edge_shader = link_program(k_edge_vert, k_edge_frag);
|
r->edge_shader = link_program(k_edge_vert, k_edge_frag);
|
||||||
|
|
||||||
|
// Cachear locations de uniforms del edge shader (issue 0049d): se
|
||||||
|
// resuelven una vez en lugar de glGetUniformLocation cada frame.
|
||||||
|
r->edge_u_viewport_loc = glGetUniformLocation(r->edge_shader, "u_viewport");
|
||||||
|
r->edge_u_scale_loc = glGetUniformLocation(r->edge_shader, "u_scale");
|
||||||
|
r->edge_u_translate_loc = glGetUniformLocation(r->edge_shader, "u_translate");
|
||||||
|
r->edge_u_alpha_loc = glGetUniformLocation(r->edge_shader, "u_alpha");
|
||||||
|
r->edge_u_node_pos_loc = glGetUniformLocation(r->edge_shader, "u_node_pos");
|
||||||
|
|
||||||
return r;
|
return r;
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -362,10 +447,13 @@ void graph_renderer_destroy(GraphRenderer* r) {
|
|||||||
glDeleteBuffers(1, &r->node_instance_vbo);
|
glDeleteBuffers(1, &r->node_instance_vbo);
|
||||||
glDeleteVertexArrays(1, &r->edge_vao);
|
glDeleteVertexArrays(1, &r->edge_vao);
|
||||||
glDeleteBuffers(1, &r->edge_vbo);
|
glDeleteBuffers(1, &r->edge_vbo);
|
||||||
|
glDeleteBuffers(1, &r->node_pos_buf);
|
||||||
|
glDeleteTextures(1, &r->node_pos_tex);
|
||||||
glDeleteProgram(r->node_shader);
|
glDeleteProgram(r->node_shader);
|
||||||
glDeleteProgram(r->edge_shader);
|
glDeleteProgram(r->edge_shader);
|
||||||
free(r->node_staging);
|
free(r->node_staging);
|
||||||
free(r->edge_staging);
|
free(r->node_pos_staging);
|
||||||
|
free(r->edge_static_staging);
|
||||||
delete r;
|
delete r;
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -417,72 +505,111 @@ unsigned int graph_renderer_draw(GraphRenderer* r, const GraphData& graph,
|
|||||||
float vy1 = cam_y + half_h * (1.0f + margin);
|
float vy1 = cam_y + half_h * (1.0f + margin);
|
||||||
|
|
||||||
// ----------------------------------------------------------------
|
// ----------------------------------------------------------------
|
||||||
// Draw edges (frustum-culled)
|
// Subir posiciones de nodos al TBO (vec2 por nodo). Lo necesitamos
|
||||||
|
// tanto si dibujamos aristas (vertex pulling) como antes de dibujar
|
||||||
|
// nodos — pero se calcula una sola vez por frame.
|
||||||
// ----------------------------------------------------------------
|
// ----------------------------------------------------------------
|
||||||
if (graph.edge_count > 0 && graph.edges && graph.nodes) {
|
bool tbo_ready = false;
|
||||||
// Asegurar staging — capacidad maxima posible en este frame es
|
if (graph.node_count > 0 && graph.nodes) {
|
||||||
// edge_count * 2 vertices. La realidad post-cull suele ser mucho
|
size_t need_floats = (size_t)graph.node_count * 2;
|
||||||
// menor, pero reservamos para el peor caso y no realocamos por
|
if (need_floats > r->node_pos_staging_cap) {
|
||||||
// frame.
|
size_t new_cap = grow_capacity(r->node_pos_staging_cap, need_floats, 8192);
|
||||||
size_t need_verts = (size_t)graph.edge_count * 2;
|
r->node_pos_staging = (float*)realloc(r->node_pos_staging, new_cap * sizeof(float));
|
||||||
if (need_verts > r->edge_staging_cap) {
|
r->node_pos_staging_cap = new_cap;
|
||||||
size_t new_cap = grow_capacity(r->edge_staging_cap, need_verts, 8192);
|
|
||||||
r->edge_staging = (EdgeVertex*)realloc(r->edge_staging, new_cap * sizeof(EdgeVertex));
|
|
||||||
r->edge_staging_cap = new_cap;
|
|
||||||
}
|
}
|
||||||
|
for (int i = 0; i < graph.node_count; ++i) {
|
||||||
size_t out = 0;
|
r->node_pos_staging[i * 2 + 0] = graph.nodes[i].x;
|
||||||
for (int i = 0; i < graph.edge_count; ++i) {
|
r->node_pos_staging[i * 2 + 1] = graph.nodes[i].y;
|
||||||
const GraphEdge& e = graph.edges[i];
|
|
||||||
if (e.source >= (uint32_t)graph.node_count) continue;
|
|
||||||
if (e.target >= (uint32_t)graph.node_count) continue;
|
|
||||||
|
|
||||||
const GraphNode& ns = graph.nodes[e.source];
|
|
||||||
const GraphNode& nt = graph.nodes[e.target];
|
|
||||||
|
|
||||||
// Frustum cull: AABB del segmento (con margen para edges casi
|
|
||||||
// tangentes al viewport). Si el AABB no intersecta el viewport,
|
|
||||||
// skip — la arista no contribuye a ningun pixel visible.
|
|
||||||
float ex0 = std::min(ns.x, nt.x);
|
|
||||||
float ex1 = std::max(ns.x, nt.x);
|
|
||||||
float ey0 = std::min(ns.y, nt.y);
|
|
||||||
float ey1 = std::max(ns.y, nt.y);
|
|
||||||
if (ex1 < vx0 || ex0 > vx1 || ey1 < vy0 || ey0 > vy1) continue;
|
|
||||||
|
|
||||||
uint32_t ecol = e.color != 0 ? e.color : pack_rgba8(0x88, 0x88, 0x88, 0xFF);
|
|
||||||
uint32_t col = modulate_alpha_rgba8(ecol, r->config.edge_alpha);
|
|
||||||
|
|
||||||
r->edge_staging[out++] = { ns.x, ns.y, col };
|
|
||||||
r->edge_staging[out++] = { nt.x, nt.y, col };
|
|
||||||
}
|
}
|
||||||
|
const size_t used_bytes = need_floats * sizeof(float);
|
||||||
|
if (used_bytes > r->node_pos_capacity) {
|
||||||
|
r->node_pos_capacity = grow_capacity(r->node_pos_capacity, used_bytes,
|
||||||
|
4096 * 2 * sizeof(float));
|
||||||
|
}
|
||||||
|
glBindBuffer(GL_TEXTURE_BUFFER, r->node_pos_buf);
|
||||||
|
// Orphan + subdata: misma estrategia que en 0049c, evita stall.
|
||||||
|
glBufferData(GL_TEXTURE_BUFFER, (GLsizeiptr)r->node_pos_capacity, nullptr, GL_STREAM_DRAW);
|
||||||
|
glBufferSubData(GL_TEXTURE_BUFFER, 0, (GLsizeiptr)used_bytes, r->node_pos_staging);
|
||||||
|
// glTexBuffer ya esta vinculado al buffer en create — el view sigue
|
||||||
|
// valido tras orphan: GL_TEXTURE_BUFFER referencia al BO por nombre.
|
||||||
|
glBindBuffer(GL_TEXTURE_BUFFER, 0);
|
||||||
|
tbo_ready = true;
|
||||||
|
}
|
||||||
|
|
||||||
if (out > 0) {
|
// ----------------------------------------------------------------
|
||||||
const size_t used_bytes = out * sizeof(EdgeVertex);
|
// Aristas via vertex pulling. El buffer estatico solo se resube
|
||||||
if (used_bytes > r->edge_vbo_capacity) {
|
// cuando el grafo cambia — detectamos con (puntero, count).
|
||||||
r->edge_vbo_capacity = grow_capacity(r->edge_vbo_capacity, used_bytes,
|
// ----------------------------------------------------------------
|
||||||
8192 * sizeof(EdgeVertex));
|
if (tbo_ready && graph.edge_count > 0 && graph.edges) {
|
||||||
|
const bool graph_changed =
|
||||||
|
!r->edges_uploaded
|
||||||
|
|| r->cached_edges_ptr != (const void*)graph.edges
|
||||||
|
|| r->cached_edge_count != graph.edge_count;
|
||||||
|
|
||||||
|
if (graph_changed) {
|
||||||
|
// (Re)build el buffer estatico. Skipeamos aristas con indices
|
||||||
|
// fuera de rango — pueden aparecer durante una recarga parcial
|
||||||
|
// del grafo y no queremos que el GPU lea fuera del TBO.
|
||||||
|
if ((size_t)graph.edge_count > r->edge_static_staging_cap) {
|
||||||
|
size_t new_cap = grow_capacity(r->edge_static_staging_cap,
|
||||||
|
(size_t)graph.edge_count, 8192);
|
||||||
|
r->edge_static_staging = (EdgeStatic*)realloc(r->edge_static_staging,
|
||||||
|
new_cap * sizeof(EdgeStatic));
|
||||||
|
r->edge_static_staging_cap = new_cap;
|
||||||
}
|
}
|
||||||
|
size_t out = 0;
|
||||||
|
for (int i = 0; i < graph.edge_count; ++i) {
|
||||||
|
const GraphEdge& e = graph.edges[i];
|
||||||
|
if (e.source >= (uint32_t)graph.node_count) continue;
|
||||||
|
if (e.target >= (uint32_t)graph.node_count) continue;
|
||||||
|
uint32_t col = e.color != 0 ? e.color
|
||||||
|
: pack_rgba8(0x88, 0x88, 0x88, 0xFF);
|
||||||
|
r->edge_static_staging[out++] = { e.source, e.target, col, 0u };
|
||||||
|
}
|
||||||
|
if (out > 0) {
|
||||||
|
const size_t used_bytes = out * sizeof(EdgeStatic);
|
||||||
|
if (used_bytes > r->edge_static_capacity) {
|
||||||
|
r->edge_static_capacity = grow_capacity(r->edge_static_capacity,
|
||||||
|
used_bytes,
|
||||||
|
8192 * sizeof(EdgeStatic));
|
||||||
|
}
|
||||||
|
glBindBuffer(GL_ARRAY_BUFFER, r->edge_vbo);
|
||||||
|
glBufferData(GL_ARRAY_BUFFER, (GLsizeiptr)r->edge_static_capacity,
|
||||||
|
nullptr, GL_STATIC_DRAW);
|
||||||
|
glBufferSubData(GL_ARRAY_BUFFER, 0, (GLsizeiptr)used_bytes,
|
||||||
|
r->edge_static_staging);
|
||||||
|
}
|
||||||
|
r->cached_edges_ptr = (const void*)graph.edges;
|
||||||
|
r->cached_edge_count = graph.edge_count;
|
||||||
|
r->cached_edges_drawn = (int)out;
|
||||||
|
r->edges_uploaded = (out > 0);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (r->edges_uploaded) {
|
||||||
glUseProgram(r->edge_shader);
|
glUseProgram(r->edge_shader);
|
||||||
glUniform2f(glGetUniformLocation(r->edge_shader, "u_viewport"),
|
glUniform2f(r->edge_u_viewport_loc, (float)r->width, (float)r->height);
|
||||||
(float)r->width, (float)r->height);
|
glUniform1f(r->edge_u_scale_loc, scale);
|
||||||
glUniform1f(glGetUniformLocation(r->edge_shader, "u_scale"), scale);
|
glUniform2f(r->edge_u_translate_loc, tx, ty);
|
||||||
glUniform2f(glGetUniformLocation(r->edge_shader, "u_translate"), tx, ty);
|
glUniform1f(r->edge_u_alpha_loc, r->config.edge_alpha);
|
||||||
|
|
||||||
|
// Bind TBO al sampler u_node_pos en la texture unit 0.
|
||||||
|
glActiveTexture(GL_TEXTURE0);
|
||||||
|
glBindTexture(GL_TEXTURE_BUFFER, r->node_pos_tex);
|
||||||
|
glUniform1i(r->edge_u_node_pos_loc, 0);
|
||||||
|
|
||||||
glLineWidth(r->config.edge_width);
|
glLineWidth(r->config.edge_width);
|
||||||
|
|
||||||
glBindVertexArray(r->edge_vao);
|
glBindVertexArray(r->edge_vao);
|
||||||
glBindBuffer(GL_ARRAY_BUFFER, r->edge_vbo);
|
// Una "instancia" = 1 linea (2 vertices). gl_VertexID dentro
|
||||||
// Orphan: descarta el contenido previo y damos al driver un
|
// de la instancia es 0 o 1 → elige endpoint source o target.
|
||||||
// buffer fresco con la capacidad reservada. Despues subimos
|
glDrawArraysInstanced(GL_LINES, 0, 2, (GLsizei)r->cached_edges_drawn);
|
||||||
// solo los bytes realmente usados con BufferSubData — evitamos
|
|
||||||
// el sync stall y reutilizamos la asignacion entre frames
|
|
||||||
// mientras no crezca.
|
|
||||||
glBufferData(GL_ARRAY_BUFFER, (GLsizeiptr)r->edge_vbo_capacity, nullptr, GL_STREAM_DRAW);
|
|
||||||
glBufferSubData(GL_ARRAY_BUFFER, 0, (GLsizeiptr)used_bytes, r->edge_staging);
|
|
||||||
glDrawArrays(GL_LINES, 0, (GLsizei)out);
|
|
||||||
glBindVertexArray(0);
|
glBindVertexArray(0);
|
||||||
|
glBindTexture(GL_TEXTURE_BUFFER, 0);
|
||||||
}
|
}
|
||||||
|
} else if (graph.edge_count == 0) {
|
||||||
|
// Si el caller borra todas las aristas, invalidamos el cache para
|
||||||
|
// que el siguiente upload reconstruya el buffer.
|
||||||
|
r->edges_uploaded = false;
|
||||||
}
|
}
|
||||||
|
|
||||||
// ----------------------------------------------------------------
|
// ----------------------------------------------------------------
|
||||||
|
|||||||
@@ -3,11 +3,11 @@ name: graph_renderer
|
|||||||
kind: function
|
kind: function
|
||||||
lang: cpp
|
lang: cpp
|
||||||
domain: viz
|
domain: viz
|
||||||
version: "1.2.0"
|
version: "1.3.0"
|
||||||
purity: impure
|
purity: impure
|
||||||
signature: "GraphRenderer* graph_renderer_create(int width, int height, const GraphRendererConfig& config)"
|
signature: "GraphRenderer* graph_renderer_create(int width, int height, const GraphRendererConfig& config)"
|
||||||
description: "Renderer GPU de grafos con instanced rendering a FBO, compatible con ImGui::Image para visualizacion de grafos grandes"
|
description: "Renderer GPU de grafos con instanced rendering a FBO, compatible con ImGui::Image para visualizacion de grafos grandes"
|
||||||
tags: [graph, renderer, opengl, gpu, instanced, fbo, visualization, frustum-cull, rgba8]
|
tags: [graph, renderer, opengl, gpu, instanced, fbo, visualization, frustum-cull, rgba8, vertex-pulling, tbo]
|
||||||
uses_functions: ["gl_loader_cpp_gfx"]
|
uses_functions: ["gl_loader_cpp_gfx"]
|
||||||
uses_types: ["GraphData_cpp_viz"]
|
uses_types: ["GraphData_cpp_viz"]
|
||||||
returns: []
|
returns: []
|
||||||
@@ -88,6 +88,14 @@ ndc = (screen / viewport) * 2 - 1
|
|||||||
|
|
||||||
## Notas
|
## Notas
|
||||||
|
|
||||||
|
- **v1.3** (2026-04-29, issue 0049d): aristas via vertex pulling. API publica intacta.
|
||||||
|
- El buffer de aristas pasa a ser estatico (`source_idx, target_idx, color, flags` × E, 16 bytes/arista) y solo se resube cuando el grafo cambia (detectado por `(edges_ptr, edge_count)` — heuristica suficiente mientras `GraphData` no tenga `revision`). Para 100k aristas: 1.6 MB iniciales vs 4.8 MB/frame del esquema anterior — el upload baja a cero en regimen estable.
|
||||||
|
- Las posiciones de los nodos se suben cada frame a un Texture Buffer Object `RG32F` (`vec2[]`, 8 bytes/nodo). El vertex shader de aristas hace `texelFetch(u_node_pos, idx)` con `idx` derivado de `gl_VertexID & 1` (0=source, 1=target).
|
||||||
|
- Draw call: `glDrawArraysInstanced(GL_LINES, 0, 2, edge_count)` — 1 instancia por arista, 2 vertices por linea, todos los atributos con `divisor=1`.
|
||||||
|
- Frustum cull de aristas eliminado en CPU (la GPU clipea fuera de viewport por su cuenta). Se mantiene para nodos.
|
||||||
|
- `edge_alpha` pasa a uniform en el shader; la pre-multiplicacion en CPU desaparece y permite que el buffer estatico no dependa del config.
|
||||||
|
- GLSL: 330 core (con `samplerBuffer`/`texelFetch` que estan en 1.40+). `gl_loader` gana `glBufferSubData`, `glVertexAttribIPointer` y `glTexBuffer` (Linux ya los tenia via `GL_GLEXT_PROTOTYPES`).
|
||||||
|
|
||||||
- **v1.2** (2026-04-29, issue 0049c): tres optimizaciones internas, API publica intacta.
|
- **v1.2** (2026-04-29, issue 0049c): tres optimizaciones internas, API publica intacta.
|
||||||
1. **RGBA8 packing**: el buffer de instancia/vertice usa `uint32` por color en lugar de 4 floats. Nodo: 28 → 16 bytes/instance (-43%). Edge: 24 → 12 bytes/vertex (-50%). Los shaders desempaquetan con bit shifts (compatible GL 3.30+, sin necesidad de `unpackUnorm4x8` que es 4.20+). Helpers expuestos en el .h: `pack_rgba8`, `unpack_rgba8`, `modulate_alpha_rgba8` (testeados en `test_graph_pack_rgba8.cpp`).
|
1. **RGBA8 packing**: el buffer de instancia/vertice usa `uint32` por color en lugar de 4 floats. Nodo: 28 → 16 bytes/instance (-43%). Edge: 24 → 12 bytes/vertex (-50%). Los shaders desempaquetan con bit shifts (compatible GL 3.30+, sin necesidad de `unpackUnorm4x8` que es 4.20+). Helpers expuestos en el .h: `pack_rgba8`, `unpack_rgba8`, `modulate_alpha_rgba8` (testeados en `test_graph_pack_rgba8.cpp`).
|
||||||
2. **Capacity-tracked streaming buffers**: el VBO se mantiene orphaned con `glBufferData(NULL, capacity)` y se actualiza con `glBufferSubData` solo los bytes usados. La capacidad crece x2 cuando hace falta (inicial: 4096 nodos / 8192 vertices de aristas) → reallocaciones en O(log N). Staging CPU reutilizado entre frames.
|
2. **Capacity-tracked streaming buffers**: el VBO se mantiene orphaned con `glBufferData(NULL, capacity)` y se actualiza con `glBufferSubData` solo los bytes usados. La capacidad crece x2 cuando hace falta (inicial: 4096 nodos / 8192 vertices de aristas) → reallocaciones en O(log N). Staging CPU reutilizado entre frames.
|
||||||
|
|||||||
@@ -66,6 +66,10 @@ add_fn_test(test_graph_should_pause test_graph_should_pause.cpp
|
|||||||
${CMAKE_CURRENT_SOURCE_DIR}/../functions/viz/graph_force_layout.cpp
|
${CMAKE_CURRENT_SOURCE_DIR}/../functions/viz/graph_force_layout.cpp
|
||||||
${CMAKE_CURRENT_SOURCE_DIR}/../functions/viz/graph_types.cpp)
|
${CMAKE_CURRENT_SOURCE_DIR}/../functions/viz/graph_types.cpp)
|
||||||
|
|
||||||
|
# --- Issue 0049d — vertex pulling edge buffer (logica solo, sin GL) --------
|
||||||
|
add_fn_test(test_graph_edge_static test_graph_edge_static.cpp
|
||||||
|
${CMAKE_CURRENT_SOURCE_DIR}/../functions/viz/graph_types.cpp)
|
||||||
|
|
||||||
# --- Visual golden-image diff (issue 0048) ---------------------------------
|
# --- Visual golden-image diff (issue 0048) ---------------------------------
|
||||||
# El binario primitives_gallery se compila con --capture; el test compara los
|
# El binario primitives_gallery se compila con --capture; el test compara los
|
||||||
# PNGs generados con los goldens en cpp/tests/golden/. Si no hay goldens o el
|
# PNGs generados con los goldens en cpp/tests/golden/. Si no hay goldens o el
|
||||||
|
|||||||
@@ -0,0 +1,60 @@
|
|||||||
|
// Smoke test for graph_renderer's vertex-pulling edge buffer layout (issue
|
||||||
|
// 0049d). El test no toca GL — solo verifica las garantias estructurales que
|
||||||
|
// el renderer asume:
|
||||||
|
// 1. EdgeStatic mide 16 bytes (alineacion natural sin padding sorpresa).
|
||||||
|
// 2. Los offsets de source/target/color/flags son 0/4/8/12.
|
||||||
|
// 3. La paleta de colores por defecto del fallback (gris 0x88) tiene la
|
||||||
|
// forma RGBA8 esperada por el shader (R en byte 0).
|
||||||
|
//
|
||||||
|
// Render-a-FBO-y-readback queda fuera porque exigiria un contexto GL en
|
||||||
|
// el runner de tests; ya hay un golden image gate via test_visual + capture
|
||||||
|
// en primitives_gallery.
|
||||||
|
|
||||||
|
#define CATCH_CONFIG_MAIN
|
||||||
|
#include "catch_amalgamated.hpp"
|
||||||
|
|
||||||
|
#include "viz/graph_renderer.h"
|
||||||
|
#include "viz/graph_types.h"
|
||||||
|
|
||||||
|
#include <cstddef>
|
||||||
|
#include <cstdint>
|
||||||
|
|
||||||
|
// Re-declaramos la struct local a graph_renderer.cpp para chequear su layout
|
||||||
|
// "en espejo". Si cambia el layout en el .cpp, este test debe actualizarse —
|
||||||
|
// es intencional: el shader y el VAO setup dependen de estos offsets.
|
||||||
|
namespace test_layout {
|
||||||
|
struct EdgeStatic {
|
||||||
|
uint32_t source;
|
||||||
|
uint32_t target;
|
||||||
|
uint32_t color;
|
||||||
|
uint32_t flags;
|
||||||
|
};
|
||||||
|
} // namespace test_layout
|
||||||
|
|
||||||
|
TEST_CASE("EdgeStatic mide 16 bytes y tiene offsets contiguos", "[viz][edge_static]") {
|
||||||
|
using test_layout::EdgeStatic;
|
||||||
|
REQUIRE(sizeof(EdgeStatic) == 16);
|
||||||
|
REQUIRE(offsetof(EdgeStatic, source) == 0);
|
||||||
|
REQUIRE(offsetof(EdgeStatic, target) == 4);
|
||||||
|
REQUIRE(offsetof(EdgeStatic, color) == 8);
|
||||||
|
REQUIRE(offsetof(EdgeStatic, flags) == 12);
|
||||||
|
}
|
||||||
|
|
||||||
|
TEST_CASE("Fallback gris 0x88 tiene R en el byte LSB", "[viz][edge_static]") {
|
||||||
|
// El renderer construye `pack_rgba8(0x88, 0x88, 0x88, 0xFF)` cuando
|
||||||
|
// `e.color == 0`. El shader hace unpack manual asumiendo R en LSB.
|
||||||
|
uint32_t gray = pack_rgba8(0x88, 0x88, 0x88, 0xFF);
|
||||||
|
REQUIRE((gray & 0xFFu) == 0x88u); // R
|
||||||
|
REQUIRE(((gray >> 8) & 0xFFu) == 0x88u); // G
|
||||||
|
REQUIRE(((gray >> 16) & 0xFFu) == 0x88u); // B
|
||||||
|
REQUIRE(((gray >> 24) & 0xFFu) == 0xFFu); // A
|
||||||
|
}
|
||||||
|
|
||||||
|
TEST_CASE("GraphEdge.color = 0 indica fallback al gris por defecto",
|
||||||
|
"[viz][edge_static]") {
|
||||||
|
GraphEdge e = graph_edge(0, 1, 1.0f);
|
||||||
|
REQUIRE(e.color == 0u);
|
||||||
|
// El renderer interpreta esto como "usa el gris 0x88,0x88,0x88,0xFF".
|
||||||
|
// Un agente que precarga colores debe usar `pack_rgba8` para evitar
|
||||||
|
// colisionar con el sentinel 0.
|
||||||
|
}
|
||||||
@@ -58,7 +58,7 @@
|
|||||||
| [0049a](completed/0049a-osint-graph-setup.md) | Setup proyecto osint_graph + sub-repo graph_explorer | completado | alta | infra | parte de 0049 |
|
| [0049a](completed/0049a-osint-graph-setup.md) | Setup proyecto osint_graph + sub-repo graph_explorer | completado | alta | infra | parte de 0049 |
|
||||||
| [0049b](completed/0049b-cpp-bump-gl-43.md) | Bump OpenGL 3.3 → 4.3 core en cpp/framework | completado | alta | infra | parte de 0049 |
|
| [0049b](completed/0049b-cpp-bump-gl-43.md) | Bump OpenGL 3.3 → 4.3 core en cpp/framework | completado | alta | infra | parte de 0049 |
|
||||||
| [0049c](completed/0049c-graph-renderer-tier1.md) | graph_renderer Tier 1: RGBA8, orphan, frustum cull, auto-pause | completado | alta | perf | parte de 0049 |
|
| [0049c](completed/0049c-graph-renderer-tier1.md) | graph_renderer Tier 1: RGBA8, orphan, frustum cull, auto-pause | completado | alta | perf | parte de 0049 |
|
||||||
| [0049d](0049d-graph-edges-vertex-pulling.md) | Aristas via vertex pulling con TBO | pendiente | alta | perf | parte de 0049 |
|
| [0049d](completed/0049d-graph-edges-vertex-pulling.md) | Aristas via vertex pulling con TBO | completado | alta | perf | parte de 0049 |
|
||||||
| [0049e](0049e-graph-types-extended.md) | graph_types modelo extendido + EntityType/RelationType | pendiente | alta | feature | parte de 0049 |
|
| [0049e](0049e-graph-types-extended.md) | graph_types modelo extendido + EntityType/RelationType | pendiente | alta | feature | parte de 0049 |
|
||||||
| [0049f](0049f-graph-renderer-symbols.md) | Renderer extendido: shapes SDF, icon atlas, flechas, edge styles | pendiente | alta | feature | parte de 0049 |
|
| [0049f](0049f-graph-renderer-symbols.md) | Renderer extendido: shapes SDF, icon atlas, flechas, edge styles | pendiente | alta | feature | parte de 0049 |
|
||||||
| [0049g](0049g-graph-source-operations.md) | graph_sources: lector operations.db + abstraccion funcional | pendiente | alta | feature | parte de 0049 |
|
| [0049g](0049g-graph-source-operations.md) | graph_sources: lector operations.db + abstraccion funcional | pendiente | alta | feature | parte de 0049 |
|
||||||
|
|||||||
Reference in New Issue
Block a user