@@ -41,9 +41,15 @@ struct NodeInstance { // 16 bytes
uint32_t color ; // packed RGBA8
} ;
struct EdgeVertex { // 12 bytes
float x , y ; // world position
uint32_t color ; // packed RGBA8 (alpha ya pre-multiplicada por edge_alpha)
// Tier 2 (issue 0049d): edges via vertex pulling. The buffer is static —
// only `(source_idx, target_idx, color, flags)` per edge, 16 bytes — and
// is re-uploaded only when the graph changes. The vertex shader fetches
// the endpoint positions from an RG32F TBO that IS updated every frame.
struct EdgeStatic { // 16 bytes
uint32_t source ; // index into nodes
uint32_t target ; // index into nodes
uint32_t color ; // packed RGBA8 (not pre-multiplied — the shader applies edge_alpha)
uint32_t flags ; // reserved for future arrows/styles
} ;
// ---------------------------------------------------------------------------
@@ -59,22 +65,44 @@ struct GraphRenderer {
unsigned int node_vao , node_quad_vbo , node_instance_vbo ;
unsigned int node_shader ;
// Edge rendering (lines )
// Edge rendering (vertex pulling — issue 0049d )
// edge_vao : VAO with per-instance attributes (divisor=1) reading from edge_vbo
// edge_vbo : static buffer (one per graph) holding (source, target, color, flags)
// node_pos_buf / node_pos_tex : RG32F TBO that the vertex shader samples via texelFetch
unsigned int edge_vao , edge_vbo ;
unsigned int edge_shader ;
unsigned int node_pos_buf ;
unsigned int node_pos_tex ;
int edge_u_viewport_loc ;
int edge_u_scale_loc ;
int edge_u_translate_loc ;
int edge_u_alpha_loc ;
int edge_u_node_pos_loc ;
// Streaming buffer capacities (in bytes). Grow x2 cuando used > capacity.
// Mantenemos el VBO orphaned con glBufferData(NULL, capacity) y luego
// hacemos glBufferSubData con los bytes realmente usados — evita el
// sync stall del driver y reduce las reallocaciones a O(log N).
size_t node_vbo_capacity ;
size_t edge_vbo_capacity ;
size_t node_pos_capacity ; // bytes del TBO RG32F
size_t edge_static_capacity ; // bytes del buffer estatico de aristas
// CPU staging buffers — se reusan entre frames; crecen igual que el VBO.
NodeInstance * node_staging ;
size_t node_staging_cap ; // en NodeInstances, no bytes
EdgeVertex * edge_staging ;
size_t edge_staging_cap ; // en EdgeVertex
float * node_pos_staging ; // 2 floats (x,y) por nodo
size_t node_pos_staging_cap ; // en floats
EdgeStatic * edge_static_staging ;
size_t edge_static_staging_cap ; // en EdgeStatic
// Cache para detectar cambios del grafo y reuploadear el edge_vbo
// estatico solo entonces. Identificamos el grafo por (puntero, count);
// basta para los flujos actuales (graph_viewport recrea el array al
// recargar). Cuando GraphData gane un campo `revision` se sustituira.
const void * cached_edges_ptr ;
int cached_edge_count ; // edges del grafo en el ultimo upload
int cached_edges_drawn ; // edges realmente subidos (post-filtro)
bool edges_uploaded ;
GraphRendererConfig config ;
} ;
@@ -151,17 +179,33 @@ void main() {
}
) " ;
// Edge vertex shader (RGBA8 packe d)
// Edge vertex shader — vertex pulling (issue 0049d).
// The edge buffer is static: indices and color only. Positions come from
// the TBO `u_node_pos` (RG32F, one vec2 per node). gl_VertexID selects
// whether we emit the source endpoint (0) or the target endpoint (1). This
// removes the per-frame upload of two EdgeVertex (12 bytes each) per drawn
// edge that dominated the cost of edges.
//
// Note: the 3 enabled attributes (source, target, color) use divisor=1 and
// we draw with `glDrawArraysInstanced(GL_LINES, 0, 2, edge_count)` — each
// instance renders one 2-vertex line, the attributes stay constant within
// the instance, and `gl_VertexID` cycles 0..1 inside it.
//
// `samplerBuffer` and `texelFetch(samplerBuffer, int)` are available since
// GLSL 1.40; 330 core is enough (4.30 is not required — the issue overstated it).
static const char * k_edge_vert = R " (
# version 330 core
layout ( location = 0 ) in vec2 a_pos ;
layout ( location = 1 ) in uint a_color ;
out vec4 v_color ;
layout ( location = 0 ) in uint a_source ;
layout ( location = 1 ) in uint a_target ;
layout ( location = 2 ) in uint a_color ;
// location 3 (flags) reservado en el buffer (16B alignment) pero no leido aqui.
uniform samplerBuffer u_node_pos ;
uniform vec2 u_viewport ;
uniform float u_scale ;
uniform vec2 u_translate ;
uniform float u_alpha ; // edge_alpha
out vec4 v_color ;
vec4 unpack_rgba8 ( uint c ) {
return vec4 (
@@ -173,11 +217,16 @@ vec4 unpack_rgba8(uint c) {
}
void main ( ) {
vec2 screen = a_pos * u_scale + u_translate ;
int idx = ( gl_VertexID & 1 ) = = 0 ? int ( a_source ) : int ( a_target ) ;
vec2 wpos = texelFetch ( u_node_pos , idx ) . xy ;
vec2 screen = wpos * u_scale + u_translate ;
vec2 ndc = ( screen / u_viewport ) * 2.0 - 1.0 ;
ndc . y = - ndc . y ;
gl_Position = vec4 ( ndc , 0.0 , 1.0 ) ;
v_color = unpack_rgba8 ( a_color ) ;
vec4 c = unpack_rgba8 ( a_color ) ;
c . a * = u_alpha ;
v_color = c ;
}
) " ;
@@ -281,12 +330,19 @@ GraphRenderer* graph_renderer_create(int width, int height, const GraphRendererC
r - > height = height ;
r - > config = config ;
r - > node_vbo_capacity = 0 ;
r - > edge_vbo_capacity = 0 ;
r - > node_staging = nullptr ;
r - > node_staging_cap = 0 ;
r - > edge_staging = nullptr ;
r - > edge_staging_cap = 0 ;
r - > node_vbo_capacity = 0 ;
r - > node_pos_capacity = 0 ;
r - > edge_static_capacity = 0 ;
r - > node_staging = nullptr ;
r - > node_staging_cap = 0 ;
r - > node_pos_staging = nullptr ;
r - > node_pos_staging_cap = 0 ;
r - > edge_static_staging = nullptr ;
r - > edge_static_staging_cap = 0 ;
r - > cached_edges_ptr = nullptr ;
r - > cached_edge_count = 0 ;
r - > cached_edges_drawn = 0 ;
r - > edges_uploaded = false ;
// --- FBO ---
create_fbo ( r ) ;
@@ -330,27 +386,56 @@ GraphRenderer* graph_renderer_create(int width, int height, const GraphRendererC
glBindVertexArray ( 0 ) ;
// --- Edge VAO ---
// --- Edge VAO (vertex pulling, divisor=1 sobre el buffer estatico) ---
glGenVertexArrays ( 1 , & r - > edge_vao ) ;
glBindVertexArray ( r - > edge_vao ) ;
glGenBuffers ( 1 , & r - > edge_vbo ) ;
glBindBuffer ( GL_ARRAY_BUFFER , r - > edge_vbo ) ;
glEnableVertexAttribArray ( 0 ) ; // pos
glVertexAttribPointer ( 0 , 2 , GL_FLOAT , GL_FALSE ,
sizeof ( EdgeVertex ) ,
( void * ) offsetof ( EdgeVertex , x ) ) ;
glEnableVertexAttribArray ( 1 ) ; // color (uint32)
glVertexAttribIPointer ( 1 , 1 , GL_UNSIGNED_INT ,
sizeof ( EdgeVertex ) ,
( void * ) offsetof ( EdgeVertex , color ) ) ;
// (source, target, color) — the 3 enabled attributes, all with divisor=1.
glEnableVertexAttribArray ( 0 ) ;
glVertexAttribIPointer ( 0 , 1 , GL_UNSIGNED_INT , sizeof ( EdgeStatic ) ,
( void * ) offsetof ( EdgeStatic , source ) ) ;
glVertexAttribDivisor ( 0 , 1 ) ;
glEnableVertexAttribArray ( 1 ) ;
glVertexAttribIPointer ( 1 , 1 , GL_UNSIGNED_INT , sizeof ( EdgeStatic ) ,
( void * ) offsetof ( EdgeStatic , target ) ) ;
glVertexAttribDivisor ( 1 , 1 ) ;
glEnableVertexAttribArray ( 2 ) ;
glVertexAttribIPointer ( 2 , 1 , GL_UNSIGNED_INT , sizeof ( EdgeStatic ) ,
( void * ) offsetof ( EdgeStatic , color ) ) ;
glVertexAttribDivisor ( 2 , 1 ) ;
// location 3 reservado en el buffer pero no enabled — el shader actual
// no lo lee. Mantenemos el slot para futuros estilos/flechas.
glBindVertexArray ( 0 ) ;
// --- TBO de posiciones de nodos (RG32F, vec2 por nodo) ---
glGenBuffers ( 1 , & r - > node_pos_buf ) ;
glBindBuffer ( GL_TEXTURE_BUFFER , r - > node_pos_buf ) ;
// Reservamos capacidad inicial; se redimensiona en draw segun N.
glBufferData ( GL_TEXTURE_BUFFER , 4096 * 2 * sizeof ( float ) , nullptr , GL_STREAM_DRAW ) ;
r - > node_pos_capacity = 4096 * 2 * sizeof ( float ) ;
glGenTextures ( 1 , & r - > node_pos_tex ) ;
glBindTexture ( GL_TEXTURE_BUFFER , r - > node_pos_tex ) ;
glTexBuffer ( GL_TEXTURE_BUFFER , GL_RG32F , r - > node_pos_buf ) ;
glBindTexture ( GL_TEXTURE_BUFFER , 0 ) ;
glBindBuffer ( GL_TEXTURE_BUFFER , 0 ) ;
// --- Shaders ---
r - > node_shader = link_program ( k_node_vert , k_node_frag ) ;
r - > edge_shader = link_program ( k_edge_vert , k_edge_frag ) ;
// Cachear locations de uniforms del edge shader (issue 0049d): se
// resuelven una vez en lugar de glGetUniformLocation cada frame.
r - > edge_u_viewport_loc = glGetUniformLocation ( r - > edge_shader , " u_viewport " ) ;
r - > edge_u_scale_loc = glGetUniformLocation ( r - > edge_shader , " u_scale " ) ;
r - > edge_u_translate_loc = glGetUniformLocation ( r - > edge_shader , " u_translate " ) ;
r - > edge_u_alpha_loc = glGetUniformLocation ( r - > edge_shader , " u_alpha " ) ;
r - > edge_u_node_pos_loc = glGetUniformLocation ( r - > edge_shader , " u_node_pos " ) ;
return r ;
}
@@ -362,10 +447,13 @@ void graph_renderer_destroy(GraphRenderer* r) {
glDeleteBuffers ( 1 , & r - > node_instance_vbo ) ;
glDeleteVertexArrays ( 1 , & r - > edge_vao ) ;
glDeleteBuffers ( 1 , & r - > edge_vbo ) ;
glDeleteBuffers ( 1 , & r - > node_pos_buf ) ;
glDeleteTextures ( 1 , & r - > node_pos_tex ) ;
glDeleteProgram ( r - > node_shader ) ;
glDeleteProgram ( r - > edge_shader ) ;
free ( r - > node_staging ) ;
free ( r - > edge_staging ) ;
free ( r - > node_pos_staging ) ;
free ( r - > edge_static_staging ) ;
delete r ;
}
@@ -417,72 +505,111 @@ unsigned int graph_renderer_draw(GraphRenderer* r, const GraphData& graph,
float vy1 = cam_y + half_h * ( 1.0f + margin ) ;
// ----------------------------------------------------------------
// Draw edges (frustum-culled)
// Subir posiciones de nodos al TBO (vec2 por nodo). Lo necesitamos
// tanto si dibujamos aristas (vertex pulling) como antes de dibujar
// nodos — pero se calcula una sola vez por frame.
// ----------------------------------------------------------------
if ( graph . edge_count > 0 & & graph . edges & & graph . nodes ) {
// Asegurar staging — capacidad maxima posible en este frame es
// edge_count * 2 vertices. La realidad post-cull suele ser mucho
// menor, pero reservamos para el peor caso y no realocamos por
// frame.
size_t need_verts = ( size_t ) graph . edge_count * 2 ;
if ( need_verts > r - > edge_staging_cap ) {
size_t new_cap = grow_capacity ( r - > edge_staging_cap , need_verts , 8192 ) ;
r - > edge_staging = ( EdgeVertex * ) realloc ( r - > edge_staging , new_cap * sizeof ( EdgeVertex ) ) ;
r - > edge_staging_cap = new_cap ;
bool tbo_ready = false ;
if ( graph . node_count > 0 & & graph . nodes ) {
size_t need_floats = ( size_t ) graph . node_count * 2 ;
if ( need_floats > r - > node_pos_staging_cap ) {
size_t new_cap = grow_capacity ( r - > node_pos_staging_cap , need_floats , 8192 ) ;
r - > node_pos_staging = ( float * ) realloc ( r - > node_pos_staging , new_cap * sizeof ( float ) ) ;
r - > node_pos_staging_cap = new_cap ;
}
size_t out = 0 ;
for ( int i = 0 ; i < graph . edge_count ; + + i ) {
const GraphEdge & e = graph . edges [ i ] ;
if ( e . source > = ( uint32_t ) graph . node_count ) continue ;
if ( e . target > = ( uint32_t ) graph . node_count ) continue ;
const GraphNode & ns = graph . nodes [ e . source ] ;
const GraphNode & nt = graph . nodes [ e . target ] ;
// Frustum cull: AABB del segmento (con margen para edges casi
// tangentes al viewport). Si el AABB no intersecta el viewport,
// skip — la arista no contribuye a ningun pixel visible.
float ex0 = std : : min ( ns . x , nt . x ) ;
float ex1 = std : : max ( ns . x , nt . x ) ;
float ey0 = std : : min ( ns . y , nt . y ) ;
float ey1 = std : : max ( ns . y , nt . y ) ;
if ( ex1 < vx0 | | ex0 > vx1 | | ey1 < vy0 | | ey0 > vy1 ) continue ;
uint32_t ecol = e . color ! = 0 ? e . color : pack_rgba8 ( 0x88 , 0x88 , 0x88 , 0xFF ) ;
uint32_t col = modulate_alpha_rgba8 ( ecol , r - > config . edge_alpha ) ;
r - > edge_staging [ out + + ] = { ns . x , ns . y , col } ;
r - > edge_staging [ out + + ] = { nt . x , nt . y , col } ;
for ( int i = 0 ; i < graph . node_count ; + + i ) {
r - > node_pos_staging [ i * 2 + 0 ] = graph . nodes [ i ] . x ;
r - > node_pos_staging [ i * 2 + 1 ] = graph . nodes [ i ] . y ;
}
const size_t used_bytes = need_floats * sizeof ( float ) ;
if ( used_bytes > r - > node_pos_capacity ) {
r - > node_pos_capacity = grow_capacity ( r - > node_pos_capacity , used_bytes ,
4096 * 2 * sizeof ( float ) ) ;
}
glBindBuffer ( GL_TEXTURE_BUFFER , r - > node_pos_buf ) ;
// Orphan + subdata: misma estrategia que en 0049c, evita stall.
glBufferData ( GL_TEXTURE_BUFFER , ( GLsizeiptr ) r - > node_pos_capacity , nullptr , GL_STREAM_DRAW ) ;
glBufferSubData ( GL_TEXTURE_BUFFER , 0 , ( GLsizeiptr ) used_bytes , r - > node_pos_staging ) ;
// glTexBuffer ya esta vinculado al buffer en create — el view sigue
// valido tras orphan: GL_TEXTURE_BUFFER referencia al BO por nombre.
glBindBuffer ( GL_TEXTURE_BUFFER , 0 ) ;
tbo_ready = true ;
}
if ( out > 0 ) {
const size_t used_bytes = out * sizeof ( EdgeVertex ) ;
if ( used_bytes > r - > edge_vbo_capacity ) {
r - > edge_vbo_capacity = grow_capacity ( r - > edge_vbo_capacity , used_bytes ,
8192 * sizeof ( EdgeVertex ) ) ;
// ----------------------------------------------------------------
// Aristas via vertex pulling. El buffer estatico solo se reupload
// cuando el grafo cambia — detectamos con (puntero, count).
// ----------------------------------------------------------------
if ( tbo_ready & & graph . edge_count > 0 & & graph . edges ) {
const bool graph_changed =
! r - > edges_uploaded
| | r - > cached_edges_ptr ! = ( const void * ) graph . edges
| | r - > cached_edge_count ! = graph . edge_count ;
if ( graph_changed ) {
// (Re)build el buffer estatico. Skipeamos aristas con indices
// fuera de rango — pueden aparecer durante una recarga parcial
// del grafo y no queremos que el GPU lea fuera del TBO.
if ( ( size_t ) graph . edge_count > r - > edge_static_staging_cap ) {
size_t new_cap = grow_capacity ( r - > edge_static_staging_cap ,
( size_t ) graph . edge_count , 8192 ) ;
r - > edge_static_staging = ( EdgeStatic * ) realloc ( r - > edge_static_staging ,
new_cap * sizeof ( EdgeStatic ) ) ;
r - > edge_static_staging_cap = new_cap ;
}
size_t out = 0 ;
for ( int i = 0 ; i < graph . edge_count ; + + i ) {
const GraphEdge & e = graph . edges [ i ] ;
if ( e . source > = ( uint32_t ) graph . node_count ) continue ;
if ( e . target > = ( uint32_t ) graph . node_count ) continue ;
uint32_t col = e . color ! = 0 ? e . color
: pack_rgba8 ( 0x88 , 0x88 , 0x88 , 0xFF ) ;
r - > edge_static_staging [ out + + ] = { e . source , e . target , col , 0u } ;
}
if ( out > 0 ) {
const size_t used_bytes = out * sizeof ( EdgeStatic ) ;
if ( used_bytes > r - > edge_static_capacity ) {
r - > edge_static_capacity = grow_capacity ( r - > edge_static_capacity ,
used_bytes ,
8192 * sizeof ( EdgeStatic ) ) ;
}
glBindBuffer ( GL_ARRAY_BUFFER , r - > edge_vbo ) ;
glBufferData ( GL_ARRAY_BUFFER , ( GLsizeiptr ) r - > edge_static_capacity ,
nullptr , GL_STATIC_DRAW ) ;
glBufferSubData ( GL_ARRAY_BUFFER , 0 , ( GLsizeiptr ) used_bytes ,
r - > edge_static_staging ) ;
}
r - > cached_edges_ptr = ( const void * ) graph . edges ;
r - > cached_edge_count = graph . edge_count ;
r - > cached_edges_drawn = ( int ) out ;
r - > edges_uploaded = ( out > 0 ) ;
}
if ( r - > edges_uploaded ) {
glUseProgram ( r - > edge_shader ) ;
glUniform2f ( glGetUniformLocation ( r - > edge_shader , " u_viewport " ) ,
( float ) r - > width , ( float ) r - > height ) ;
glUniform1f ( glGetUniformLocation ( r - > edge_shader , " u_scale " ) , scale ) ;
glUniform2f ( glGetUniformLocation ( r - > edge_shader , " u_translate " ) , tx , ty ) ;
glUniform2f ( r - > edge_u_viewport_loc , ( float ) r - > width , ( float ) r - > height ) ;
glUniform1f ( r - > edge_u_scale_loc , scale ) ;
glUniform2f ( r - > edge_u_translate_loc , tx , ty ) ;
glUniform1f ( r - > edge_u_alpha_loc , r - > config . edge_alpha ) ;
// Bind TBO al sampler u_node_pos en la texture unit 0.
glActiveTexture ( GL_TEXTURE0 ) ;
glBindTexture ( GL_TEXTURE_BUFFER , r - > node_pos_tex ) ;
glUniform1i ( r - > edge_u_node_pos_loc , 0 ) ;
glLineWidth ( r - > config . edge_width ) ;
glBindVertexArray ( r - > edge_vao ) ;
glBindBuffer ( GL_ARRAY_BUFFER , r - > edge_vbo ) ;
// Orphan: descarta el contenido previo y damos al driver un
// buffer fresco con la capacidad reservada. Despues subimos
// solo los bytes realmente usados con BufferSubData — evitamos
// el sync stall y reutilizamos la asignacion entre frames
// mientras no crezca.
glBufferData ( GL_ARRAY_BUFFER , ( GLsizeiptr ) r - > edge_vbo_capacity , nullptr , GL_STREAM_DRAW ) ;
glBufferSubData ( GL_ARRAY_BUFFER , 0 , ( GLsizeiptr ) used_bytes , r - > edge_staging ) ;
glDrawArrays ( GL_LINES , 0 , ( GLsizei ) out ) ;
// Una "instancia" = 1 linea (2 vertices). gl_VertexID dentro
// de la instancia es 0 o 1 → elige endpoint source o target.
glDrawArraysInstanced ( GL_LINES , 0 , 2 , ( GLsizei ) r - > cached_edges_drawn ) ;
glBindVertexArray ( 0 ) ;
glBindTexture ( GL_TEXTURE_BUFFER , 0 ) ;
}
} else if ( graph . edge_count = = 0 ) {
// Si el caller borra todas las aristas, invalidamos el cache para
// que el siguiente upload reconstruya el buffer.
r - > edges_uploaded = false ;
}
// ----------------------------------------------------------------