Files
fn_registry/cpp/functions/viz/graph_renderer.cpp
T
egutierrez 02b4141cc1 perf(viz): graph_renderer Tier 1 (RGBA8 + orphan + frustum cull) + force_layout auto-pause helper
Issue 0049c. Tres optimizaciones internas en graph_renderer.cpp + un
helper puro en graph_force_layout para detectar convergencia. API publica
intacta — solo cambian el layout interno de los buffers, el shader y
los costes por frame.

1. RGBA8 color packing
   - El instance buffer de nodos pasa de (x,y,size,r,g,b,a) 28B a
     (x,y,size,color_u32) 16B (-43%). Aristas: 24B → 12B/vertex (-50%).
   - Shaders desempaquetan con bit shifts (compatible GL 3.30+, no
     necesita unpackUnorm4x8 que es 4.20+).
   - Helpers expuestos: pack_rgba8 / unpack_rgba8 / modulate_alpha_rgba8
     en graph_renderer.h. Los GraphNode.color y la paleta ya tenian el
     layout correcto (R en LSB), asi que CPU ahora pasa el uint32 directo
     sin convertir a 4 floats por nodo y por frame.

2. Capacity-tracked streaming buffers
   - Sustituye el doble glBufferData de antes por:
       glBufferData(NULL, capacity, STREAM_DRAW)   // orphan + reserva
       glBufferSubData(0, used_bytes, data)        // solo lo usado
   - capacity crece x2 cuando hace falta (inicial 4096 nodos /
     8192 vertices de aristas) → reallocaciones en O(log N).
   - Staging CPU (NodeInstance* / EdgeVertex*) reusado entre frames con
     realloc, no malloc/free per frame.

3. Frustum cull (CPU-side)
   - AABB del viewport en world coords con margen 10%.
   - Aristas: skip si AABB del segmento no intersecta el viewport.
   - Nodos: solo los visibles entran al instance buffer; visible_count
     es el N que pasa a glDrawArraysInstanced. Pop-in de borde mitigado
     por el margen.

4. graph_force_layout_should_pause(low_frames, min_consecutive)
   - Helper puro: el caller mantiene el contador, la funcion solo
     decide si parar. Reemplaza la rama inline en demos_graph.cpp.
   - Test Catch2 con secuencias artificiales.

Tests: test_graph_pack_rgba8 (16401 asserts, 4 cases — roundtrip exhaustivo
+ alpha modulation + clamp). test_graph_should_pause (3 cases, 14 asserts).
Los 29 tests del cpp/tests/ siguen verdes (incluido test_visual con goldens).

Bump versiones:
- graph_renderer 1.1.0 → 1.2.0
- graph_force_layout 1.0.0 → 1.1.0  (tested: true via should_pause test)
2026-04-29 22:17:13 +02:00

545 lines
20 KiB
C++

#include "viz/graph_renderer.h"
#include "viz/graph_types.h"
// gl_loader: en Linux es no-op (incluye GL headers con GL_GLEXT_PROTOTYPES);
// en Windows expone los punteros via #define gl* fn_gl* tras gl_loader_init().
#include "gfx/gl_loader.h"
#include <cstdlib>
#include <cstring>
#include <cstdio>
#include <cstddef>
#include <cmath>
#include <algorithm>
// ---------------------------------------------------------------------------
// Community palette (ABGR packed, 10 colors)
// ---------------------------------------------------------------------------
// Fallback node colors, one per community slot (Material Design hues), used
// when a node carries no explicit color.
//
// Each value is packed as 0xAABBGGRR: red lives in the least-significant byte,
// which is the byte the shaders' unpack_rgba8() reads as R. The literals used
// to be written as 0xAARRGGBB, so the red and blue channels were swapped on
// screen (the entry labelled "red" rendered blue, "blue" rendered orange).
// The hex code in each trailing comment is the intended #RRGGBB color.
static const uint32_t k_palette[10] = {
    0xFF50AF4C, // green     (#4CAF50)
    0xFF3643F4, // red       (#F44336)
    0xFFF39621, // blue      (#2196F3)
    0xFF0098FF, // orange    (#FF9800)
    0xFFB0279C, // purple    (#9C27B0)
    0xFFD4BC00, // cyan      (#00BCD4)
    0xFF3BEBFF, // yellow    (#FFEB3B)
    0xFF631EE9, // pink      (#E91E63)
    0xFF485579, // brown     (#795548)
    0xFF8B7D60  // blue-grey (#607D8B)
};
// ---------------------------------------------------------------------------
// Per-instance / per-vertex data layouts
// ---------------------------------------------------------------------------
// Tier 1 packing: the color travels as a single packed uint32 instead of four
// floats. Per-element upload size drops from 28 to 16 bytes for node instances
// (-43%) and from 24 to 12 bytes for edge vertices (-50%). It also removes the
// CPU-side ABGR -> 4 floats conversion: on little-endian hosts the uint32
// already has the byte order the shaders unpack (R in the lowest byte).

// One element of the per-instance node buffer.
struct NodeInstance { // 16 bytes
    float    x;     // world-space X of the node center
    float    y;     // world-space Y of the node center
    float    size;  // diameter, in world units
    uint32_t color; // packed RGBA8, R in the least-significant byte
};
// One endpoint of an edge line segment as stored in the edge vertex buffer.
struct EdgeVertex { // 12 bytes
    float    x;     // world-space X of the endpoint
    float    y;     // world-space Y of the endpoint
    uint32_t color; // packed RGBA8; alpha has already been scaled by edge_alpha
};
// ---------------------------------------------------------------------------
// Internal struct
// ---------------------------------------------------------------------------
// All state owned by one renderer instance: the offscreen target, the GL
// objects for the two draw passes, and the reusable upload buffers.
struct GraphRenderer {
    // Offscreen render target.
    unsigned int fbo;
    unsigned int texture; // color attachment; its id is what draw() returns
    unsigned int rbo;     // depth renderbuffer attachment
    int width;
    int height;

    // Node pass: instanced quads.
    unsigned int node_vao;
    unsigned int node_quad_vbo;     // static unit-quad corners
    unsigned int node_instance_vbo; // streamed NodeInstance array
    unsigned int node_shader;

    // Edge pass: line list.
    unsigned int edge_vao;
    unsigned int edge_vbo;          // streamed EdgeVertex array
    unsigned int edge_shader;

    // Size in bytes requested for each streaming VBO. Each frame the buffer is
    // re-specified with glBufferData(nullptr, capacity) and only the bytes in
    // use are then written with glBufferSubData. Capacity doubles when the
    // payload outgrows it, so it changes O(log N) times.
    size_t node_vbo_capacity;
    size_t edge_vbo_capacity;

    // CPU staging arrays, kept between frames and grown with realloc.
    // The *_cap fields count elements, not bytes.
    NodeInstance* node_staging;
    size_t node_staging_cap;
    EdgeVertex* edge_staging;
    size_t edge_staging_cap;

    GraphRendererConfig config;
};
// ---------------------------------------------------------------------------
// Shader sources
// ---------------------------------------------------------------------------
// Node vertex shader (GLSL 3.30 core) — draws every node as an instanced unit
// quad.
//
// Attributes: location 0 is the quad corner in [-0.5, 0.5]; locations 1..3 are
// the per-instance world position, size and packed RGBA8 color (an integer
// `uint` attribute).
//
// The color is unpacked by hand with shifts and masks (R in the low byte, A in
// the high byte). unpackUnorm4x8 would do the same but belongs to GLSL 4.20+;
// doing it manually keeps the shader usable on drivers that only expose 3.30
// core without having to touch fn_framework.
//
// Position math: world -> pixels via a_pos * u_scale + u_translate, the corner
// offset is scaled by a_size * u_scale, and the pixel position is mapped to
// NDC with the Y axis flipped. v_uv is the corner remapped to [0, 1] for the
// fragment stage.
static const char* k_node_vert = R"(
#version 330 core
// Quad corners [-0.5, 0.5]
layout(location = 0) in vec2 a_quad;
// Per-instance: world position, size, packed RGBA8 color.
layout(location = 1) in vec2 a_pos;
layout(location = 2) in float a_size;
layout(location = 3) in uint a_color;
out vec2 v_uv;
out vec4 v_color;
uniform vec2 u_viewport; // (width, height) in pixels
uniform float u_scale; // cam_zoom
uniform vec2 u_translate; // (tx, ty) in pixels
vec4 unpack_rgba8(uint c) {
return vec4(
float( c & 0xFFu),
float((c >> 8) & 0xFFu),
float((c >> 16) & 0xFFu),
float((c >> 24) & 0xFFu)
) * (1.0 / 255.0);
}
void main() {
vec2 screen = a_pos * u_scale + u_translate;
screen += a_quad * a_size * u_scale;
vec2 ndc = (screen / u_viewport) * 2.0 - 1.0;
ndc.y = -ndc.y;
gl_Position = vec4(ndc, 0.0, 1.0);
v_uv = a_quad + 0.5;
v_color = unpack_rgba8(a_color);
}
)";
// Node fragment shader — shades each quad as an antialiased disc with a
// lighter rim.
//
// Coverage is 1 - smoothstep across a band of 1.5 / u_node_px UV units that
// ends at radius 0.5; fragments whose coverage is below 0.001 are discarded.
// The rim color is the fill blended 60% toward white, and it fades in over the
// same band width starting u_outline_px / u_node_px UV units inside the edge.
// Output alpha is the instance alpha multiplied by the coverage.
//
// u_node_px and u_outline_px are uniforms, so one band width applies to every
// node in a draw call regardless of its individual size.
// NOTE(review): the GLSL comment on u_outline_px says "uv units", but the
// value is divided by u_node_px before use, which suggests it is expressed in
// pixels — confirm against the config documentation.
static const char* k_node_frag = R"(
#version 330 core
in vec2 v_uv;
in vec4 v_color;
out vec4 frag_color;
uniform float u_outline_px; // outline width in uv units
uniform float u_node_px; // node diameter in pixels (= size * zoom)
void main() {
float dist = length(v_uv - 0.5);
float r = 0.5;
float fwidth_uv = 1.5 / max(u_node_px, 1.0);
float alpha = 1.0 - smoothstep(r - fwidth_uv, r, dist);
if (alpha < 0.001) discard;
float outline_uv = u_outline_px / max(u_node_px, 1.0);
float outline = smoothstep(r - outline_uv - fwidth_uv, r - outline_uv, dist);
vec3 fill = v_color.rgb;
vec3 outline_col = mix(fill, vec3(1.0), 0.6);
vec3 color = mix(fill, outline_col, outline);
frag_color = vec4(color, v_color.a * alpha);
}
)";
// Edge vertex shader (GLSL 3.30 core) — one invocation per line endpoint.
//
// Attributes: location 0 is the world-space position, location 1 the packed
// RGBA8 color as an integer `uint` attribute. The color is unpacked with
// shifts and masks (R in the low byte, A in the high byte) and scaled to
// [0, 1].
//
// Uses the same view transform as the node shader: world -> pixels via
// a_pos * u_scale + u_translate, then pixels -> NDC with the Y axis flipped.
static const char* k_edge_vert = R"(
#version 330 core
layout(location = 0) in vec2 a_pos;
layout(location = 1) in uint a_color;
out vec4 v_color;
uniform vec2 u_viewport;
uniform float u_scale;
uniform vec2 u_translate;
vec4 unpack_rgba8(uint c) {
return vec4(
float( c & 0xFFu),
float((c >> 8) & 0xFFu),
float((c >> 16) & 0xFFu),
float((c >> 24) & 0xFFu)
) * (1.0 / 255.0);
}
void main() {
vec2 screen = a_pos * u_scale + u_translate;
vec2 ndc = (screen / u_viewport) * 2.0 - 1.0;
ndc.y = -ndc.y;
gl_Position = vec4(ndc, 0.0, 1.0);
v_color = unpack_rgba8(a_color);
}
)";
// Edge fragment shader — writes the interpolated vertex color unchanged.
// Both endpoints of a segment are given the same color by the CPU side, so
// the interpolation is constant along each edge.
static const char* k_edge_frag = R"(
#version 330 core
in vec4 v_color;
out vec4 frag_color;
void main() {
frag_color = v_color;
}
)";
// ---------------------------------------------------------------------------
// Shader helpers
// ---------------------------------------------------------------------------
// Compiles one shader stage from a NUL-terminated GLSL source string.
//
// type: GL shader stage enum (e.g. GL_VERTEX_SHADER, GL_FRAGMENT_SHADER).
// src:  GLSL source text.
//
// Returns the shader object name. On a compile failure the driver's info log
// is printed to stderr and the (unusable) shader name is still returned; the
// caller sees the failure when linking.
static unsigned int compile_shader(GLenum type, const char* src) {
    unsigned int s = glCreateShader(type);
    glShaderSource(s, 1, &src, nullptr);
    glCompileShader(s);

    // Start from "failed": if the status query itself raises a GL error the
    // output parameter is left untouched, and reading it uninitialized would
    // be undefined behavior.
    GLint ok = GL_FALSE;
    glGetShaderiv(s, GL_COMPILE_STATUS, &ok);
    if (!ok) {
        // Pre-terminated so an empty or unavailable log still prints cleanly.
        char buf[512] = "";
        glGetShaderInfoLog(s, sizeof(buf), nullptr, buf);
        fprintf(stderr, "[graph_renderer] shader compile error: %s\n", buf);
    }
    return s;
}
static unsigned int link_program(const char* vert_src, const char* frag_src) {
unsigned int vs = compile_shader(GL_VERTEX_SHADER, vert_src);
unsigned int fs = compile_shader(GL_FRAGMENT_SHADER, frag_src);
unsigned int prog = glCreateProgram();
glAttachShader(prog, vs);
glAttachShader(prog, fs);
glLinkProgram(prog);
int ok;
glGetProgramiv(prog, GL_LINK_STATUS, &ok);
if (!ok) {
char buf[512];
glGetProgramInfoLog(prog, sizeof(buf), nullptr, buf);
fprintf(stderr, "[graph_renderer] program link error: %s\n", buf);
}
glDeleteShader(vs);
glDeleteShader(fs);
return prog;
}
// ---------------------------------------------------------------------------
// FBO helpers
// ---------------------------------------------------------------------------
// Creates the offscreen render target for `r` at r->width x r->height:
// an RGBA8 color texture with linear filtering, a 24-bit depth renderbuffer,
// and a framebuffer object with both attached. The texture, renderbuffer and
// framebuffer bindings are reset to 0 before returning.
static void create_fbo(GraphRenderer* r) {
    const int target_w = r->width;
    const int target_h = r->height;

    // Color attachment.
    glGenTextures(1, &r->texture);
    glBindTexture(GL_TEXTURE_2D, r->texture);
    glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA8, target_w, target_h, 0,
                 GL_RGBA, GL_UNSIGNED_BYTE, nullptr);
    glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR);
    glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_LINEAR);
    glBindTexture(GL_TEXTURE_2D, 0);

    // Depth attachment.
    glGenRenderbuffers(1, &r->rbo);
    glBindRenderbuffer(GL_RENDERBUFFER, r->rbo);
    glRenderbufferStorage(GL_RENDERBUFFER, GL_DEPTH_COMPONENT24, target_w, target_h);
    glBindRenderbuffer(GL_RENDERBUFFER, 0);

    // Framebuffer tying the two together.
    glGenFramebuffers(1, &r->fbo);
    glBindFramebuffer(GL_FRAMEBUFFER, r->fbo);
    glFramebufferTexture2D(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0,
                           GL_TEXTURE_2D, r->texture, 0);
    glFramebufferRenderbuffer(GL_FRAMEBUFFER, GL_DEPTH_ATTACHMENT,
                              GL_RENDERBUFFER, r->rbo);
    glBindFramebuffer(GL_FRAMEBUFFER, 0);
}
// Releases the framebuffer, its color texture and its depth renderbuffer, then
// zeroes the three stored names so stale ids are never reused.
static void destroy_fbo(GraphRenderer* r) {
    glDeleteFramebuffers(1, &r->fbo);
    glDeleteTextures(1, &r->texture);
    glDeleteRenderbuffers(1, &r->rbo);

    r->rbo = 0;
    r->texture = 0;
    r->fbo = 0;
}
// ---------------------------------------------------------------------------
// Capacity-tracked streaming helpers
// ---------------------------------------------------------------------------
// Returns the capacity to use for a buffer that must hold `needed` units.
//
// current: capacity already reserved (0 if nothing has been reserved yet).
// needed:  minimum capacity required now.
// initial: starting capacity used when `current` is 0.
//
// The capacity is doubled until it covers `needed`, so a buffer that keeps
// growing is resized O(log N) times and a steady-state workload triggers no
// resizes at all. Units are whatever the caller uses (bytes or elements).
//
// Two degenerate inputs are handled explicitly, because plain doubling would
// never terminate on them:
//   - a zero seed (current == 0 and initial == 0) starts from 1;
//   - if doubling would overflow size_t, `needed` itself is returned.
static size_t grow_capacity(size_t current, size_t needed, size_t initial) {
    const size_t max_cap = (size_t)-1;

    size_t cap = current > 0 ? current : initial;
    if (cap == 0) {
        cap = 1; // 0 * 2 stays 0 forever
    }
    while (cap < needed) {
        if (cap > max_cap / 2) {
            return needed; // next doubling would wrap around
        }
        cap *= 2;
    }
    return cap;
}
// ---------------------------------------------------------------------------
// Public API
// ---------------------------------------------------------------------------
// Allocates a renderer and creates every GL object it needs: the offscreen
// framebuffer, the node VAO (static unit quad + per-instance stream), the edge
// VAO (per-vertex stream) and the two shader programs.
//
// width, height: size in pixels of the offscreen render target.
// config:        copied into the renderer.
//
// Returns a renderer allocated with `new`; release it with
// graph_renderer_destroy(). GL calls are issued immediately, so a GL context
// presumably has to be current on the calling thread — confirm with the host.
// The streaming VBOs get no storage here; it is specified on first draw.
GraphRenderer* graph_renderer_create(int width, int height, const GraphRendererConfig& config) {
    GraphRenderer* r = new GraphRenderer();

    // Caller-supplied settings.
    r->config = config;
    r->width = width;
    r->height = height;

    // Streaming state starts empty and grows on demand while drawing.
    r->node_staging = nullptr;
    r->edge_staging = nullptr;
    r->node_staging_cap = 0;
    r->edge_staging_cap = 0;
    r->node_vbo_capacity = 0;
    r->edge_vbo_capacity = 0;

    // --- Offscreen target ---
    create_fbo(r);

    // Unit quad centered on the origin, ordered for GL_TRIANGLE_STRIP.
    static const float quad_verts[8] = {
        -0.5f, -0.5f,
         0.5f, -0.5f,
        -0.5f,  0.5f,
         0.5f,  0.5f,
    };

    const GLsizei quad_stride = static_cast<GLsizei>(2 * sizeof(float));
    const GLsizei node_stride = static_cast<GLsizei>(sizeof(NodeInstance));
    const GLsizei edge_stride = static_cast<GLsizei>(sizeof(EdgeVertex));

    // --- Node VAO ---
    glGenVertexArrays(1, &r->node_vao);
    glBindVertexArray(r->node_vao);

    // Attribute 0: quad corner, shared by every instance.
    glGenBuffers(1, &r->node_quad_vbo);
    glBindBuffer(GL_ARRAY_BUFFER, r->node_quad_vbo);
    glBufferData(GL_ARRAY_BUFFER, sizeof(quad_verts), quad_verts, GL_STATIC_DRAW);
    glEnableVertexAttribArray(0);
    glVertexAttribPointer(0, 2, GL_FLOAT, GL_FALSE, quad_stride, (void*)0);

    // Attributes 1..3 read one NodeInstance per instance (divisor 1).
    glGenBuffers(1, &r->node_instance_vbo);
    glBindBuffer(GL_ARRAY_BUFFER, r->node_instance_vbo);

    // Attribute 1: position, two floats.
    glEnableVertexAttribArray(1);
    glVertexAttribPointer(1, 2, GL_FLOAT, GL_FALSE, node_stride,
                          (void*)offsetof(NodeInstance, x));
    glVertexAttribDivisor(1, 1);

    // Attribute 2: size, one float.
    glEnableVertexAttribArray(2);
    glVertexAttribPointer(2, 1, GL_FLOAT, GL_FALSE, node_stride,
                          (void*)offsetof(NodeInstance, size));
    glVertexAttribDivisor(2, 1);

    // Attribute 3: packed color. The integer variant (IPointer) hands the raw
    // uint32 to the shader instead of converting it to float.
    glEnableVertexAttribArray(3);
    glVertexAttribIPointer(3, 1, GL_UNSIGNED_INT, node_stride,
                           (void*)offsetof(NodeInstance, color));
    glVertexAttribDivisor(3, 1);

    glBindVertexArray(0);

    // --- Edge VAO ---
    glGenVertexArrays(1, &r->edge_vao);
    glBindVertexArray(r->edge_vao);

    glGenBuffers(1, &r->edge_vbo);
    glBindBuffer(GL_ARRAY_BUFFER, r->edge_vbo);

    // Attribute 0: position, two floats.
    glEnableVertexAttribArray(0);
    glVertexAttribPointer(0, 2, GL_FLOAT, GL_FALSE, edge_stride,
                          (void*)offsetof(EdgeVertex, x));

    // Attribute 1: packed color as a raw uint32.
    glEnableVertexAttribArray(1);
    glVertexAttribIPointer(1, 1, GL_UNSIGNED_INT, edge_stride,
                           (void*)offsetof(EdgeVertex, color));

    glBindVertexArray(0);

    // --- Shader programs ---
    r->node_shader = link_program(k_node_vert, k_node_frag);
    r->edge_shader = link_program(k_edge_vert, k_edge_frag);

    return r;
}
// Releases everything owned by `r`: the offscreen target, both VAOs and their
// buffers, both shader programs, the CPU staging arrays, and the renderer
// object itself. Passing nullptr is a no-op.
void graph_renderer_destroy(GraphRenderer* r) {
    if (r == nullptr) {
        return;
    }

    destroy_fbo(r);

    // Node pass objects.
    glDeleteVertexArrays(1, &r->node_vao);
    const GLuint node_buffers[2] = { r->node_quad_vbo, r->node_instance_vbo };
    glDeleteBuffers(2, node_buffers);

    // Edge pass objects.
    glDeleteVertexArrays(1, &r->edge_vao);
    glDeleteBuffers(1, &r->edge_vbo);

    // Programs.
    const unsigned int programs[2] = { r->node_shader, r->edge_shader };
    for (int i = 0; i < 2; ++i) {
        glDeleteProgram(programs[i]);
    }

    // Staging arrays come from realloc, so they are released with free.
    free(r->node_staging);
    free(r->edge_staging);

    delete r;
}
// Resizes the offscreen render target.
//
// r:             renderer to resize; nullptr is a no-op.
// width, height: new target size in pixels.
//
// The framebuffer and its attachments are destroyed and recreated only when
// the size actually changes. Non-positive sizes (e.g. a collapsed or minimized
// host panel) are ignored and the previous target is kept: GL rejects negative
// dimensions, and a zero-sized attachment cannot form a complete framebuffer,
// so recreating the target would leave the renderer unable to draw.
void graph_renderer_resize(GraphRenderer* r, int width, int height) {
    if (!r) return;
    if (width <= 0 || height <= 0) return;
    if (r->width == width && r->height == height) return;

    r->width = width;
    r->height = height;
    destroy_fbo(r);
    create_fbo(r);
}
unsigned int graph_renderer_draw(GraphRenderer* r, const GraphData& graph,
float cam_x, float cam_y, float cam_zoom) {
if (!r) return 0;
// --- Save GL state ---
GLint prev_fbo;
glGetIntegerv(GL_FRAMEBUFFER_BINDING, &prev_fbo);
GLint prev_viewport[4];
glGetIntegerv(GL_VIEWPORT, prev_viewport);
// --- Bind FBO ---
glBindFramebuffer(GL_FRAMEBUFFER, r->fbo);
glViewport(0, 0, r->width, r->height);
// Clear with bg_color (interpreted as RGBA8 packed — same memory layout)
uint8_t br, bg, bb, ba;
unpack_rgba8(r->config.bg_color, br, bg, bb, ba);
glClearColor(br / 255.0f, bg / 255.0f, bb / 255.0f, ba / 255.0f);
glClear(GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT);
glEnable(GL_BLEND);
glBlendFunc(GL_SRC_ALPHA, GL_ONE_MINUS_SRC_ALPHA);
// View transform: world -> screen pixels
float scale = cam_zoom;
float tx = -cam_x * scale + (float)r->width * 0.5f;
float ty = -cam_y * scale + (float)r->height * 0.5f;
// Frustum cull AABB en world coords. Margen del 10% para que un nodo o
// arista a punto de entrar en pantalla no haga pop-in al moverse.
float half_w = ((float)r->width * 0.5f) / std::max(scale, 0.0001f);
float half_h = ((float)r->height * 0.5f) / std::max(scale, 0.0001f);
const float margin = 0.10f;
float vx0 = cam_x - half_w * (1.0f + margin);
float vx1 = cam_x + half_w * (1.0f + margin);
float vy0 = cam_y - half_h * (1.0f + margin);
float vy1 = cam_y + half_h * (1.0f + margin);
// ----------------------------------------------------------------
// Draw edges (frustum-culled)
// ----------------------------------------------------------------
if (graph.edge_count > 0 && graph.edges && graph.nodes) {
// Asegurar staging — capacidad maxima posible en este frame es
// edge_count * 2 vertices. La realidad post-cull suele ser mucho
// menor, pero reservamos para el peor caso y no realocamos por
// frame.
size_t need_verts = (size_t)graph.edge_count * 2;
if (need_verts > r->edge_staging_cap) {
size_t new_cap = grow_capacity(r->edge_staging_cap, need_verts, 8192);
r->edge_staging = (EdgeVertex*)realloc(r->edge_staging, new_cap * sizeof(EdgeVertex));
r->edge_staging_cap = new_cap;
}
size_t out = 0;
for (int i = 0; i < graph.edge_count; ++i) {
const GraphEdge& e = graph.edges[i];
if (e.source >= (uint32_t)graph.node_count) continue;
if (e.target >= (uint32_t)graph.node_count) continue;
const GraphNode& ns = graph.nodes[e.source];
const GraphNode& nt = graph.nodes[e.target];
// Frustum cull: AABB del segmento (con margen para edges casi
// tangentes al viewport). Si el AABB no intersecta el viewport,
// skip — la arista no contribuye a ningun pixel visible.
float ex0 = std::min(ns.x, nt.x);
float ex1 = std::max(ns.x, nt.x);
float ey0 = std::min(ns.y, nt.y);
float ey1 = std::max(ns.y, nt.y);
if (ex1 < vx0 || ex0 > vx1 || ey1 < vy0 || ey0 > vy1) continue;
uint32_t ecol = e.color != 0 ? e.color : pack_rgba8(0x88, 0x88, 0x88, 0xFF);
uint32_t col = modulate_alpha_rgba8(ecol, r->config.edge_alpha);
r->edge_staging[out++] = { ns.x, ns.y, col };
r->edge_staging[out++] = { nt.x, nt.y, col };
}
if (out > 0) {
const size_t used_bytes = out * sizeof(EdgeVertex);
if (used_bytes > r->edge_vbo_capacity) {
r->edge_vbo_capacity = grow_capacity(r->edge_vbo_capacity, used_bytes,
8192 * sizeof(EdgeVertex));
}
glUseProgram(r->edge_shader);
glUniform2f(glGetUniformLocation(r->edge_shader, "u_viewport"),
(float)r->width, (float)r->height);
glUniform1f(glGetUniformLocation(r->edge_shader, "u_scale"), scale);
glUniform2f(glGetUniformLocation(r->edge_shader, "u_translate"), tx, ty);
glLineWidth(r->config.edge_width);
glBindVertexArray(r->edge_vao);
glBindBuffer(GL_ARRAY_BUFFER, r->edge_vbo);
// Orphan: descarta el contenido previo y damos al driver un
// buffer fresco con la capacidad reservada. Despues subimos
// solo los bytes realmente usados con BufferSubData — evitamos
// el sync stall y reutilizamos la asignacion entre frames
// mientras no crezca.
glBufferData(GL_ARRAY_BUFFER, (GLsizeiptr)r->edge_vbo_capacity, nullptr, GL_STREAM_DRAW);
glBufferSubData(GL_ARRAY_BUFFER, 0, (GLsizeiptr)used_bytes, r->edge_staging);
glDrawArrays(GL_LINES, 0, (GLsizei)out);
glBindVertexArray(0);
}
}
// ----------------------------------------------------------------
// Draw nodes (instanced quads, frustum-culled)
// ----------------------------------------------------------------
if (graph.node_count > 0 && graph.nodes) {
if ((size_t)graph.node_count > r->node_staging_cap) {
size_t new_cap = grow_capacity(r->node_staging_cap, (size_t)graph.node_count, 4096);
r->node_staging = (NodeInstance*)realloc(r->node_staging, new_cap * sizeof(NodeInstance));
r->node_staging_cap = new_cap;
}
size_t visible = 0;
for (int i = 0; i < graph.node_count; ++i) {
const GraphNode& n = graph.nodes[i];
float sz = n.size > 0.0f ? n.size : 4.0f;
float half = sz * 0.5f;
// AABB del nodo: centro ± half. Skip si fuera del viewport.
if (n.x + half < vx0 || n.x - half > vx1) continue;
if (n.y + half < vy0 || n.y - half > vy1) continue;
uint32_t ncol = n.color != 0 ? n.color : k_palette[n.community % 10];
r->node_staging[visible++] = { n.x, n.y, sz, ncol };
}
if (visible > 0) {
const size_t used_bytes = visible * sizeof(NodeInstance);
if (used_bytes > r->node_vbo_capacity) {
r->node_vbo_capacity = grow_capacity(r->node_vbo_capacity, used_bytes,
4096 * sizeof(NodeInstance));
}
glUseProgram(r->node_shader);
glUniform2f(glGetUniformLocation(r->node_shader, "u_viewport"),
(float)r->width, (float)r->height);
glUniform1f(glGetUniformLocation(r->node_shader, "u_scale"), scale);
glUniform2f(glGetUniformLocation(r->node_shader, "u_translate"), tx, ty);
glUniform1f(glGetUniformLocation(r->node_shader, "u_outline_px"), r->config.node_outline);
glBindVertexArray(r->node_vao);
glBindBuffer(GL_ARRAY_BUFFER, r->node_instance_vbo);
glBufferData(GL_ARRAY_BUFFER, (GLsizeiptr)r->node_vbo_capacity, nullptr, GL_STREAM_DRAW);
glBufferSubData(GL_ARRAY_BUFFER, 0, (GLsizeiptr)used_bytes, r->node_staging);
float avg_px = 8.0f * scale; // estimacion para el AA del SDF
glUniform1f(glGetUniformLocation(r->node_shader, "u_node_px"), avg_px);
glDrawArraysInstanced(GL_TRIANGLE_STRIP, 0, 4, (GLsizei)visible);
glBindVertexArray(0);
}
}
// --- Restore GL state ---
glDisable(GL_BLEND);
glBindFramebuffer(GL_FRAMEBUFFER, (GLuint)prev_fbo);
glViewport(prev_viewport[0], prev_viewport[1], prev_viewport[2], prev_viewport[3]);
return r->texture;
}