02b4141cc1
Issue 0049c. Tres optimizaciones internas en graph_renderer.cpp + un
helper puro en graph_force_layout para detectar convergencia. API publica
intacta — solo cambian el layout interno de los buffers, el shader y
los costes por frame.
1. RGBA8 color packing
- El instance buffer de nodos pasa de (x,y,size,r,g,b,a) 28B a
(x,y,size,color_u32) 16B (-43%). Aristas: 24B → 12B/vertex (-50%).
- Shaders desempaquetan con bit shifts (compatible GL 3.30+, no
necesita unpackUnorm4x8 que es 4.20+).
- Helpers expuestos: pack_rgba8 / unpack_rgba8 / modulate_alpha_rgba8
en graph_renderer.h. Los GraphNode.color y la paleta ya tenian el
layout correcto (R en LSB), asi que CPU ahora pasa el uint32 directo
sin convertir a 4 floats por nodo y por frame.
2. Capacity-tracked streaming buffers
- Sustituye el doble glBufferData de antes por:
glBufferData(NULL, capacity, STREAM_DRAW) // orphan + reserva
glBufferSubData(0, used_bytes, data) // solo lo usado
- capacity crece x2 cuando hace falta (inicial 4096 nodos /
8192 vértices de aristas) → reasignaciones en O(log N).
- Staging CPU (NodeInstance* / EdgeVertex*) reusado entre frames con
realloc, no malloc/free per frame.
3. Frustum cull (CPU-side)
- AABB del viewport en world coords con margen 10%.
- Aristas: skip si AABB del segmento no intersecta el viewport.
- Nodos: solo los visibles entran al instance buffer; visible_count
es el N que pasa a glDrawArraysInstanced. Pop-in de borde mitigado
por el margen.
4. graph_force_layout_should_pause(low_frames, min_consecutive)
- Helper puro: el caller mantiene el contador, la funcion solo
decide si parar. Reemplaza la rama inline en demos_graph.cpp.
- Test Catch2 con secuencias artificiales.
Tests: test_graph_pack_rgba8 (16401 asserts, 4 cases — roundtrip exhaustivo
+ alpha modulation + clamp). test_graph_should_pause (3 cases, 14 asserts).
Los 29 tests del cpp/tests/ siguen verdes (incluido test_visual con goldens).
Bump versiones:
- graph_renderer 1.1.0 → 1.2.0
- graph_force_layout 1.0.0 → 1.1.0 (tested: true via should_pause test)
383 lines
14 KiB
C++
383 lines
14 KiB
C++
#include "viz/graph_force_layout.h"
|
|
#include "viz/graph_types.h"
|
|
|
|
#include <cmath>
|
|
#include <cstdlib>
|
|
#include <algorithm>
|
|
#include <vector>
|
|
|
|
// ---------------------------------------------------------------------------
|
|
// Quadtree for Barnes-Hut approximation
|
|
// ---------------------------------------------------------------------------
|
|
|
|
// One cell of the Barnes-Hut quadtree. Cells live in a flat pool and refer to
// each other by pool index rather than by pointer.
struct QuadNode {
    // Center of mass of every body stored in this cell's subtree.
    float cx;
    float cy;
    // Total mass of the subtree (each body contributes its own mass).
    float mass;
    // Axis-aligned bounding box: minimum corner.
    float x0;
    float y0;
    // Axis-aligned bounding box: maximum corner.
    float x1;
    float y1;
    // Pool indices of the four quadrants: NW=0, NE=1, SW=2, SE=3 (-1 = none).
    int children[4];
    // Index of the graph node held by a leaf cell (-1 when there is none).
    int body;
};
|
|
|
|
// Dynamic cell pool. It used to be a static QuadNode[1<<20] array (~48MB always
|
|
// reserved, with a hard cap around ~250k nodes because of the subdivide fan-out).
|
|
// It is now resized ONCE at the start of each step according to the graph's N
|
|
// (5*N + 1024 cells, intended as a generous bound for subdivisions). After that,
|
|
// quad_new only increments quad_count and never resizes the vector, so QuadNode&
|
|
// references kept alive while the tree is being built stay valid.
|
|
static std::vector<QuadNode> quad_pool;
|
|
// Number of pool cells handed out so far; quad_new returns -1 once it reaches quad_pool.size().
static int quad_count = 0;
|
|
|
|
static int quad_new(float x0, float y0, float x1, float y1) {
|
|
if (quad_count >= (int)quad_pool.size()) return -1; // pool agotado
|
|
int idx = quad_count++;
|
|
QuadNode& q = quad_pool[idx];
|
|
q.cx = 0; q.cy = 0; q.mass = 0;
|
|
q.x0 = x0; q.y0 = y0; q.x1 = x1; q.y1 = y1;
|
|
q.children[0] = q.children[1] = q.children[2] = q.children[3] = -1;
|
|
q.body = -1;
|
|
return idx;
|
|
}
|
|
|
|
// Garantiza que el pool tenga al menos `need` celdas disponibles. Llamar
|
|
// ANTES de empezar a construir el arbol para evitar invalidar referencias
|
|
// QuadNode& durante quad_subdivide / quad_insert_body.
|
|
static void quad_pool_reserve(size_t need) {
|
|
if (quad_pool.size() < need) quad_pool.resize(need);
|
|
}
|
|
|
|
// Determine quadrant index for point (px,py) relative to cell midpoint.
|
|
// 0=NW, 1=NE, 2=SW, 3=SE
|
|
static int quad_child_idx(const QuadNode& q, float px, float py) {
|
|
float mx = (q.x0 + q.x1) * 0.5f;
|
|
float my = (q.y0 + q.y1) * 0.5f;
|
|
int xi = (px >= mx) ? 1 : 0;
|
|
int yi = (py >= my) ? 2 : 0;
|
|
return xi | yi;
|
|
}
|
|
|
|
// Subdivide cell qi into four children.
|
|
static void quad_subdivide(int qi) {
|
|
QuadNode& q = quad_pool[qi];
|
|
float mx = (q.x0 + q.x1) * 0.5f;
|
|
float my = (q.y0 + q.y1) * 0.5f;
|
|
// NW
|
|
quad_pool[qi].children[0] = quad_new(q.x0, q.y0, mx, my);
|
|
// NE
|
|
quad_pool[qi].children[1] = quad_new(mx, q.y0, q.x1, my);
|
|
// SW
|
|
quad_pool[qi].children[2] = quad_new(q.x0, my, mx, q.y1);
|
|
// SE
|
|
quad_pool[qi].children[3] = quad_new(mx, my, q.x1, q.y1);
|
|
}
|
|
|
|
// Insert body (node_idx at position nx,ny with mass nmass) into cell qi.
|
|
// Uses iterative descent to avoid stack overflow on deep trees.
|
|
static void quad_insert(int root, int node_idx, float nx, float ny, float nmass) {
|
|
int qi = root;
|
|
while (qi >= 0) {
|
|
QuadNode& q = quad_pool[qi];
|
|
// Update center of mass
|
|
float total = q.mass + nmass;
|
|
q.cx = (q.cx * q.mass + nx * nmass) / total;
|
|
q.cy = (q.cy * q.mass + ny * nmass) / total;
|
|
q.mass = total;
|
|
|
|
if (q.body == -1 && q.children[0] == -1) {
|
|
// Empty leaf: place body here
|
|
q.body = node_idx;
|
|
return;
|
|
}
|
|
|
|
if (q.body >= 0) {
|
|
// Leaf with existing body: subdivide, push existing body down
|
|
quad_subdivide(qi);
|
|
// Move old body into correct child (re-read q after subdivide since pool may shift)
|
|
QuadNode& qq = quad_pool[qi];
|
|
int old_body = qq.body;
|
|
float obx = /* we need positions */ 0, oby = 0;
|
|
// We store positions in the GraphData, pass via closure is not possible here.
|
|
// Instead we pass a pointer to positions alongside. We'll fix this by using
|
|
// a file-scope pointer set before each build.
|
|
(void)old_body; (void)obx; (void)oby;
|
|
// NOTE: positions accessed via file-scope g_nodes pointer below.
|
|
qq.body = -1;
|
|
}
|
|
|
|
int ci = quad_child_idx(quad_pool[qi], nx, ny);
|
|
qi = quad_pool[qi].children[ci];
|
|
}
|
|
}
|
|
|
|
// Node array of the graph whose quadtree is currently being built. It is
// assigned by graph_force_layout_step before bodies are inserted and read by
// quad_insert_body to look up positions (avoids passing it everywhere).
// NOTE(review): shared file-scope state with no synchronization visible here,
// so concurrent layout steps on different graphs would presumably interfere.
|
|
static const GraphNode* g_nodes = nullptr;
|
|
|
|
// Insert body knowing positions from g_nodes.
|
|
static void quad_insert_body(int qi, int node_idx) {
|
|
float nx = g_nodes[node_idx].x;
|
|
float ny = g_nodes[node_idx].y;
|
|
const float nmass = 1.0f;
|
|
|
|
while (qi >= 0) {
|
|
QuadNode& q = quad_pool[qi];
|
|
float total = q.mass + nmass;
|
|
q.cx = (q.cx * q.mass + nx * nmass) / total;
|
|
q.cy = (q.cy * q.mass + ny * nmass) / total;
|
|
q.mass = total;
|
|
|
|
if (q.body == -1 && q.children[0] == -1) {
|
|
// Empty leaf
|
|
q.body = node_idx;
|
|
return;
|
|
}
|
|
|
|
if (q.children[0] == -1) {
|
|
// Leaf occupied: subdivide and push existing body down
|
|
int old_body = q.body;
|
|
q.body = -1;
|
|
quad_subdivide(qi);
|
|
|
|
// Push old body into child
|
|
int old_ci = quad_child_idx(quad_pool[qi], g_nodes[old_body].x, g_nodes[old_body].y);
|
|
int old_child = quad_pool[qi].children[old_ci];
|
|
if (old_child >= 0) {
|
|
QuadNode& oc = quad_pool[old_child];
|
|
oc.cx = g_nodes[old_body].x;
|
|
oc.cy = g_nodes[old_body].y;
|
|
oc.mass = 1.0f;
|
|
oc.body = old_body;
|
|
}
|
|
}
|
|
|
|
int ci = quad_child_idx(quad_pool[qi], nx, ny);
|
|
qi = quad_pool[qi].children[ci];
|
|
}
|
|
}
|
|
|
|
// Compute Barnes-Hut repulsion force on node at (nx,ny) from subtree qi.
|
|
// Accumulates force into (fx, fy).
|
|
static void quad_force(int qi, float nx, float ny,
|
|
float theta, float repulsion, float min_dist,
|
|
float& fx, float& fy) {
|
|
// Stack en pila de la funcion: thread-safe (la version anterior con
|
|
// `static` se rompia bajo OpenMP). La profundidad de un quadtree con N
|
|
// bodies acotada por log4(N) ~= 10 niveles para N <= 1M, asi que 256
|
|
// entradas son holgadas para todos los pushes simultaneos.
|
|
int stack[256];
|
|
int top = 0;
|
|
stack[top++] = qi;
|
|
|
|
while (top > 0) {
|
|
int ci = stack[--top];
|
|
if (ci < 0) continue;
|
|
const QuadNode& q = quad_pool[ci];
|
|
if (q.mass == 0) continue;
|
|
|
|
float dx = q.cx - nx;
|
|
float dy = q.cy - ny;
|
|
float dist2 = dx * dx + dy * dy;
|
|
float dist = std::sqrt(dist2);
|
|
if (dist < min_dist) dist = min_dist;
|
|
|
|
// Cell size
|
|
float cell_size = q.x1 - q.x0;
|
|
|
|
// Use multipole approximation if far enough OR if leaf
|
|
bool is_leaf = (q.children[0] == -1);
|
|
if (is_leaf || (cell_size / dist) < theta) {
|
|
// Coulomb repulsion: F = repulsion * mass / dist^2
|
|
float force = repulsion * q.mass / (dist * dist);
|
|
fx -= force * dx / dist;
|
|
fy -= force * dy / dist;
|
|
} else {
|
|
// Push children
|
|
for (int k = 0; k < 4; ++k) {
|
|
if (q.children[k] >= 0)
|
|
stack[top++] = q.children[k];
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
// ---------------------------------------------------------------------------
|
|
// Public API
|
|
// ---------------------------------------------------------------------------
|
|
|
|
// Runs `config.iterations` iterations of the force-directed layout on `graph`
// and refreshes the graph bounds afterwards.
//
// Each iteration:
//   1. builds a Barnes-Hut quadtree over the current node positions,
//   2. accumulates repulsion (quadtree), edge attraction and gravity toward
//      the origin into per-node force buffers,
//   3. integrates: v = v * damping + F (clamped per component to
//      +/- max_velocity), pos += v.
// Pinned nodes receive no force and are never moved.
//
// Returns the sum of squared velocities over the non-pinned nodes from the
// last iteration, or 0 when the graph has no nodes or no iteration runs.
//
// The force buffers and the quadtree pool are function/file-scope statics that
// are reused between calls, so this function must not be called concurrently.
float graph_force_layout_step(GraphData& graph, const ForceLayoutConfig& config) {
    if (graph.node_count <= 0) return 0.0f;

    // Scratch force accumulators, kept between calls to avoid reallocating
    // every frame. std::vector owns the storage, so a failed allocation cannot
    // leave a dangling pointer paired with an already-updated capacity.
    static std::vector<float> fx_store;
    static std::vector<float> fy_store;

    const size_t needed = static_cast<size_t>(graph.node_count);
    if (fx_store.size() < needed || fy_store.size() < needed) {
        // Small slack so graphs that grow by a few nodes do not reallocate.
        fx_store.resize(needed + 64);
        fy_store.resize(needed + 64);
    }
    float* fx_buf = fx_store.data();
    float* fy_buf = fy_store.data();

    float total_energy = 0.0f;

    for (int iter = 0; iter < config.iterations; ++iter) {
        // Zero forces
        #pragma omp parallel for if(graph.node_count >= 1024) schedule(static)
        for (int i = 0; i < graph.node_count; ++i) {
            fx_buf[i] = 0.0f;
            fy_buf[i] = 0.0f;
        }

        // ---- Build Barnes-Hut quadtree ----
        // Bounding box of the current positions.
        float bx0 = graph.nodes[0].x, bx1 = graph.nodes[0].x;
        float by0 = graph.nodes[0].y, by1 = graph.nodes[0].y;
        for (int i = 1; i < graph.node_count; ++i) {
            const float px = graph.nodes[i].x;
            const float py = graph.nodes[i].y;
            if (px < bx0) bx0 = px;
            if (px > bx1) bx1 = px;
            if (py < by0) by0 = py;
            if (py > by1) by1 = py;
        }
        // Margin so the root cell is never degenerate (zero-sized).
        const float margin = (bx1 - bx0 + by1 - by0) * 0.05f + 1.0f;
        bx0 -= margin; bx1 += margin;
        by0 -= margin; by1 += margin;
        // Make the root cell square around the box center.
        const float side = std::max(bx1 - bx0, by1 - by0);
        const float cx = (bx0 + bx1) * 0.5f;
        const float cy = (by0 + by1) * 0.5f;
        bx0 = cx - side * 0.5f; bx1 = cx + side * 0.5f;
        by0 = cy - side * 0.5f; by1 = cy + side * 0.5f;

        // Size the pool before building: the tree code keeps QuadNode&
        // references alive and relies on the pool not being resized while the
        // tree is under construction.
        quad_pool_reserve((size_t)graph.node_count * 5 + 1024);
        quad_count = 0;
        g_nodes = graph.nodes;
        const int root = quad_new(bx0, by0, bx1, by1);

        for (int i = 0; i < graph.node_count; ++i) {
            quad_insert_body(root, i);
        }

        // ---- Repulsion via Barnes-Hut ----
        // Each loop iteration only reads the quadtree and writes its own slot
        // of fx_buf/fy_buf; quad_force keeps its traversal stack local.
        #pragma omp parallel for if(graph.node_count >= 1024) schedule(dynamic, 256)
        for (int i = 0; i < graph.node_count; ++i) {
            if (graph.nodes[i].pinned) continue;
            quad_force(root,
                       graph.nodes[i].x, graph.nodes[i].y,
                       config.theta, config.repulsion, config.min_distance,
                       fx_buf[i], fy_buf[i]);
        }

        // ---- Attraction along edges (spring force) ----
        // Serial on purpose: several edges may write to the same node slot.
        for (int e = 0; e < graph.edge_count; ++e) {
            const GraphEdge& edge = graph.edges[e];
            const int s = (int)edge.source;
            const int t = (int)edge.target;
            // Ignore edges that reference nodes outside the graph.
            if (s < 0 || s >= graph.node_count) continue;
            if (t < 0 || t >= graph.node_count) continue;

            const float dx = graph.nodes[t].x - graph.nodes[s].x;
            const float dy = graph.nodes[t].y - graph.nodes[s].y;
            float dist = std::sqrt(dx * dx + dy * dy);
            if (dist < config.min_distance) dist = config.min_distance;

            // F = k * dist * weight (Hooke: pulls toward equilibrium at 0)
            const float force = config.attraction * dist * edge.weight;
            const float fx_e = force * dx / dist;
            const float fy_e = force * dy / dist;

            if (!graph.nodes[s].pinned) { fx_buf[s] += fx_e; fy_buf[s] += fy_e; }
            if (!graph.nodes[t].pinned) { fx_buf[t] -= fx_e; fy_buf[t] -= fy_e; }
        }

        // ---- Gravity toward center (0,0) ----
        if (config.gravity != 0.0f) {
            #pragma omp parallel for if(graph.node_count >= 1024) schedule(static)
            for (int i = 0; i < graph.node_count; ++i) {
                if (graph.nodes[i].pinned) continue;
                fx_buf[i] -= config.gravity * graph.nodes[i].x;
                fy_buf[i] -= config.gravity * graph.nodes[i].y;
            }
        }

        // ---- Integrate: v = v * damping + F; pos += v ----
        // Energy is restarted every iteration, so the value returned is that
        // of the last iteration only.
        total_energy = 0.0f;
        #pragma omp parallel for if(graph.node_count >= 1024) schedule(static) reduction(+:total_energy)
        for (int i = 0; i < graph.node_count; ++i) {
            GraphNode& n = graph.nodes[i];
            if (n.pinned) continue;

            n.vx = n.vx * config.damping + fx_buf[i];
            n.vy = n.vy * config.damping + fy_buf[i];

            // Clamp each velocity component to [-max_velocity, max_velocity].
            n.vx = std::max(-config.max_velocity, std::min(config.max_velocity, n.vx));
            n.vy = std::max(-config.max_velocity, std::min(config.max_velocity, n.vy));

            n.x += n.vx;
            n.y += n.vy;

            total_energy += n.vx * n.vx + n.vy * n.vy;
        }
    }

    graph.update_bounds();
    return total_energy;
}
|
|
|
|
// Scatters every non-pinned node to a pseudo-random position inside the square
// [-spread, spread] x [-spread, spread], zeroes its velocity, and then
// refreshes the graph bounds. Pinned nodes are left untouched.
// Positions come from rand(), so the outcome depends on the caller's seeding.
void graph_force_layout_reset(GraphData& graph, float spread) {
    const float rand_max = static_cast<float>(RAND_MAX);

    for (int idx = 0; idx < graph.node_count; ++idx) {
        GraphNode& node = graph.nodes[idx];
        if (!node.pinned) {
            // rand() yields [0, RAND_MAX]; rescale to [-1, 1]. x is drawn
            // before y.
            const float unit_x = 2.0f * static_cast<float>(rand()) / rand_max - 1.0f;
            node.x = spread * unit_x;
            const float unit_y = 2.0f * static_cast<float>(rand()) / rand_max - 1.0f;
            node.y = spread * unit_y;

            node.vx = 0.0f;
            node.vy = 0.0f;
        }
    }

    graph.update_bounds();
}
|
|
|
|
// Places the non-pinned nodes on a circle of the given radius centered at the
// origin and zeroes their velocities, then refreshes the graph bounds.
// Node i gets the angle 2*pi*i/node_count, so pinned nodes leave a gap at
// their slot instead of shifting the others. Does nothing for an empty graph.
void graph_layout_circular(GraphData& graph, float radius) {
    if (graph.node_count <= 0) {
        return;
    }

    const float two_pi = 6.28318530718f;
    const float count_f = static_cast<float>(graph.node_count);

    for (int idx = 0; idx < graph.node_count; ++idx) {
        GraphNode& node = graph.nodes[idx];
        if (!node.pinned) {
            const float angle = two_pi * static_cast<float>(idx) / count_f;
            node.x = radius * std::cos(angle);
            node.y = radius * std::sin(angle);
            node.vx = 0.0f;
            node.vy = 0.0f;
        }
    }

    graph.update_bounds();
}
|
|
|
|
// Decides whether the layout simulation should pause.
// The caller owns the counter of consecutive low-energy frames; this function
// only compares it against the threshold.
// Returns true when the counter has reached `min_consecutive`, and always
// returns true when the threshold is zero or negative.
bool graph_force_layout_should_pause(int consecutive_low_frames, int min_consecutive) {
    const bool threshold_disabled = (min_consecutive <= 0);
    const bool threshold_reached = (consecutive_low_frames >= min_consecutive);
    return threshold_disabled || threshold_reached;
}
|
|
|
|
// Arranges the non-pinned nodes on a row-major grid centered at the origin,
// with `spacing` between neighbouring cells, zeroes their velocities, and then
// refreshes the graph bounds.
// The grid has ceil(sqrt(node_count)) columns. Node i always maps to cell
// (i / cols, i % cols), so pinned nodes leave their cell empty.
// Does nothing for an empty graph.
void graph_layout_grid(GraphData& graph, float spacing) {
    if (graph.node_count <= 0) {
        return;
    }

    const int cols = (int)std::ceil(std::sqrt((float)graph.node_count));
    const int rows = (graph.node_count + cols - 1) / cols;

    // Offsets that center the grid on (0, 0).
    const float ox = -0.5f * (cols - 1) * spacing;
    const float oy = -0.5f * (rows - 1) * spacing;

    // Running grid cursor; it advances for every node, pinned or not.
    int col = 0;
    int row = 0;
    for (int idx = 0; idx < graph.node_count; ++idx) {
        GraphNode& node = graph.nodes[idx];
        if (!node.pinned) {
            node.x = ox + col * spacing;
            node.y = oy + row * spacing;
            node.vx = 0.0f;
            node.vy = 0.0f;
        }

        ++col;
        if (col == cols) {
            col = 0;
            ++row;
        }
    }

    graph.update_bounds();
}
|