fn_registry/cpp/functions/viz/graph_force_layout.cpp

#include "viz/graph_force_layout.h"
#include "viz/graph_types.h"

#include <cmath>
#include <cstdlib>
#include <algorithm>
#include <vector>

// ---------------------------------------------------------------------------
// Quadtree for Barnes-Hut approximation
// ---------------------------------------------------------------------------

struct QuadNode {
    float cx, cy;       // center of mass
    float mass;         // total mass (node count in subtree)
    float x0, y0;       // bounding box min
    float x1, y1;       // bounding box max
    int children[4];    // NW=0, NE=1, SW=2, SE=3  (-1 = empty)
    int body;           // node index if leaf (-1 if internal)
};

// Pool dinamico — antes era un array static QuadNode[1<<20] (~48MB siempre
// reservados, tope rigido en ~250k nodos por la fan-out del subdivide).
// Ahora se redimensiona UNA VEZ al inicio de cada step segun el N del grafo
// (5*N + 1024 celdas como cota holgada para subdivisiones). Despues de eso
// quad_new solo incrementa quad_count, asi que las referencias QuadNode& que
// se mantienen vivas durante la construccion del arbol son seguras.
static std::vector<QuadNode> quad_pool;
static int quad_count = 0;

static int quad_new(float x0, float y0, float x1, float y1) {
    if (quad_count >= (int)quad_pool.size()) return -1; // pool agotado
    int idx = quad_count++;
    QuadNode& q = quad_pool[idx];
    q.cx = 0; q.cy = 0; q.mass = 0;
    q.x0 = x0; q.y0 = y0; q.x1 = x1; q.y1 = y1;
    q.children[0] = q.children[1] = q.children[2] = q.children[3] = -1;
    q.body = -1;
    return idx;
}

// Garantiza que el pool tenga al menos `need` celdas disponibles. Llamar
// ANTES de empezar a construir el arbol para evitar invalidar referencias
// QuadNode& durante quad_subdivide / quad_insert_body.
static void quad_pool_reserve(size_t need) {
    if (quad_pool.size() < need) quad_pool.resize(need);
}

// Determine quadrant index for point (px,py) relative to cell midpoint.
// 0=NW, 1=NE, 2=SW, 3=SE
static int quad_child_idx(const QuadNode& q, float px, float py) {
    float mx = (q.x0 + q.x1) * 0.5f;
    float my = (q.y0 + q.y1) * 0.5f;
    int xi = (px >= mx) ? 1 : 0;
    int yi = (py >= my) ? 2 : 0;
    return xi | yi;
}

// Subdivide cell qi into four children.
static void quad_subdivide(int qi) {
    QuadNode& q = quad_pool[qi];
    float mx = (q.x0 + q.x1) * 0.5f;
    float my = (q.y0 + q.y1) * 0.5f;
    // NW
    quad_pool[qi].children[0] = quad_new(q.x0, q.y0, mx, my);
    // NE
    quad_pool[qi].children[1] = quad_new(mx, q.y0, q.x1, my);
    // SW
    quad_pool[qi].children[2] = quad_new(q.x0, my, mx, q.y1);
    // SE
    quad_pool[qi].children[3] = quad_new(mx, my, q.x1, q.y1);
}

// Insert body (node_idx at position nx,ny with mass nmass) into cell qi.
// Uses iterative descent to avoid stack overflow on deep trees.
static void quad_insert(int root, int node_idx, float nx, float ny, float nmass) {
    int qi = root;
    while (qi >= 0) {
        QuadNode& q = quad_pool[qi];
        // Update center of mass
        float total = q.mass + nmass;
        q.cx = (q.cx * q.mass + nx * nmass) / total;
        q.cy = (q.cy * q.mass + ny * nmass) / total;
        q.mass = total;

        if (q.body == -1 && q.children[0] == -1) {
            // Empty leaf: place body here
            q.body = node_idx;
            return;
        }

        if (q.body >= 0) {
            // Leaf with existing body: subdivide, push existing body down
            quad_subdivide(qi);
            // Move old body into correct child (re-read q after subdivide since pool may shift)
            QuadNode& qq = quad_pool[qi];
            int old_body = qq.body;
            float obx = /* we need positions */ 0, oby = 0;
            // We store positions in the GraphData, pass via closure is not possible here.
            // Instead we pass a pointer to positions alongside. We'll fix this by using
            // a file-scope pointer set before each build.
            (void)old_body; (void)obx; (void)oby;
            // NOTE: positions accessed via file-scope g_nodes pointer below.
            qq.body = -1;
        }

        int ci = quad_child_idx(quad_pool[qi], nx, ny);
        qi = quad_pool[qi].children[ci];
    }
}

// File-scope pointers set before each tree build (avoids passing them everywhere).
static const GraphNode* g_nodes = nullptr;

// Insert body knowing positions from g_nodes.
static void quad_insert_body(int qi, int node_idx) {
    float nx = g_nodes[node_idx].x;
    float ny = g_nodes[node_idx].y;
    const float nmass = 1.0f;

    while (qi >= 0) {
        QuadNode& q = quad_pool[qi];
        float total = q.mass + nmass;
        q.cx = (q.cx * q.mass + nx * nmass) / total;
        q.cy = (q.cy * q.mass + ny * nmass) / total;
        q.mass = total;

        if (q.body == -1 && q.children[0] == -1) {
            // Empty leaf
            q.body = node_idx;
            return;
        }

        if (q.children[0] == -1) {
            // Leaf occupied: subdivide and push existing body down
            int old_body = q.body;
            q.body = -1;
            quad_subdivide(qi);

            // Push old body into child
            int old_ci = quad_child_idx(quad_pool[qi], g_nodes[old_body].x, g_nodes[old_body].y);
            int old_child = quad_pool[qi].children[old_ci];
            if (old_child >= 0) {
                QuadNode& oc = quad_pool[old_child];
                oc.cx = g_nodes[old_body].x;
                oc.cy = g_nodes[old_body].y;
                oc.mass = 1.0f;
                oc.body = old_body;
            }
        }

        int ci = quad_child_idx(quad_pool[qi], nx, ny);
        qi = quad_pool[qi].children[ci];
    }
}

// Compute Barnes-Hut repulsion force on node at (nx,ny) from subtree qi.
// Accumulates force into (fx, fy).
static void quad_force(int qi, float nx, float ny,
                       float theta, float repulsion, float min_dist,
                       float& fx, float& fy) {
    // Stack en pila de la funcion: thread-safe (la version anterior con
    // `static` se rompia bajo OpenMP). La profundidad de un quadtree con N
    // bodies acotada por log4(N) ~= 10 niveles para N <= 1M, asi que 256
    // entradas son holgadas para todos los pushes simultaneos.
    int stack[256];
    int top = 0;
    stack[top++] = qi;

    while (top > 0) {
        int ci = stack[--top];
        if (ci < 0) continue;
        const QuadNode& q = quad_pool[ci];
        if (q.mass == 0) continue;

        float dx = q.cx - nx;
        float dy = q.cy - ny;
        float dist2 = dx * dx + dy * dy;
        float dist = std::sqrt(dist2);
        if (dist < min_dist) dist = min_dist;

        // Cell size
        float cell_size = q.x1 - q.x0;

        // Use multipole approximation if far enough OR if leaf
        bool is_leaf = (q.children[0] == -1);
        if (is_leaf || (cell_size / dist) < theta) {
            // Coulomb repulsion: F = repulsion * mass / dist^2
            float force = repulsion * q.mass / (dist * dist);
            fx -= force * dx / dist;
            fy -= force * dy / dist;
        } else {
            // Push children
            for (int k = 0; k < 4; ++k) {
                if (q.children[k] >= 0)
                    stack[top++] = q.children[k];
            }
        }
    }
}

// ---------------------------------------------------------------------------
// Public API
// ---------------------------------------------------------------------------

float graph_force_layout_step(GraphData& graph, const ForceLayoutConfig& config) {
    if (graph.node_count <= 0) return 0.0f;

    // Temporary force accumulators (stack-allocated for small graphs, static for large)
    static float* fx_buf = nullptr;
    static float* fy_buf = nullptr;
    static int    buf_cap = 0;

    if (graph.node_count > buf_cap) {
        delete[] fx_buf;
        delete[] fy_buf;
        buf_cap = graph.node_count + 64;
        fx_buf = new float[buf_cap];
        fy_buf = new float[buf_cap];
    }

    float total_energy = 0.0f;

    for (int iter = 0; iter < config.iterations; ++iter) {
        // Zero forces
        #pragma omp parallel for if(graph.node_count >= 1024) schedule(static)
        for (int i = 0; i < graph.node_count; ++i) {
            fx_buf[i] = 0.0f;
            fy_buf[i] = 0.0f;
        }

        // ---- Build Barnes-Hut quadtree ----
        // Compute bounding box of current positions
        float bx0 = graph.nodes[0].x, bx1 = graph.nodes[0].x;
        float by0 = graph.nodes[0].y, by1 = graph.nodes[0].y;
        for (int i = 1; i < graph.node_count; ++i) {
            float px = graph.nodes[i].x, py = graph.nodes[i].y;
            if (px < bx0) bx0 = px; if (px > bx1) bx1 = px;
            if (py < by0) by0 = py; if (py > by1) by1 = py;
        }
        // Add margin to avoid degeneracies
        float margin = (bx1 - bx0 + by1 - by0) * 0.05f + 1.0f;
        bx0 -= margin; bx1 += margin;
        by0 -= margin; by1 += margin;
        // Make it square
        float side = std::max(bx1 - bx0, by1 - by0);
        float cx = (bx0 + bx1) * 0.5f, cy = (by0 + by1) * 0.5f;
        bx0 = cx - side * 0.5f; bx1 = cx + side * 0.5f;
        by0 = cy - side * 0.5f; by1 = cy + side * 0.5f;

        // Reserva el pool antes de construir: 5*N + 1024 es cota holgada
        // para quadtrees de 2D (worst case ~4N celdas internas+hojas).
        quad_pool_reserve((size_t)graph.node_count * 5 + 1024);
        quad_count = 0;
        g_nodes = graph.nodes;
        int root = quad_new(bx0, by0, bx1, by1);

        for (int i = 0; i < graph.node_count; ++i) {
            quad_insert_body(root, i);
        }

        // ---- Repulsion via Barnes-Hut ----
        // Cada iteracion lee del quadtree (read-only) y escribe en su propio
        // slot de fx_buf/fy_buf — embarrassingly parallel. quad_force usa
        // stack local en pila, asi que es thread-safe.
        #pragma omp parallel for if(graph.node_count >= 1024) schedule(dynamic, 256)
        for (int i = 0; i < graph.node_count; ++i) {
            if (graph.nodes[i].pinned) continue;
            quad_force(root,
                       graph.nodes[i].x, graph.nodes[i].y,
                       config.theta, config.repulsion, config.min_distance,
                       fx_buf[i], fy_buf[i]);
        }

        // ---- Attraction along edges (spring force) ----
        for (int e = 0; e < graph.edge_count; ++e) {
            const GraphEdge& edge = graph.edges[e];
            int s = (int)edge.source;
            int t = (int)edge.target;
            if (s < 0 || s >= graph.node_count) continue;
            if (t < 0 || t >= graph.node_count) continue;

            float dx = graph.nodes[t].x - graph.nodes[s].x;
            float dy = graph.nodes[t].y - graph.nodes[s].y;
            float dist = std::sqrt(dx * dx + dy * dy);
            if (dist < config.min_distance) dist = config.min_distance;

            // F = k * dist * weight  (Hooke: pulls toward equilibrium at 0)
            float force = config.attraction * dist * edge.weight;
            float fx_e = force * dx / dist;
            float fy_e = force * dy / dist;

            if (!graph.nodes[s].pinned) { fx_buf[s] += fx_e; fy_buf[s] += fy_e; }
            if (!graph.nodes[t].pinned) { fx_buf[t] -= fx_e; fy_buf[t] -= fy_e; }
        }

        // ---- Gravity toward center (0,0) ----
        if (config.gravity != 0.0f) {
            #pragma omp parallel for if(graph.node_count >= 1024) schedule(static)
            for (int i = 0; i < graph.node_count; ++i) {
                if (graph.nodes[i].pinned) continue;
                fx_buf[i] -= config.gravity * graph.nodes[i].x;
                fy_buf[i] -= config.gravity * graph.nodes[i].y;
            }
        }

        // ---- Integrate: v = v * damping + F; pos += v ----
        total_energy = 0.0f;
        #pragma omp parallel for if(graph.node_count >= 1024) schedule(static) reduction(+:total_energy)
        for (int i = 0; i < graph.node_count; ++i) {
            GraphNode& n = graph.nodes[i];
            if (n.pinned) continue;

            n.vx = n.vx * config.damping + fx_buf[i];
            n.vy = n.vy * config.damping + fy_buf[i];

            // Clamp velocity
            n.vx = std::max(-config.max_velocity, std::min(config.max_velocity, n.vx));
            n.vy = std::max(-config.max_velocity, std::min(config.max_velocity, n.vy));

            n.x += n.vx;
            n.y += n.vy;

            total_energy += n.vx * n.vx + n.vy * n.vy;
        }
    }

    graph.update_bounds();
    return total_energy;
}

void graph_force_layout_reset(GraphData& graph, float spread) {
    for (int i = 0; i < graph.node_count; ++i) {
        GraphNode& n = graph.nodes[i];
        if (n.pinned) continue;
        // rand() produces [0, RAND_MAX]; map to [-spread, spread]
        n.x = spread * (2.0f * (float)rand() / (float)RAND_MAX - 1.0f);
        n.y = spread * (2.0f * (float)rand() / (float)RAND_MAX - 1.0f);
        n.vx = 0.0f;
        n.vy = 0.0f;
    }
    graph.update_bounds();
}

void graph_layout_circular(GraphData& graph, float radius) {
    if (graph.node_count <= 0) return;
    const float two_pi = 6.28318530718f;
    for (int i = 0; i < graph.node_count; ++i) {
        GraphNode& n = graph.nodes[i];
        if (n.pinned) continue;
        float angle = two_pi * (float)i / (float)graph.node_count;
        n.x = radius * std::cos(angle);
        n.y = radius * std::sin(angle);
        n.vx = 0.0f;
        n.vy = 0.0f;
    }
    graph.update_bounds();
}

bool graph_force_layout_should_pause(int consecutive_low_frames, int min_consecutive) {
    if (min_consecutive <= 0) return true;
    return consecutive_low_frames >= min_consecutive;
}

void graph_layout_grid(GraphData& graph, float spacing) {
    if (graph.node_count <= 0) return;
    int cols = (int)std::ceil(std::sqrt((float)graph.node_count));
    int rows = (graph.node_count + cols - 1) / cols;
    float ox = -0.5f * (cols - 1) * spacing;
    float oy = -0.5f * (rows - 1) * spacing;
    for (int i = 0; i < graph.node_count; ++i) {
        GraphNode& n = graph.nodes[i];
        if (n.pinned) continue;
        int col = i % cols;
        int row = i / cols;
        n.x = ox + col * spacing;
        n.y = oy + row * spacing;
        n.vx = 0.0f;
        n.vy = 0.0f;
    }
    graph.update_bounds();
}