c74fd4ae0d
Stack base de compute shaders OpenGL 4.3 para cargas Monte Carlo intensivas en GPU. Reutiliza el patron de graph_force_layout_gpu (SSBO + compute) y se integra con el resto del registry sin nuevos simbolos en gl_loader (todo lo que se necesita ya estaba expuesto). - gpu_ssbo: lifecycle de Shader Storage Buffer Objects. - gpu_compute_program: compila compute GLSL 4.3 con preamble inyectable (mismo pattern de gl_shader::compile_fragment). - gpu_dispatch: dispatch_1d/2d/3d con ceil(N/local) automatico + barrier helpers (storage, uniform, image, buffer_update, all). - gpu_rng_glsl: PCG32 GLSL (uniform/normal/below) + SplitMix64 seed walkers para sembrar deterministicamente N walkers desde un master seed. - gpu_histogram_1d: SSBO float[N] -> uint[nbins] via atomicAdd. - gpu_histogram_2d: SSBO float[2N] xy-interleaved -> uint[nx*ny] + to_density helper para alimentar heatmap_cpp_viz. - gpu_reduce: workgroup-shared sum/min/max/mean (local 256, partials CPU). Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
83 lines
3.2 KiB
C++
83 lines
3.2 KiB
C++
#include "gfx/gpu_rng_glsl.h"
|
|
|
|
#include <cstdint>
|
|
#include <cstdio>
|
|
|
|
namespace fn::gfx {
|
|
|
|
/// Builds the GLSL source fragment that gives a compute shader a per-thread
/// random number generator.
///
/// The returned text consists of:
///   1. an std430 SSBO declaration `RngSeeds { uint rng_seeds[]; }` bound at
///      `seed_binding`, and
///   2. four GLSL helpers operating on an `inout uint state`:
///      `pcg32`, `rng_uniform`, `rng_normal` and `rng_below`.
///
/// @param seed_binding  Binding index written verbatim into the `layout(...)`
///                      qualifier. It is not validated here; a negative value
///                      produces GLSL that the driver will reject.
/// @return The concatenated declaration + helper functions, ready to be
///         prepended to a shader body.
std::string glsl_rng_preamble(int seed_binding) {
    // The format string plus any decimal `int` is far below 256 characters,
    // so the snprintf call cannot truncate.
    char header[256];
    std::snprintf(header, sizeof(header),
                  "layout(std430, binding = %d) buffer RngSeeds { uint rng_seeds[]; };\n",
                  seed_binding);

    // pcg32: a 32-bit LCG step followed by an xorshift/multiply output
    //   permutation (PCG family, M.E. O'Neill, 2014). Each chain has 32 bits
    //   of state, hence a period of 2^32 -- adequate for non-cryptographic
    //   Monte Carlo when every thread owns an independently seeded chain.
    //
    // rng_uniform: returns a float in [0, 1). Only the top 24 bits of the
    //   draw are used: a 32-bit float has a 24-bit significand, so converting
    //   the full 32-bit word rounds values >= 2^32 - 128 up to 2^32 and would
    //   yield exactly 1.0. `(x >> 8) * 2^-24` is exact and strictly below 1.
    //
    // rng_normal: basic (trigonometric) Box-Muller transform. u1 is clamped
    //   to 1e-7 so log() never sees zero. Only the cosine branch is returned;
    //   the sine branch is discarded (it could be cached per thread if the
    //   extra throughput is ever needed).
    //
    // rng_below: rejection sampling against the next power-of-two mask.
    //   After 16 rejected draws it falls back to `% n`; since each draw is
    //   accepted with probability > 1/2, the fallback (and its modulo bias)
    //   is reached with probability below 2^-16.
    static const char* const body =
        "uint pcg32(inout uint state) {\n"
        " state = state * 747796405u + 2891336453u;\n"
        " uint word = ((state >> ((state >> 28u) + 4u)) ^ state) * 277803737u;\n"
        " return (word >> 22u) ^ word;\n"
        "}\n"
        "float rng_uniform(inout uint state) {\n"
        " return float(pcg32(state) >> 8u) * (1.0 / 16777216.0);\n"
        "}\n"
        "float rng_normal(inout uint state) {\n"
        " float u1 = max(rng_uniform(state), 1e-7);\n"
        " float u2 = rng_uniform(state);\n"
        " return sqrt(-2.0 * log(u1)) * cos(6.28318530717958647692 * u2);\n"
        "}\n"
        "uint rng_below(inout uint state, uint n) {\n"
        " if (n == 0u) return 0u;\n"
        " uint mask = n - 1u;\n"
        " mask |= mask >> 1u; mask |= mask >> 2u; mask |= mask >> 4u;\n"
        " mask |= mask >> 8u; mask |= mask >> 16u;\n"
        " for (int k = 0; k < 16; ++k) {\n"
        " uint v = pcg32(state) & mask;\n"
        " if (v < n) return v;\n"
        " }\n"
        " return pcg32(state) % n;\n"
        "}\n";

    std::string out;
    out.reserve(1024);  // header + body stay under 1 KiB: one allocation
    out += header;
    out += body;
    return out;
}
|
|
|
|
/// Fills `out[0..count)` with 32-bit seeds derived deterministically from
/// `master_seed`, one per walker.
///
/// A SplitMix64 sequence is advanced once per slot and the low 32 bits of
/// each mixed value become that slot's seed.
///
/// @param master_seed  Root seed. A value of 0 is replaced by the increment
///                     constant 0x9E3779B97F4A7C15, so 0 and that constant
///                     yield the same sequence.
/// @param out          Destination array; the call is a no-op when null.
/// @param count        Number of seeds to write; the call is a no-op when
///                     `count <= 0`.
void seed_walkers_init(unsigned long long master_seed,
                       unsigned int* out, int count) {
    if (out == nullptr || count <= 0) return;

    // SplitMix64 increment, also used as the stand-in for a zero master seed.
    constexpr std::uint64_t kIncrement = 0x9E3779B97F4A7C15ULL;

    std::uint64_t stream = (master_seed != 0ULL)
                               ? static_cast<std::uint64_t>(master_seed)
                               : kIncrement;

    unsigned int* const end = out + count;
    for (unsigned int* slot = out; slot != end; ++slot) {
        // Advance the stream, then run the SplitMix64 finalizer on a copy.
        stream += kIncrement;
        std::uint64_t mixed = stream;
        mixed ^= mixed >> 30;
        mixed *= 0xBF58476D1CE4E5B9ULL;
        mixed ^= mixed >> 27;
        mixed *= 0x94D049BB133111EBULL;
        mixed ^= mixed >> 31;

        // Keep the low word; a zero seed is never emitted.
        const std::uint32_t low = static_cast<std::uint32_t>(mixed & 0xFFFFFFFFULL);
        *slot = (low != 0u) ? low : 0x9E3779B9u;
    }
}
|
|
|
|
} // namespace fn::gfx
|