#include "gfx/gpu_histogram_2d.h"

#include "gfx/gl_loader.h"
#include "gfx/gpu_compute_program.h"
#include "gfx/gpu_dispatch.h"

#include <cstddef>
#include <cstdio>
#include <vector>

namespace fn::gfx {

// Work-group size along X. The value handed to compile_compute() and the one
// handed to dispatch_1d() must agree, so both read this single constant.
static constexpr int k_local_size_x = 64;

// Samples are stored as float[2 * count], xy-interleaved, and read in the
// shader as (samples[2*i], samples[2*i+1]). Using loose floats sidesteps any
// concern about vec2 padding in std430 (8 bytes in GL, which would be fine,
// but plain floats are more portable when the data is produced on the CPU).
static const char* k_accum_body = R"glsl(
layout(std430, binding = 0) readonly buffer Samples { float samples[]; };
layout(std430, binding = 1) coherent buffer Bins { uint bins[]; };

uniform uint u_count;
uniform uint u_nx;
uniform uint u_ny;
uniform vec2 u_min;        // (xmin, ymin)
uniform vec2 u_inv_range;  // (1/xrange, 1/yrange)

void main() {
    uint i = gl_GlobalInvocationID.x;
    if (i >= u_count) return;

    float x = samples[2u * i + 0u];
    float y = samples[2u * i + 1u];

    float tx = (x - u_min.x) * u_inv_range.x;
    float ty = (y - u_min.y) * u_inv_range.y;

    // Written as a negated "inside" test so NaN coordinates (for which every
    // comparison is false) are rejected instead of reaching the uint()
    // conversion below, whose result is undefined for NaN.
    if (!(tx >= 0.0 && tx < 1.0) || !(ty >= 0.0 && ty < 1.0)) return;

    uint bx = uint(tx * float(u_nx));
    uint by = uint(ty * float(u_ny));

    // Float rounding can push tx * nx up to exactly nx; clamp to the last bin.
    if (bx >= u_nx) bx = u_nx - 1u;
    if (by >= u_ny) by = u_ny - 1u;

    atomicAdd(bins[by * u_nx + bx], 1u);
}
)glsl";

/// @brief Builds the accumulation program and allocates a zeroed nx * ny bin buffer.
///
/// @param nx Number of bins along X; must be > 0.
/// @param ny Number of bins along Y; must be > 0.
/// @return A ready-to-use histogram, or a value-initialised (empty) handle when
///         the dimensions are invalid, the shader fails to compile, or the bin
///         buffer could not be created.
GpuHistogram2D gpu_histogram_2d_create(int nx, int ny) {
    GpuHistogram2D h{};
    if (nx <= 0 || ny <= 0) return h;

    auto r = compile_compute(k_accum_body, k_local_size_x, "");
    if (!r.ok) {
        std::fprintf(stderr, "[gpu_histogram_2d] compile error: %s\n", r.err_msg.c_str());
        return h;
    }

    h.program = r.program;
    h.loc_count = static_cast<int>(glGetUniformLocation(h.program, "u_count"));
    h.loc_nx = static_cast<int>(glGetUniformLocation(h.program, "u_nx"));
    h.loc_ny = static_cast<int>(glGetUniformLocation(h.program, "u_ny"));
    h.loc_min = static_cast<int>(glGetUniformLocation(h.program, "u_min"));
    h.loc_inv_range = static_cast<int>(glGetUniformLocation(h.program, "u_inv_range"));
    h.nx = nx;
    h.ny = ny;

    h.bins = ssbo_create(
        static_cast<std::size_t>(nx) * static_cast<std::size_t>(ny) * sizeof(unsigned int),
        nullptr, GL_DYNAMIC_COPY);

    // NOTE(review): id == 0 is treated as "no buffer" by clear()/readback() in
    // this file, so it is taken here to mean that allocation failed. Do not
    // hand back a handle with a live program but no bins: release the program
    // and report failure with an empty handle.
    if (h.bins.id == 0) {
        std::fprintf(stderr, "[gpu_histogram_2d] failed to create bins buffer\n");
        delete_compute_program(h.program);
        return GpuHistogram2D{};
    }

    gpu_histogram_2d_clear(h);
    return h;
}

/// @brief Resets every bin to zero by uploading a zero-filled host array.
///
/// Does nothing when the histogram has no bin buffer.
void gpu_histogram_2d_clear(GpuHistogram2D& h) {
    if (h.bins.id == 0) return;

    std::size_t total = static_cast<std::size_t>(h.nx) * static_cast<std::size_t>(h.ny);
    std::vector<unsigned int> zeros(total, 0u);
    ssbo_upload(h.bins, 0, total * sizeof(unsigned int), zeros.data());
}

/// @brief Adds `count` samples to the histogram on the GPU.
///
/// Samples outside [xmin, xmax) x [ymin, ymax), and samples with NaN
/// coordinates, are ignored. Counts accumulate on top of whatever the bins
/// already hold; call gpu_histogram_2d_clear() first for a fresh histogram.
///
/// @param h          Histogram created by gpu_histogram_2d_create().
/// @param samples_xy Buffer read by the shader as xy-interleaved floats
///                   (at least 2 * count of them).
/// @param count      Number of samples; calls with count <= 0 are no-ops.
/// @param xmin,xmax  X extent of the histogram; requires xmax > xmin.
/// @param ymin,ymax  Y extent of the histogram; requires ymax > ymin.
void gpu_histogram_2d_accumulate(GpuHistogram2D& h, const Ssbo& samples_xy, int count,
                                 float xmin, float xmax, float ymin, float ymax) {
    if (h.program == 0 || h.bins.id == 0 || count <= 0) return;

    float xr = xmax - xmin;
    float yr = ymax - ymin;
    // Negated form so a NaN range is rejected as well as an empty/inverted one.
    if (!(xr > 0.0f) || !(yr > 0.0f)) return;

    glUseProgram(h.program);
    ssbo_bind(samples_xy, 0);
    ssbo_bind(h.bins, 1);

    glUniform1ui(static_cast<GLint>(h.loc_count), static_cast<GLuint>(count));
    glUniform1ui(static_cast<GLint>(h.loc_nx), static_cast<GLuint>(h.nx));
    glUniform1ui(static_cast<GLint>(h.loc_ny), static_cast<GLuint>(h.ny));
    glUniform2f(static_cast<GLint>(h.loc_min), xmin, ymin);
    glUniform2f(static_cast<GLint>(h.loc_inv_range), 1.0f / xr, 1.0f / yr);

    dispatch_1d(count, k_local_size_x);
    // Presumably makes the shader's bin writes visible to later shader passes;
    // see barrier_storage() in gfx/gpu_dispatch.h.
    barrier_storage();
}

/// @brief Copies the nx * ny bin counts back to host memory.
///
/// @param h   Histogram to read.
/// @param out Destination for nx * ny unsigned ints, indexed as by * nx + bx.
///            The call is a no-op when `out` is null or there is no bin buffer.
void gpu_histogram_2d_readback(const GpuHistogram2D& h, unsigned int* out) {
    if (h.bins.id == 0 || out == nullptr) return;

    barrier_buffer_update();
    std::size_t total = static_cast<std::size_t>(h.nx) * static_cast<std::size_t>(h.ny);
    ssbo_readback(h.bins, 0, total * sizeof(unsigned int), out);
}

/// @brief Normalises raw counts to [0, 1] by dividing by the largest count.
///
/// When every count is zero the output is all zeros (no division happens).
///
/// @param counts      Input array of nx * ny counts.
/// @param nx,ny       Grid dimensions; must both be > 0.
/// @param out_density Output array of nx * ny floats.
void gpu_histogram_2d_to_density(const unsigned int* counts, int nx, int ny,
                                 float* out_density) {
    if (counts == nullptr || out_density == nullptr || nx <= 0 || ny <= 0) return;

    std::size_t total = static_cast<std::size_t>(nx) * static_cast<std::size_t>(ny);

    unsigned int max_c = 0u;
    for (std::size_t i = 0; i < total; ++i) {
        if (counts[i] > max_c) max_c = counts[i];
    }

    if (max_c == 0u) {
        for (std::size_t i = 0; i < total; ++i) out_density[i] = 0.0f;
        return;
    }

    float inv = 1.0f / static_cast<float>(max_c);
    for (std::size_t i = 0; i < total; ++i) {
        out_density[i] = static_cast<float>(counts[i]) * inv;
    }
}

/// @brief Releases the compute program and the bin buffer and zeroes the dimensions.
void gpu_histogram_2d_destroy(GpuHistogram2D& h) {
    delete_compute_program(h.program);
    h.program = 0;
    ssbo_destroy(h.bins);
    h.nx = 0;
    h.ny = 0;
}

}  // namespace fn::gfx