#include "datascience/mc_session_sim_gpu.h" #include "gfx/gl_loader.h" #include "gfx/gpu_compute_program.h" #include "gfx/gpu_dispatch.h" #include "gfx/gpu_rng_glsl.h" #include #include namespace fn::ds { constexpr int kMaxTiersHard = 8; constexpr int kSeedBinding = 9; // 1 thread = 1 sesion completa. Loops internamente sobre los spins. Como // cada sesion es independiente y cada thread tiene su seed, no hace falta // ningun atomic ni barrier dentro del workgroup. La unica restriccion es // que cada thread procesa hasta MAX_TIERS_HARD = 8 contadores locales en // registros (uint local_counts[8]). static const char* k_body = R"glsl( #define MAX_TIERS 8 layout(std430, binding = 0) buffer Summaries { float summary[]; }; layout(std430, binding = 1) buffer TierCounts { uint tier_counts[]; }; layout(std430, binding = 2) buffer Tiers { vec2 tiers[]; }; uniform uint u_n_sessions; uniform uint u_n_spins; uniform uint u_n_tiers; uniform float u_p_base; uniform float u_cost; uniform float u_start_balance; uniform int u_mode; uniform float u_mode_p1; uniform float u_mode_p2; void main() { uint sid = gl_GlobalInvocationID.x; if (sid >= u_n_sessions) return; uint s = rng_seeds[sid]; float balance = u_start_balance; float peak = balance; float trough = balance; float max_dd = 0.0; uint spins_done = 0u; uint wins = 0u; uint miss_streak = 0u; uint longest_miss = 0u; uint local_counts[MAX_TIERS]; for (int k = 0; k < MAX_TIERS; ++k) local_counts[k] = 0u; float total_q = 0.0; for (uint k = 0u; k < u_n_tiers; ++k) total_q += tiers[k].x; float inv_total = (total_q > 0.0) ? (1.0 / total_q) : 0.0; for (uint i = 0u; i < u_n_spins; ++i) { if (balance < u_cost) break; // Effective probability (modes Pure / Pity / Streak) float p_eff = u_p_base; if (u_mode == 1) { float ms = float(miss_streak); float soft = u_mode_p1; float hard = u_mode_p2; if (ms < soft) { p_eff = u_p_base; } else if (ms >= hard) { p_eff = 1.0; } else { p_eff = u_p_base + (1.0 - u_p_base) * ((ms - soft) / max(hard - soft, 1.0)); } } else if (u_mode == 2) { p_eff = min(1.0, u_p_base * (1.0 + u_mode_p1 * float(miss_streak))); } balance -= u_cost; spins_done = i + 1u; float r1 = rng_uniform(s); if (r1 < p_eff) { float u = rng_uniform(s) * total_q; float acc = 0.0; uint chosen = u_n_tiers - 1u; for (uint k = 0u; k < u_n_tiers; ++k) { acc += tiers[k].x; if (u < acc) { chosen = k; break; } } float mult = tiers[chosen].y; balance += mult * u_cost; wins += 1u; if (chosen < uint(MAX_TIERS)) local_counts[chosen] += 1u; miss_streak = 0u; } else { miss_streak += 1u; if (miss_streak > longest_miss) longest_miss = miss_streak; } if (balance > peak) peak = balance; if (balance < trough) trough = balance; float dd = peak - balance; if (dd > max_dd) max_dd = dd; } rng_seeds[sid] = s; uint base = sid * 8u; summary[base + 0u] = balance; summary[base + 1u] = balance - u_start_balance; // pnl summary[base + 2u] = max_dd; summary[base + 3u] = peak; summary[base + 4u] = trough; summary[base + 5u] = float(spins_done); summary[base + 6u] = float(wins); summary[base + 7u] = float(longest_miss); uint cbase = sid * uint(MAX_TIERS); for (int k = 0; k < MAX_TIERS; ++k) { tier_counts[cbase + uint(k)] = local_counts[k]; } // Avoid unused-but-set warnings on inv_total if a future variant uses it. (void)(inv_total); } )glsl"; McSessionSim mc_session_sim_create(int n_sessions, int max_tiers) { McSessionSim s{}; if (n_sessions <= 0) return s; if (max_tiers <= 0 || max_tiers > kMaxTiersHard) max_tiers = kMaxTiersHard; auto rng = fn::gfx::glsl_rng_preamble(kSeedBinding); auto r = fn::gfx::compile_compute(k_body, 64, rng); if (!r.ok) { std::fprintf(stderr, "[mc_session_sim_gpu] compile error: %s\n", r.err_msg.c_str()); return s; } s.program = r.program; s.loc_n_sessions = static_cast(glGetUniformLocation(s.program, "u_n_sessions")); s.loc_n_spins = static_cast(glGetUniformLocation(s.program, "u_n_spins")); s.loc_n_tiers = static_cast(glGetUniformLocation(s.program, "u_n_tiers")); s.loc_p_base = static_cast(glGetUniformLocation(s.program, "u_p_base")); s.loc_cost = static_cast(glGetUniformLocation(s.program, "u_cost")); s.loc_start_bal = static_cast(glGetUniformLocation(s.program, "u_start_balance")); s.loc_mode = static_cast(glGetUniformLocation(s.program, "u_mode")); s.loc_mode_p1 = static_cast(glGetUniformLocation(s.program, "u_mode_p1")); s.loc_mode_p2 = static_cast(glGetUniformLocation(s.program, "u_mode_p2")); s.n_sessions = n_sessions; s.max_tiers = max_tiers; s.summary = fn::gfx::ssbo_create( static_cast(n_sessions) * 8u * sizeof(float), nullptr, GL_DYNAMIC_COPY); s.tier_counts = fn::gfx::ssbo_create( static_cast(n_sessions) * static_cast(max_tiers) * sizeof(unsigned int), nullptr, GL_DYNAMIC_COPY); s.tiers = fn::gfx::ssbo_create( static_cast(max_tiers) * 2u * sizeof(float), nullptr, GL_DYNAMIC_DRAW); s.rng_seeds = fn::gfx::ssbo_create( static_cast(n_sessions) * sizeof(unsigned int), nullptr, GL_DYNAMIC_COPY); return s; } void mc_session_sim_reseed(McSessionSim& s, unsigned long long master_seed) { if (s.rng_seeds.id == 0 || s.n_sessions <= 0) return; std::vector seeds(s.n_sessions); fn::gfx::seed_walkers_init(master_seed, seeds.data(), s.n_sessions); fn::gfx::ssbo_upload(s.rng_seeds, 0, static_cast(s.n_sessions) * sizeof(unsigned int), seeds.data()); } void mc_session_sim_run(McSessionSim& s, const McSessionParams& p) { if (s.program == 0 || s.n_sessions <= 0) return; if (p.n_tiers <= 0 || p.tiers_q == nullptr || p.tiers_m == nullptr) return; int nt = p.n_tiers; if (nt > s.max_tiers) nt = s.max_tiers; // Pack tiers como vec2 (q, m) — std430 vec2 = 8 bytes alineado. std::vector packed(static_cast(s.max_tiers) * 2u, 0.0f); for (int i = 0; i < nt; ++i) { packed[2 * i + 0] = p.tiers_q[i]; packed[2 * i + 1] = p.tiers_m[i]; } fn::gfx::ssbo_upload(s.tiers, 0, packed.size() * sizeof(float), packed.data()); glUseProgram(s.program); fn::gfx::ssbo_bind(s.summary, 0); fn::gfx::ssbo_bind(s.tier_counts, 1); fn::gfx::ssbo_bind(s.tiers, 2); fn::gfx::ssbo_bind(s.rng_seeds, kSeedBinding); glUniform1ui(static_cast(s.loc_n_sessions), static_cast(s.n_sessions)); glUniform1ui(static_cast(s.loc_n_spins), static_cast(p.n_spins)); glUniform1ui(static_cast(s.loc_n_tiers), static_cast(nt)); glUniform1f (static_cast(s.loc_p_base), p.p_base); glUniform1f (static_cast(s.loc_cost), p.cost); glUniform1f (static_cast(s.loc_start_bal), p.start_balance); glUniform1i (static_cast(s.loc_mode), static_cast(p.mode)); glUniform1f (static_cast(s.loc_mode_p1), p.mode_p1); glUniform1f (static_cast(s.loc_mode_p2), p.mode_p2); fn::gfx::dispatch_1d(s.n_sessions, 64); fn::gfx::barrier_buffer_update(); } void mc_session_sim_readback_summary(const McSessionSim& s, float* out) { if (s.summary.id == 0 || s.n_sessions <= 0 || out == nullptr) return; fn::gfx::ssbo_readback( s.summary, 0, static_cast(s.n_sessions) * 8u * sizeof(float), out); } void mc_session_sim_readback_tier_counts(const McSessionSim& s, unsigned int* out) { if (s.tier_counts.id == 0 || s.n_sessions <= 0 || out == nullptr) return; fn::gfx::ssbo_readback( s.tier_counts, 0, static_cast(s.n_sessions) * static_cast(s.max_tiers) * sizeof(unsigned int), out); } void mc_session_sim_destroy(McSessionSim& s) { fn::gfx::delete_compute_program(s.program); s.program = 0; fn::gfx::ssbo_destroy(s.summary); fn::gfx::ssbo_destroy(s.tier_counts); fn::gfx::ssbo_destroy(s.tiers); fn::gfx::ssbo_destroy(s.rng_seeds); s.n_sessions = 0; } } // namespace fn::ds