feat(cpp/datascience): CPU stats + MCMC primitives
Nuevo dominio cpp/functions/datascience con primitivas puras CPU para postproceso de samples Monte Carlo y diagnóstico de cadenas MCMC. Diseñadas como gemelas CPU de los kernels GPU (rng pareja con gpu_rng_glsl, MH 1D/ND con mc_metropolis_hastings_gpu) para validar numéricamente y para datasets pequeños donde el dispatch GPU no compensa. - rng: xoshiro256++ con uniform / normal (Box-Muller) / below (Lemire) / categorical. Determinista bit-exacto dado seed. - stats_summary: sum (Kahan), mean, var/std (Welford one-pass), min, max, quantile / percentile (R type-7). - autocorr: r(k), ACF, tau_int (Sokal) — diagnóstico ACF y ESS. - rhat_ess: Gelman-Rubin clásico y split + ESS básico (multi-chain). - beta_dist: lgamma (Lanczos), beta_pdf, beta_cdf (continued fraction), beta_quantile, mean/var/std — para inferencia Beta-Binomial. - drawdown: max_dd absoluto/pct + underwater series para sesiones simuladas y backtests. - samples_to_grid_2d: binning 2D CPU para alimentar heatmap_cpp_viz / contour_cpp_viz desde samples (x[], y[]). - metropolis_hastings: MH 1D y ND con target log-pdf como std::function (no normalizada). Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -0,0 +1,71 @@
|
||||
#include "datascience/samples_to_grid_2d.h"
|
||||
|
||||
#include <cmath>
|
||||
#include <vector>
|
||||
|
||||
namespace fn::ds {
|
||||
|
||||
/// @brief Accumulates a 2D histogram of the points (samples_x[i], samples_y[i]).
///
/// The rectangle [xmin, xmax) x [ymin, ymax) is divided into nx * ny equal
/// cells stored row-major (flat index = by * nx + bx). Every sample that lies
/// inside the rectangle increments its cell; samples outside it, and samples
/// with a NaN coordinate, are skipped. Counts are added on top of whatever
/// out_counts already holds, so the caller must zero-fill the buffer to get a
/// fresh histogram.
///
/// The call is a no-op when any pointer is null, when nx or ny is not
/// positive, or when either axis range is empty, inverted or NaN.
///
/// @param samples_x   x coordinates (n values).
/// @param samples_y   y coordinates (n values).
/// @param n           number of samples.
/// @param xmin,xmax   half-open x range covered by the grid.
/// @param ymin,ymax   half-open y range covered by the grid.
/// @param nx,ny       number of cells along x and y.
/// @param out_counts  buffer of nx * ny counters, updated in place.
void samples_to_grid_2d_counts(const double* samples_x,
                               const double* samples_y,
                               std::size_t n,
                               double xmin, double xmax,
                               double ymin, double ymax,
                               int nx, int ny,
                               unsigned int* out_counts) {
    if (out_counts == nullptr || nx <= 0 || ny <= 0) return;
    if (samples_x == nullptr || samples_y == nullptr) return;

    const double xr = xmax - xmin;
    const double yr = ymax - ymin;
    // Negated "> 0" so that a NaN range is rejected as well as an empty or
    // inverted one (a plain "<= 0" test lets NaN through).
    if (!(xr > 0.0) || !(yr > 0.0)) return;

    const double inv_x = 1.0 / xr;
    const double inv_y = 1.0 / yr;

    // Index arithmetic is done in size_t: by * nx can exceed INT_MAX on large
    // grids even though nx and ny individually fit in an int.
    const std::size_t cols = static_cast<std::size_t>(nx);
    const std::size_t rows = static_cast<std::size_t>(ny);

    for (std::size_t i = 0; i < n; ++i) {
        const double tx = (samples_x[i] - xmin) * inv_x;
        const double ty = (samples_y[i] - ymin) * inv_y;

        // Phrased as an "inside" test so NaN fails it and is skipped;
        // converting NaN to an integer type is undefined behaviour.
        const bool inside = tx >= 0.0 && tx < 1.0 && ty >= 0.0 && ty < 1.0;
        if (!inside) continue;

        std::size_t bx = static_cast<std::size_t>(tx * static_cast<double>(nx));
        std::size_t by = static_cast<std::size_t>(ty * static_cast<double>(ny));
        // Rounding in the product can land exactly on nx / ny; clamp to the
        // last cell.
        if (bx >= cols) bx = cols - 1;
        if (by >= rows) by = rows - 1;

        ++out_counts[by * cols + bx];
    }
}
|
||||
|
||||
/// @brief Rescales a grid of counts to floats by dividing by the largest count.
///
/// Reads nx * ny counters from counts and writes the same number of floats to
/// out_density, so the fullest cell becomes 1.0f. When every counter is zero
/// the whole output is set to 0.0f. Nothing is written when either pointer is
/// null or when nx or ny is not positive.
///
/// @param counts       nx * ny input counters.
/// @param nx,ny        grid dimensions.
/// @param out_density  nx * ny output values.
void counts_to_density(const unsigned int* counts, int nx, int ny,
                       float* out_density) {
    const bool usable =
        counts != nullptr && out_density != nullptr && nx > 0 && ny > 0;
    if (!usable) return;

    const std::size_t cell_count =
        static_cast<std::size_t>(nx) * static_cast<std::size_t>(ny);

    // First pass: find the peak counter.
    unsigned int peak = 0u;
    const unsigned int* const last = counts + cell_count;
    for (const unsigned int* it = counts; it != last; ++it) {
        peak = (*it > peak) ? *it : peak;
    }

    if (peak != 0u) {
        // Multiply by the reciprocal rather than dividing per cell.
        const float scale = 1.0f / static_cast<float>(peak);
        for (std::size_t cell = 0; cell != cell_count; ++cell) {
            out_density[cell] = static_cast<float>(counts[cell]) * scale;
        }
    } else {
        // Empty histogram: nothing to scale by, emit all zeros.
        for (std::size_t cell = 0; cell != cell_count; ++cell) {
            out_density[cell] = 0.0f;
        }
    }
}
|
||||
|
||||
void samples_to_grid_2d_density(const double* samples_x,
|
||||
const double* samples_y,
|
||||
std::size_t n,
|
||||
double xmin, double xmax,
|
||||
double ymin, double ymax,
|
||||
int nx, int ny,
|
||||
float* out_density) {
|
||||
if (out_density == nullptr || nx <= 0 || ny <= 0) return;
|
||||
std::size_t total = static_cast<std::size_t>(nx) *
|
||||
static_cast<std::size_t>(ny);
|
||||
std::vector<unsigned int> counts(total, 0u);
|
||||
samples_to_grid_2d_counts(samples_x, samples_y, n,
|
||||
xmin, xmax, ymin, ymax,
|
||||
nx, ny, counts.data());
|
||||
counts_to_density(counts.data(), nx, ny, out_density);
|
||||
}
|
||||
|
||||
} // namespace fn::ds
|
||||
Reference in New Issue
Block a user