d76c831247
Nuevo dominio cpp/functions/datascience con primitivas puras CPU para post- proceso de samples Monte Carlo y diagnostico de cadenas MCMC. Diseñadas como gemelas CPU de los kernels GPU (rng pareja con gpu_rng_glsl, MH 1D/ND con mc_metropolis_hastings_gpu) para validar numericamente y para datasets pequeños donde el dispatch GPU no compensa. - rng: xoshiro256++ con uniform / normal (Box-Muller) / below (Lemire) / categorical. Determinista bit-exacto dado seed. - stats_summary: sum (Kahan), mean, var/std (Welford one-pass), min, max, quantile / percentile (R type-7). - autocorr: r(k), ACF, tau_int (Sokal) — diagnostico ACF y ESS. - rhat_ess: Gelman-Rubin clasico y split + ESS basico (multi-chain). - beta_dist: lgamma (Lanczos), beta_pdf, beta_cdf (continued fraction), beta_quantile, mean/var/std — para inferencia Beta-Binomial. - drawdown: max_dd absoluto/pct + underwater series para sesiones simuladas y backtests. - samples_to_grid_2d: binning 2D CPU para alimentar heatmap_cpp_viz / contour_cpp_viz desde samples (x[], y[]). - metropolis_hastings: MH 1D y ND con target log-pdf como std::function (no normalizada). Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
97 lines
3.0 KiB
C++
97 lines
3.0 KiB
C++
#include "datascience/metropolis_hastings.h"
|
|
|
|
#include <cmath>
|
|
#include <vector>
|
|
|
|
namespace fn::ds {
|
|
|
|
MHResult mh_run_1d(const std::function<double(double)>& target_log_pdf,
|
|
double x0,
|
|
double proposal_sigma,
|
|
std::size_t n_samples,
|
|
double* out_chain,
|
|
Rng& r) {
|
|
MHResult res{};
|
|
if (n_samples == 0 || out_chain == nullptr) return res;
|
|
out_chain[0] = x0;
|
|
double curr = x0;
|
|
double log_p_curr = target_log_pdf(curr);
|
|
std::size_t accepted = 0;
|
|
|
|
for (std::size_t i = 1; i < n_samples; ++i) {
|
|
double prop = curr + proposal_sigma * rng_normal(r);
|
|
double log_p_prop = target_log_pdf(prop);
|
|
double log_alpha = log_p_prop - log_p_curr;
|
|
bool accept = false;
|
|
if (log_alpha >= 0.0) {
|
|
accept = true;
|
|
} else {
|
|
double u = rng_uniform(r);
|
|
if (u < std::exp(log_alpha)) accept = true;
|
|
}
|
|
if (accept) {
|
|
curr = prop;
|
|
log_p_curr = log_p_prop;
|
|
++accepted;
|
|
}
|
|
out_chain[i] = curr;
|
|
}
|
|
res.n_samples = n_samples;
|
|
res.n_accepted = accepted;
|
|
res.accept_rate = (n_samples > 1)
|
|
? static_cast<double>(accepted) / static_cast<double>(n_samples - 1)
|
|
: 0.0;
|
|
return res;
|
|
}
|
|
|
|
MHResult mh_run_nd(const std::function<double(const double*)>& target_log_pdf,
|
|
const double* x0,
|
|
const double* proposal_sigma,
|
|
int d,
|
|
std::size_t n_samples,
|
|
double* out_chain,
|
|
Rng& r) {
|
|
MHResult res{};
|
|
if (d <= 0 || n_samples == 0 || out_chain == nullptr ||
|
|
x0 == nullptr || proposal_sigma == nullptr) return res;
|
|
|
|
std::vector<double> curr(d);
|
|
std::vector<double> prop(d);
|
|
for (int k = 0; k < d; ++k) curr[k] = x0[k];
|
|
for (int k = 0; k < d; ++k) out_chain[k] = curr[k];
|
|
|
|
double log_p_curr = target_log_pdf(curr.data());
|
|
std::size_t accepted = 0;
|
|
|
|
for (std::size_t i = 1; i < n_samples; ++i) {
|
|
for (int k = 0; k < d; ++k) {
|
|
prop[k] = curr[k] + proposal_sigma[k] * rng_normal(r);
|
|
}
|
|
double log_p_prop = target_log_pdf(prop.data());
|
|
double log_alpha = log_p_prop - log_p_curr;
|
|
bool accept = false;
|
|
if (log_alpha >= 0.0) {
|
|
accept = true;
|
|
} else {
|
|
double u = rng_uniform(r);
|
|
if (u < std::exp(log_alpha)) accept = true;
|
|
}
|
|
if (accept) {
|
|
for (int k = 0; k < d; ++k) curr[k] = prop[k];
|
|
log_p_curr = log_p_prop;
|
|
++accepted;
|
|
}
|
|
for (int k = 0; k < d; ++k) {
|
|
out_chain[i * static_cast<std::size_t>(d) + k] = curr[k];
|
|
}
|
|
}
|
|
res.n_samples = n_samples;
|
|
res.n_accepted = accepted;
|
|
res.accept_rate = (n_samples > 1)
|
|
? static_cast<double>(accepted) / static_cast<double>(n_samples - 1)
|
|
: 0.0;
|
|
return res;
|
|
}
|
|
|
|
} // namespace fn::ds
|