Files
fn_registry/cpp/functions/datascience/metropolis_hastings.cpp
T
egutierrez d115d8e830 feat(cpp/datascience): CPU stats + MCMC primitives
Nuevo dominio cpp/functions/datascience con primitivas puras CPU para post-
proceso de samples Monte Carlo y diagnostico de cadenas MCMC. Diseñadas como
gemelas CPU de los kernels GPU (rng pareja con gpu_rng_glsl, MH 1D/ND con
mc_metropolis_hastings_gpu) para validar numericamente y para datasets
pequeños donde el dispatch GPU no compensa.

- rng: xoshiro256++ con uniform / normal (Box-Muller) / below (Lemire) /
  categorical. Determinista bit-exacto dado seed.
- stats_summary: sum (Kahan), mean, var/std (Welford one-pass), min, max,
  quantile / percentile (R type-7).
- autocorr: r(k), ACF, tau_int (Sokal) — diagnostico ACF y ESS.
- rhat_ess: Gelman-Rubin clasico y split + ESS basico (multi-chain).
- beta_dist: lgamma (Lanczos), beta_pdf, beta_cdf (continued fraction),
  beta_quantile, mean/var/std — para inferencia Beta-Binomial.
- drawdown: max_dd absoluto/pct + underwater series para sesiones
  simuladas y backtests.
- samples_to_grid_2d: binning 2D CPU para alimentar heatmap_cpp_viz /
  contour_cpp_viz desde samples (x[], y[]).
- metropolis_hastings: MH 1D y ND con target log-pdf como std::function
  (no normalizada).

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-04 11:52:26 +02:00

97 lines
3.0 KiB
C++

#include "datascience/metropolis_hastings.h"
#include <cmath>
#include <vector>
namespace fn::ds {
MHResult mh_run_1d(const std::function<double(double)>& target_log_pdf,
double x0,
double proposal_sigma,
std::size_t n_samples,
double* out_chain,
Rng& r) {
MHResult res{};
if (n_samples == 0 || out_chain == nullptr) return res;
out_chain[0] = x0;
double curr = x0;
double log_p_curr = target_log_pdf(curr);
std::size_t accepted = 0;
for (std::size_t i = 1; i < n_samples; ++i) {
double prop = curr + proposal_sigma * rng_normal(r);
double log_p_prop = target_log_pdf(prop);
double log_alpha = log_p_prop - log_p_curr;
bool accept = false;
if (log_alpha >= 0.0) {
accept = true;
} else {
double u = rng_uniform(r);
if (u < std::exp(log_alpha)) accept = true;
}
if (accept) {
curr = prop;
log_p_curr = log_p_prop;
++accepted;
}
out_chain[i] = curr;
}
res.n_samples = n_samples;
res.n_accepted = accepted;
res.accept_rate = (n_samples > 1)
? static_cast<double>(accepted) / static_cast<double>(n_samples - 1)
: 0.0;
return res;
}
MHResult mh_run_nd(const std::function<double(const double*)>& target_log_pdf,
const double* x0,
const double* proposal_sigma,
int d,
std::size_t n_samples,
double* out_chain,
Rng& r) {
MHResult res{};
if (d <= 0 || n_samples == 0 || out_chain == nullptr ||
x0 == nullptr || proposal_sigma == nullptr) return res;
std::vector<double> curr(d);
std::vector<double> prop(d);
for (int k = 0; k < d; ++k) curr[k] = x0[k];
for (int k = 0; k < d; ++k) out_chain[k] = curr[k];
double log_p_curr = target_log_pdf(curr.data());
std::size_t accepted = 0;
for (std::size_t i = 1; i < n_samples; ++i) {
for (int k = 0; k < d; ++k) {
prop[k] = curr[k] + proposal_sigma[k] * rng_normal(r);
}
double log_p_prop = target_log_pdf(prop.data());
double log_alpha = log_p_prop - log_p_curr;
bool accept = false;
if (log_alpha >= 0.0) {
accept = true;
} else {
double u = rng_uniform(r);
if (u < std::exp(log_alpha)) accept = true;
}
if (accept) {
for (int k = 0; k < d; ++k) curr[k] = prop[k];
log_p_curr = log_p_prop;
++accepted;
}
for (int k = 0; k < d; ++k) {
out_chain[i * static_cast<std::size_t>(d) + k] = curr[k];
}
}
res.n_samples = n_samples;
res.n_accepted = accepted;
res.accept_rate = (n_samples > 1)
? static_cast<double>(accepted) / static_cast<double>(n_samples - 1)
: 0.0;
return res;
}
} // namespace fn::ds