d115d8e830
Nuevo dominio cpp/functions/datascience con primitivas puras CPU para post- proceso de samples Monte Carlo y diagnostico de cadenas MCMC. Diseñadas como gemelas CPU de los kernels GPU (rng pareja con gpu_rng_glsl, MH 1D/ND con mc_metropolis_hastings_gpu) para validar numericamente y para datasets pequeños donde el dispatch GPU no compensa. - rng: xoshiro256++ con uniform / normal (Box-Muller) / below (Lemire) / categorical. Determinista bit-exacto dado seed. - stats_summary: sum (Kahan), mean, var/std (Welford one-pass), min, max, quantile / percentile (R type-7). - autocorr: r(k), ACF, tau_int (Sokal) — diagnostico ACF y ESS. - rhat_ess: Gelman-Rubin clasico y split + ESS basico (multi-chain). - beta_dist: lgamma (Lanczos), beta_pdf, beta_cdf (continued fraction), beta_quantile, mean/var/std — para inferencia Beta-Binomial. - drawdown: max_dd absoluto/pct + underwater series para sesiones simuladas y backtests. - samples_to_grid_2d: binning 2D CPU para alimentar heatmap_cpp_viz / contour_cpp_viz desde samples (x[], y[]). - metropolis_hastings: MH 1D y ND con target log-pdf como std::function (no normalizada). Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
134 lines
3.9 KiB
C++
134 lines
3.9 KiB
C++
#include "datascience/beta_dist.h"
|
|
|
|
#include <cmath>
|
|
|
|
namespace fn::ds {
|
|
|
|
// Lanczos approx with g=7, n=9 — precision ~1e-15 sobre x > 0.5. Para
|
|
// x <= 0.5 usa la reflexion lgamma(x) = log(pi/sin(pi*x)) - lgamma(1-x).
|
|
double lgamma_lanczos(double x) {
|
|
static const double p[] = {
|
|
676.5203681218851,
|
|
-1259.1392167224028,
|
|
771.32342877765313,
|
|
-176.61502916214059,
|
|
12.507343278686905,
|
|
-0.13857109526572012,
|
|
9.9843695780195716e-6,
|
|
1.5056327351493116e-7
|
|
};
|
|
if (x < 0.5) {
|
|
return std::log(3.14159265358979323846 /
|
|
std::sin(3.14159265358979323846 * x))
|
|
- lgamma_lanczos(1.0 - x);
|
|
}
|
|
x -= 1.0;
|
|
double a = 0.99999999999980993;
|
|
double t = x + 7.5;
|
|
for (int i = 0; i < 8; ++i) {
|
|
a += p[i] / (x + static_cast<double>(i + 1));
|
|
}
|
|
return 0.5 * std::log(6.28318530717958647692) +
|
|
(x + 0.5) * std::log(t) - t + std::log(a);
|
|
}
|
|
|
|
double log_beta(double a, double b) {
|
|
return lgamma_lanczos(a) + lgamma_lanczos(b) - lgamma_lanczos(a + b);
|
|
}
|
|
|
|
double beta_pdf(double x, double a, double b) {
|
|
if (a <= 0.0 || b <= 0.0) return 0.0;
|
|
if (x <= 0.0 || x >= 1.0) return 0.0;
|
|
double lp = (a - 1.0) * std::log(x) +
|
|
(b - 1.0) * std::log(1.0 - x) -
|
|
log_beta(a, b);
|
|
return std::exp(lp);
|
|
}
|
|
|
|
// Continued fraction expansion de I_x(a, b). Numerical Recipes 6.4.
|
|
static double betacf(double x, double a, double b) {
|
|
constexpr int MAXIT = 200;
|
|
constexpr double EPS = 3.0e-15;
|
|
constexpr double FPMIN = 1.0e-300;
|
|
double qab = a + b;
|
|
double qap = a + 1.0;
|
|
double qam = a - 1.0;
|
|
double c = 1.0;
|
|
double d = 1.0 - qab * x / qap;
|
|
if (std::fabs(d) < FPMIN) d = FPMIN;
|
|
d = 1.0 / d;
|
|
double h = d;
|
|
for (int m = 1; m <= MAXIT; ++m) {
|
|
int m2 = 2 * m;
|
|
double aa = static_cast<double>(m) * (b - m) * x /
|
|
((qam + m2) * (a + m2));
|
|
d = 1.0 + aa * d;
|
|
if (std::fabs(d) < FPMIN) d = FPMIN;
|
|
c = 1.0 + aa / c;
|
|
if (std::fabs(c) < FPMIN) c = FPMIN;
|
|
d = 1.0 / d;
|
|
h *= d * c;
|
|
aa = -(a + m) * (qab + m) * x /
|
|
((a + m2) * (qap + m2));
|
|
d = 1.0 + aa * d;
|
|
if (std::fabs(d) < FPMIN) d = FPMIN;
|
|
c = 1.0 + aa / c;
|
|
if (std::fabs(c) < FPMIN) c = FPMIN;
|
|
d = 1.0 / d;
|
|
double del = d * c;
|
|
h *= del;
|
|
if (std::fabs(del - 1.0) < EPS) break;
|
|
}
|
|
return h;
|
|
}
|
|
|
|
double beta_cdf(double x, double a, double b) {
|
|
if (a <= 0.0 || b <= 0.0) return 0.0;
|
|
if (x <= 0.0) return 0.0;
|
|
if (x >= 1.0) return 1.0;
|
|
double bt = std::exp(lgamma_lanczos(a + b) -
|
|
lgamma_lanczos(a) -
|
|
lgamma_lanczos(b) +
|
|
a * std::log(x) +
|
|
b * std::log(1.0 - x));
|
|
if (x < (a + 1.0) / (a + b + 2.0)) {
|
|
return bt * betacf(x, a, b) / a;
|
|
}
|
|
return 1.0 - bt * betacf(1.0 - x, b, a) / b;
|
|
}
|
|
|
|
double beta_quantile(double p, double a, double b) {
|
|
if (a <= 0.0 || b <= 0.0) return 0.0;
|
|
if (p <= 0.0) return 0.0;
|
|
if (p >= 1.0) return 1.0;
|
|
// Bisection robusto. Newton tendria mejor convergencia pero requiere
|
|
// beta_pdf(x) que cerca de 0 o 1 puede oscilar; bisection en [0,1] es
|
|
// siempre estable. ~50 iter para 1e-6.
|
|
double lo = 0.0, hi = 1.0;
|
|
for (int i = 0; i < 60; ++i) {
|
|
double mid = 0.5 * (lo + hi);
|
|
double cdf = beta_cdf(mid, a, b);
|
|
if (cdf < p) lo = mid;
|
|
else hi = mid;
|
|
if (hi - lo < 1e-7) break;
|
|
}
|
|
return 0.5 * (lo + hi);
|
|
}
|
|
|
|
double beta_mean(double a, double b) {
|
|
if (a + b <= 0.0) return 0.5;
|
|
return a / (a + b);
|
|
}
|
|
|
|
double beta_variance(double a, double b) {
|
|
double s = a + b;
|
|
if (s <= 0.0 || (s + 1.0) <= 0.0) return 0.0;
|
|
return (a * b) / (s * s * (s + 1.0));
|
|
}
|
|
|
|
double beta_std(double a, double b) {
|
|
return std::sqrt(beta_variance(a, b));
|
|
}
|
|
|
|
} // namespace fn::ds
|