#include "datascience/stats_summary.h"

// NOTE(review): the four standard header names were missing from the collapsed
// source; they are restored here from what the code below uses — confirm.
#include <algorithm>
#include <cmath>
#include <cstring>
#include <vector>

namespace fn::ds {

/// @brief Sum of data[0..n) using Kahan compensated summation.
/// @return The compensated sum, or 0.0 when n == 0 or data is null.
double stats_sum(const double* data, std::size_t n) {
  if (n == 0 || data == nullptr) return 0.0;
  // Kahan summation: negligible cost, avoids drift in large sums.
  double s = 0.0;  // running sum
  double c = 0.0;  // running compensation for lost low-order bits
  for (std::size_t i = 0; i < n; ++i) {
    const double y = data[i] - c;
    const double t = s + y;
    c = (t - s) - y;
    s = t;
  }
  return s;
}

/// @brief Arithmetic mean of data[0..n).
/// @return stats_sum(data, n) / n, or 0.0 when n == 0. A null data pointer
///         also yields 0.0 because stats_sum returns 0.0 for it.
double stats_mean(const double* data, std::size_t n) {
  if (n == 0) return 0.0;
  return stats_sum(data, n) / static_cast<double>(n);
}

/// @brief Smallest element of data[0..n).
/// @return The minimum, or 0.0 when n == 0 or data is null.
double stats_min(const double* data, std::size_t n) {
  if (n == 0 || data == nullptr) return 0.0;
  double m = data[0];
  for (std::size_t i = 1; i < n; ++i) {
    if (data[i] < m) m = data[i];
  }
  return m;
}

/// @brief Largest element of data[0..n).
/// @return The maximum, or 0.0 when n == 0 or data is null.
double stats_max(const double* data, std::size_t n) {
  if (n == 0 || data == nullptr) return 0.0;
  double m = data[0];
  for (std::size_t i = 1; i < n; ++i) {
    if (data[i] > m) m = data[i];
  }
  return m;
}

/// @brief Variance of data[0..n) computed with Welford's one-pass update.
/// @param sample When true, divide by n - 1 (sample variance); otherwise
///               divide by n (population variance).
/// @return The variance, or 0.0 when n == 0, data is null, or a sample
///         variance is requested with fewer than two elements.
double stats_variance(const double* data, std::size_t n, bool sample) {
  if (n == 0 || data == nullptr) return 0.0;
  if (sample && n < 2) return 0.0;
  // Welford one-pass: `mean` is the running mean, `M2` accumulates the sum of
  // squared deviations from the current mean.
  double mean = 0.0;
  double M2 = 0.0;
  for (std::size_t i = 0; i < n; ++i) {
    const double x = data[i];
    const double delta = x - mean;
    mean += delta / static_cast<double>(i + 1);
    const double delta2 = x - mean;
    M2 += delta * delta2;
  }
  const double denom =
      sample ? static_cast<double>(n - 1) : static_cast<double>(n);
  return M2 / denom;
}

/// @brief Standard deviation: square root of stats_variance(data, n, sample).
double stats_std(const double* data, std::size_t n, bool sample) {
  return std::sqrt(stats_variance(data, n, sample));
}

/// @brief Writes data[0..n) into out in ascending order.
///
/// When data is null or data == out, no copy is made and the existing contents
/// of out are sorted in place. Does nothing when n == 0 or out is null.
void stats_sort(const double* data, std::size_t n, double* out) {
  if (n == 0 || out == nullptr) return;
  if (out != data && data != nullptr) {
    std::memcpy(out, data, n * sizeof(double));
  }
  std::sort(out, out + n);
}

/// @brief Quantile of an ascending-sorted array with linear interpolation.
/// @param sorted Values in ascending order.
/// @param p      Quantile in [0, 1]; values outside are clamped to the
///               first / last element.
/// @return The interpolated quantile; 0.0 when n == 0 or sorted is null;
///         NaN when p is NaN.
double stats_quantile_sorted(const double* sorted, std::size_t n, double p) {
  if (n == 0 || sorted == nullptr) return 0.0;
  // A NaN p passes neither clamp below; converting floor(NaN) to an integer
  // would be undefined behaviour, so propagate the NaN instead.
  if (std::isnan(p)) return p;
  if (p <= 0.0) return sorted[0];
  if (p >= 1.0) return sorted[n - 1];
  // R type-7: h = (n-1) * p;
  // result = sorted[floor(h)] + (h - floor(h)) * (sorted[floor(h)+1] - sorted[floor(h)])
  const double h = (static_cast<double>(n) - 1.0) * p;
  const std::size_t lo = static_cast<std::size_t>(std::floor(h));
  std::size_t hi = lo + 1;
  if (hi >= n) hi = n - 1;  // keep the upper neighbour inside the array
  const double frac = h - static_cast<double>(lo);
  return sorted[lo] + frac * (sorted[hi] - sorted[lo]);
}

/// @brief Quantile of unsorted data: sorts a temporary copy, then delegates to
///        stats_quantile_sorted.
/// @return The quantile; 0.0 when n == 0 or data is null.
double stats_quantile(const double* data, std::size_t n, double p) {
  // Null data short-circuits here so no scratch buffer is allocated for it.
  if (n == 0 || data == nullptr) return 0.0;
  std::vector<double> tmp(n);
  stats_sort(data, n, tmp.data());
  return stats_quantile_sorted(tmp.data(), n, p);
}

/// @brief Percentile of unsorted data; pct is on a 0-100 scale and is
///        converted to a quantile by multiplying by 0.01.
double stats_percentile(const double* data, std::size_t n, double pct) {
  return stats_quantile(data, n, pct * 0.01);
}

}  // namespace fn::ds