feat: 15 funciones datascience — estadística, DSP e IO de datos

12 funciones puras con implementación real:
Standardize, MinMaxScale, Clip, RollingWindow, ZipSlices, GroupBy,
Histogram, Pearson, Autocorrelation, FFT (Cooley-Tukey), DetectOutliers, Impute

3 funciones impuras (stubs):
LoadCSV, LoadParquet, FetchDataFrame

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
2026-03-28 02:23:36 +01:00
parent 113c6dfd71
commit fc734029c1
30 changed files with 674 additions and 0 deletions
+12
View File
@@ -0,0 +1,12 @@
package datascience
// Autocorrelation returns the autocorrelation of data at the given lag.
// It is the Pearson correlation between data[0:n-lag] and data[lag:n],
// where n is len(data). A lag that is negative or not smaller than n is
// treated as invalid and yields 0.
func Autocorrelation(data []float64, lag int) float64 {
	size := len(data)
	if validLag := lag >= 0 && lag < size; !validLag {
		return 0
	}
	head := data[:size-lag]
	tail := data[lag:]
	return Pearson(head, tail)
}
+21
View File
@@ -0,0 +1,21 @@
---
name: autocorrelation
kind: function
lang: go
domain: datascience
version: "1.0.0"
purity: pure
signature: "func Autocorrelation(data []float64, lag int) float64"
description: "Calcula la autocorrelación de una serie temporal con un desfase (lag) dado, usando correlación de Pearson."
tags: [datascience, statistics, autocorrelation, timeseries]
uses_functions: [pearson_go_datascience]
uses_types: []
returns: []
returns_optional: false
error_type: ""
imports: []
tested: false
tests: []
test_file_path: ""
file_path: "functions/datascience/autocorrelation.go"
---
+17
View File
@@ -0,0 +1,17 @@
package datascience
// Clip returns a new slice in which every value of data is limited to the
// range [min, max]: values below min become min and values above max become
// max. The input slice is not modified.
func Clip(data []float64, min, max float64) []float64 {
	clipped := make([]float64, len(data))
	copy(clipped, data)
	for idx := range clipped {
		if clipped[idx] < min {
			clipped[idx] = min
		} else if clipped[idx] > max {
			clipped[idx] = max
		}
	}
	return clipped
}
+21
View File
@@ -0,0 +1,21 @@
---
name: clip
kind: function
lang: go
domain: datascience
version: "1.0.0"
purity: pure
signature: "func Clip(data []float64, min, max float64) []float64"
description: "Recorta cada valor del slice para que quede dentro del rango [min, max]."
tags: [datascience, clamp, clip, range]
uses_functions: []
uses_types: []
returns: []
returns_optional: false
error_type: ""
imports: []
tested: false
tests: []
test_file_path: ""
file_path: "functions/datascience/clip.go"
---
+38
View File
@@ -0,0 +1,38 @@
package datascience
import "math"
// DetectOutliers reports, for each value of data, whether it is an outlier
// according to its z-score: element i of the result is true when
// |data[i] - mean| / stddev is strictly greater than threshold.
//
// The standard deviation is the population one (the sum of squared deviations
// is divided by len(data)). An empty input yields an empty slice, and when the
// standard deviation is 0 no value is flagged.
func DetectOutliers(data []float64, threshold float64) []bool {
	count := len(data)
	if count == 0 {
		return []bool{}
	}

	total := 0.0
	for i := 0; i < count; i++ {
		total += data[i]
	}
	mean := total / float64(count)

	sumSquares := 0.0
	for i := 0; i < count; i++ {
		diff := data[i] - mean
		sumSquares += diff * diff
	}
	sigma := math.Sqrt(sumSquares / float64(count))

	flags := make([]bool, count)
	if sigma != 0 {
		for i := range flags {
			flags[i] = math.Abs((data[i]-mean)/sigma) > threshold
		}
	}
	return flags
}
+21
View File
@@ -0,0 +1,21 @@
---
name: detect_outliers
kind: function
lang: go
domain: datascience
version: "1.0.0"
purity: pure
signature: "func DetectOutliers(data []float64, threshold float64) []bool"
description: "Detecta outliers en un slice de float64 usando z-score. Devuelve true para valores cuyo |z-score| supera el umbral."
tags: [datascience, statistics, outlier, anomaly]
uses_functions: []
uses_types: []
returns: []
returns_optional: false
error_type: ""
imports: ["math"]
tested: false
tests: []
test_file_path: ""
file_path: "functions/datascience/detect_outliers.go"
---
@@ -0,0 +1,8 @@
package datascience
import "fmt"
// FetchDataFrame is intended to run a SQL query against the data source
// identified by dsn and return the rows as a slice of column-to-value maps.
//
// It is currently a stub: it ignores its arguments and always returns a nil
// slice together with a "not implemented" error.
func FetchDataFrame(dsn, query string) ([]map[string]any, error) {
	var rows []map[string]any
	return rows, fmt.Errorf("not implemented")
}
+21
View File
@@ -0,0 +1,21 @@
---
name: fetch_data_frame
kind: function
lang: go
domain: datascience
version: "1.0.0"
purity: impure
signature: "func FetchDataFrame(dsn, query string) ([]map[string]any, error)"
description: "Ejecuta una consulta SQL contra un DSN y retorna los resultados como slice de mapas columna-valor."
tags: [datascience, io, bigquery, fetch]
uses_functions: []
uses_types: []
returns: []
returns_optional: false
error_type: "error_go_core"
imports: ["fmt"]
tested: false
tests: []
test_file_path: ""
file_path: "functions/datascience/fetch_data_frame.go"
---
+61
View File
@@ -0,0 +1,61 @@
package datascience
import (
"math"
"math/cmplx"
)
// FFT computes the discrete Fourier transform of data with a radix-2
// Cooley-Tukey fast Fourier transform.
//
// When len(data) is not a power of two the input is zero-padded up to the
// next power of two, so the result has that padded length. An empty input
// yields an empty, non-nil slice.
func FFT(data []float64) []complex128 {
	if len(data) == 0 {
		return []complex128{}
	}

	// Allocate the padded buffer; entries beyond len(data) stay zero.
	spectrum := make([]complex128, nextPow2(len(data)))
	for i, sample := range data {
		spectrum[i] = complex(sample, 0)
	}

	fftRecursive(spectrum)
	return spectrum
}
// nextPow2 returns the smallest power of two that is greater than or equal
// to n. For n <= 1 the result is 1.
func nextPow2(n int) int {
	for pow := 1; ; pow <<= 1 {
		if pow >= n {
			return pow
		}
	}
}
// fftRecursive overwrites x with its discrete Fourier transform using the
// recursive radix-2 decimation-in-time Cooley-Tukey scheme. Slices of length
// 0 or 1 are left untouched. The even/odd split is only a valid transform
// when len(x) is a power of two, which FFT ensures by zero-padding.
func fftRecursive(x []complex128) {
	size := len(x)
	if size < 2 {
		return
	}
	half := size / 2

	// A single scratch buffer holds both halves: even-indexed samples first,
	// odd-indexed samples after them.
	scratch := make([]complex128, 2*half)
	evens, odds := scratch[:half], scratch[half:]
	for i := range evens {
		evens[i], odds[i] = x[2*i], x[2*i+1]
	}

	fftRecursive(evens)
	fftRecursive(odds)

	// Butterfly: combine the two half-size transforms using the twiddle
	// factor e^(-2*pi*i*k/size).
	for k := range evens {
		angle := -2 * math.Pi * float64(k) / float64(size)
		twiddled := cmplx.Rect(1, angle) * odds[k]
		x[k] = evens[k] + twiddled
		x[k+half] = evens[k] - twiddled
	}
}
+21
View File
@@ -0,0 +1,21 @@
---
name: fft
kind: function
lang: go
domain: datascience
version: "1.0.0"
purity: pure
signature: "func FFT(data []float64) []complex128"
description: "Calcula la Transformada Rápida de Fourier (FFT) usando el algoritmo Cooley-Tukey radix-2. Aplica zero-padding si la longitud no es potencia de 2."
tags: [datascience, dsp, fft, fourier, frequency]
uses_functions: []
uses_types: []
returns: []
returns_optional: false
error_type: ""
imports: ["math", "math/cmplx"]
tested: false
tests: []
test_file_path: ""
file_path: "functions/datascience/fft.go"
---
+11
View File
@@ -0,0 +1,11 @@
package datascience
// GroupBy partitions xs into groups that share the key returned by keyFn.
// The result maps each key to the elements that produced it, in the order in
// which they appear in xs. An empty input yields an empty, non-nil map.
func GroupBy[T any, K comparable](xs []T, keyFn func(T) K) map[K][]T {
	byKey := map[K][]T{}
	for i := range xs {
		key := keyFn(xs[i])
		bucket := byKey[key]
		byKey[key] = append(bucket, xs[i])
	}
	return byKey
}
+21
View File
@@ -0,0 +1,21 @@
---
name: group_by
kind: function
lang: go
domain: datascience
version: "1.0.0"
purity: pure
signature: "func GroupBy[T any, K comparable](xs []T, keyFn func(T) K) map[K][]T"
description: "Agrupa los elementos de un slice según una función clave, devolviendo un mapa de clave a slice de elementos."
tags: [datascience, group, aggregate, generic]
uses_functions: []
uses_types: []
returns: []
returns_optional: false
error_type: ""
imports: []
tested: false
tests: []
test_file_path: ""
file_path: "functions/datascience/group_by.go"
---
+39
View File
@@ -0,0 +1,39 @@
package datascience
import "math"
// Histogram counts how many values of data fall into each of buckets
// equal-width intervals spanning the range from the smallest to the largest
// finite value of data.
//
// The returned slice has length buckets; element i holds the count for the
// i-th interval, and the largest value is counted in the last interval.
// When buckets <= 0 an empty slice is returned. When data is empty the counts
// are all zero. NaN and infinite values cannot be assigned to a finite-width
// interval and are ignored. If all counted values are equal, they are all
// placed in the first bucket.
func Histogram(data []float64, buckets int) []int {
	// make panics on a negative length, so reject non-positive bucket counts
	// before allocating.
	if buckets <= 0 {
		return []int{}
	}
	counts := make([]int, buckets)

	minVal := math.Inf(1)
	maxVal := math.Inf(-1)
	finite := 0
	for _, v := range data {
		if math.IsNaN(v) || math.IsInf(v, 0) {
			continue
		}
		finite++
		if v < minVal {
			minVal = v
		}
		if v > maxVal {
			maxVal = v
		}
	}
	if finite == 0 {
		return counts
	}

	rang := maxVal - minVal
	if rang == 0 {
		// All counted values are equal; put everything in the first bucket.
		counts[0] = finite
		return counts
	}

	for _, v := range data {
		if math.IsNaN(v) || math.IsInf(v, 0) {
			continue
		}
		idx := int(float64(buckets) * (v - minVal) / rang)
		// The maximum maps to idx == buckets; fold it into the last bucket.
		if idx >= buckets {
			idx = buckets - 1
		}
		// Guard against an out-of-range float-to-int conversion (for example
		// when maxVal-minVal overflows), whose result is implementation-defined.
		if idx < 0 {
			idx = 0
		}
		counts[idx]++
	}
	return counts
}
+21
View File
@@ -0,0 +1,21 @@
---
name: histogram
kind: function
lang: go
domain: datascience
version: "1.0.0"
purity: pure
signature: "func Histogram(data []float64, buckets int) []int"
description: "Calcula las frecuencias de un slice de float64 distribuidas en un número dado de buckets equiespaciados."
tags: [datascience, statistics, histogram, frequency]
uses_functions: []
uses_types: []
returns: []
returns_optional: false
error_type: ""
imports: ["math"]
tested: false
tests: []
test_file_path: ""
file_path: "functions/datascience/histogram.go"
---
+18
View File
@@ -0,0 +1,18 @@
package datascience
import "math"
// Impute returns a copy of data in which NaN values are filled by
// forward-fill: each NaN is replaced with the most recent preceding non-NaN
// value. NaN values that appear before any valid value have nothing to carry
// forward and remain NaN. The input slice is not modified.
func Impute(data []float64) []float64 {
	filled := make([]float64, len(data))
	carry := math.NaN()
	for idx := 0; idx < len(data); idx++ {
		if math.IsNaN(data[idx]) {
			filled[idx] = carry
			continue
		}
		carry = data[idx]
		filled[idx] = carry
	}
	return filled
}
+21
View File
@@ -0,0 +1,21 @@
---
name: impute
kind: function
lang: go
domain: datascience
version: "1.0.0"
purity: pure
signature: "func Impute(data []float64) []float64"
description: "Rellena valores NaN en un slice de float64 usando forward-fill, reemplazando cada NaN con el último valor válido anterior."
tags: [datascience, impute, missing, fill]
uses_functions: []
uses_types: []
returns: []
returns_optional: false
error_type: ""
imports: ["math"]
tested: false
tests: []
test_file_path: ""
file_path: "functions/datascience/impute.go"
---
+8
View File
@@ -0,0 +1,8 @@
package datascience
import "fmt"
// LoadCSV is intended to load the CSV file at path and return its rows as a
// slice of column-name-to-value maps.
//
// It is currently a stub: it ignores path and always returns a nil slice
// together with a "not implemented" error.
func LoadCSV(path string) ([]map[string]string, error) {
	var rows []map[string]string
	return rows, fmt.Errorf("not implemented")
}
+21
View File
@@ -0,0 +1,21 @@
---
name: load_csv
kind: function
lang: go
domain: datascience
version: "1.0.0"
purity: impure
signature: "func LoadCSV(path string) ([]map[string]string, error)"
description: "Carga un archivo CSV desde disco y lo retorna como slice de mapas columna-valor."
tags: [datascience, io, csv, load]
uses_functions: []
uses_types: []
returns: []
returns_optional: false
error_type: "error_go_core"
imports: ["fmt"]
tested: false
tests: []
test_file_path: ""
file_path: "functions/datascience/load_csv.go"
---
+8
View File
@@ -0,0 +1,8 @@
package datascience
import "fmt"
// LoadParquet is intended to load the Parquet file at path and return its
// rows as a slice of maps.
//
// It is currently a stub: it ignores path and always returns a nil slice
// together with a "not implemented" error.
func LoadParquet(path string) ([]map[string]any, error) {
	var rows []map[string]any
	return rows, fmt.Errorf("not implemented")
}
+21
View File
@@ -0,0 +1,21 @@
---
name: load_parquet
kind: function
lang: go
domain: datascience
version: "1.0.0"
purity: impure
signature: "func LoadParquet(path string) ([]map[string]any, error)"
description: "Carga un archivo Parquet desde disco y lo retorna como slice de mapas columna-valor."
tags: [datascience, io, parquet, load]
uses_functions: []
uses_types: []
returns: []
returns_optional: false
error_type: "error_go_core"
imports: ["fmt"]
tested: false
tests: []
test_file_path: ""
file_path: "functions/datascience/load_parquet.go"
---
+33
View File
@@ -0,0 +1,33 @@
package datascience
import "math"
// MinMaxScale rescales data to the range [0, 1] with min-max normalization:
// each value x becomes (x - min) / (max - min). An empty input yields an empty
// slice, and when min equals max the result is a slice of zeros.
func MinMaxScale(data []float64) []float64 {
	if len(data) == 0 {
		return []float64{}
	}

	lo, hi := math.Inf(1), math.Inf(-1)
	for i := range data {
		// Both checks are independent: a single value is both min and max.
		if data[i] < lo {
			lo = data[i]
		}
		if data[i] > hi {
			hi = data[i]
		}
	}

	scaled := make([]float64, len(data))
	if span := hi - lo; span != 0 {
		for i, x := range data {
			scaled[i] = (x - lo) / span
		}
	}
	return scaled
}
+21
View File
@@ -0,0 +1,21 @@
---
name: min_max_scale
kind: function
lang: go
domain: datascience
version: "1.0.0"
purity: pure
signature: "func MinMaxScale(data []float64) []float64"
description: "Escala los valores de un slice al rango [0, 1] usando normalización min-max."
tags: [datascience, statistics, normalize, scale]
uses_functions: []
uses_types: []
returns: []
returns_optional: false
error_type: ""
imports: ["math"]
tested: false
tests: []
test_file_path: ""
file_path: "functions/datascience/min_max_scale.go"
---
+39
View File
@@ -0,0 +1,39 @@
package datascience
import "math"
// Pearson returns the Pearson correlation coefficient between xs and ys.
//
// If the slices differ in length only the first min(len(xs), len(ys))
// elements of each are used. It returns 0 when there are no elements to
// compare or when either series has zero variance. The result is clamped to
// [-1, 1] so that floating-point rounding can never report a coefficient
// outside the mathematically valid range.
func Pearson(xs, ys []float64) float64 {
	n := len(xs)
	if len(ys) < n {
		n = len(ys)
	}
	if n == 0 {
		return 0
	}

	var sumX, sumY float64
	for i := 0; i < n; i++ {
		sumX += xs[i]
		sumY += ys[i]
	}
	meanX := sumX / float64(n)
	meanY := sumY / float64(n)

	var num, denomX, denomY float64
	for i := 0; i < n; i++ {
		dx := xs[i] - meanX
		dy := ys[i] - meanY
		num += dx * dy
		denomX += dx * dx
		denomY += dy * dy
	}

	// Take the square roots separately: the product denomX*denomY can
	// underflow to 0 (or overflow to +Inf) even when both factors are
	// representable, which would wrongly report no correlation.
	denom := math.Sqrt(denomX) * math.Sqrt(denomY)
	if denom == 0 {
		return 0
	}

	r := num / denom
	if r > 1 {
		return 1
	}
	if r < -1 {
		return -1
	}
	return r
}
+21
View File
@@ -0,0 +1,21 @@
---
name: pearson
kind: function
lang: go
domain: datascience
version: "1.0.0"
purity: pure
signature: "func Pearson(xs, ys []float64) float64"
description: "Calcula el coeficiente de correlación de Pearson entre dos slices de float64."
tags: [datascience, statistics, correlation, pearson]
uses_functions: []
uses_types: []
returns: []
returns_optional: false
error_type: ""
imports: ["math"]
tested: false
tests: []
test_file_path: ""
file_path: "functions/datascience/pearson.go"
---
+17
View File
@@ -0,0 +1,17 @@
package datascience
// RollingWindow returns every contiguous window of length size over xs, in
// order, advancing one element at a time. Each window is a fresh copy, so
// modifying a window does not affect xs or the other windows.
// It returns nil when size <= 0 or size > len(xs).
func RollingWindow[T any](xs []T, size int) [][]T {
	if size <= 0 || len(xs) < size {
		return nil
	}

	windows := make([][]T, len(xs)-size+1)
	for start := range windows {
		window := make([]T, size)
		copy(window, xs[start:start+size])
		windows[start] = window
	}
	return windows
}
+21
View File
@@ -0,0 +1,21 @@
---
name: rolling_window
kind: function
lang: go
domain: datascience
version: "1.0.0"
purity: pure
signature: "func RollingWindow[T any](xs []T, size int) [][]T"
description: "Genera ventanas deslizantes de tamaño fijo sobre un slice genérico."
tags: [datascience, window, rolling, sliding, generic]
uses_functions: []
uses_types: []
returns: []
returns_optional: false
error_type: ""
imports: []
tested: false
tests: []
test_file_path: ""
file_path: "functions/datascience/rolling_window.go"
---
+35
View File
@@ -0,0 +1,35 @@
package datascience
import "math"
// Standardize applies z-score normalization to data: each value x is mapped
// to (x - mean) / stddev, where stddev is the population standard deviation
// (the sum of squared deviations is divided by len(data)).
// An empty input yields an empty slice, and when stddev is 0 the result is a
// slice of zeros.
func Standardize(data []float64) []float64 {
	count := len(data)
	if count == 0 {
		return []float64{}
	}

	total := 0.0
	for i := 0; i < count; i++ {
		total += data[i]
	}
	mean := total / float64(count)

	sumSquares := 0.0
	for i := 0; i < count; i++ {
		diff := data[i] - mean
		sumSquares += diff * diff
	}
	sigma := math.Sqrt(sumSquares / float64(count))

	scores := make([]float64, count)
	if sigma != 0 {
		for i := range scores {
			scores[i] = (data[i] - mean) / sigma
		}
	}
	return scores
}
+21
View File
@@ -0,0 +1,21 @@
---
name: standardize
kind: function
lang: go
domain: datascience
version: "1.0.0"
purity: pure
signature: "func Standardize(data []float64) []float64"
description: "Aplica Z-score normalización a un slice de float64, transformando cada valor a (x - media) / desviación estándar."
tags: [datascience, statistics, normalize, zscore]
uses_functions: []
uses_types: []
returns: []
returns_optional: false
error_type: ""
imports: ["math"]
tested: false
tests: []
test_file_path: ""
file_path: "functions/datascience/standardize.go"
---
+15
View File
@@ -0,0 +1,15 @@
package datascience
// ZipSlices pairs up the elements of as and bs position by position, returning
// a slice of [2]float64 where element i is {as[i], bs[i]}. The result is as
// long as the shorter input; extra elements of the longer one are dropped.
func ZipSlices(as, bs []float64) [][2]float64 {
	length := len(bs)
	if len(as) < length {
		length = len(as)
	}

	pairs := make([][2]float64, 0, length)
	for i := range as[:length] {
		pairs = append(pairs, [2]float64{as[i], bs[i]})
	}
	return pairs
}
+21
View File
@@ -0,0 +1,21 @@
---
name: zip_slices
kind: function
lang: go
domain: datascience
version: "1.0.0"
purity: pure
signature: "func ZipSlices(as, bs []float64) [][2]float64"
description: "Combina dos slices de float64 en un slice de pares [2]float64, truncando al más corto."
tags: [datascience, zip, combine, pair]
uses_functions: []
uses_types: []
returns: []
returns_optional: false
error_type: ""
imports: []
tested: false
tests: []
test_file_path: ""
file_path: "functions/datascience/zip_slices.go"
---