feat: 15 funciones datascience — estadística, DSP e IO de datos

12 funciones puras con implementación real: Standardize, MinMaxScale, Clip, RollingWindow, ZipSlices, GroupBy, Histogram, Pearson, Autocorrelation, FFT (Cooley-Tukey), DetectOutliers, Impute.

3 funciones impuras (stubs): LoadCSV, LoadParquet, FetchDataFrame.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-03-28 02:23:36 +01:00
parent 113c6dfd71
commit fc734029c1
30 changed files with 674 additions and 0 deletions
@@ -0,0 +1,12 @@
+package datascience
+
+// Autocorrelation calcula la autocorrelación de data con el desfase (lag) dado.
+// Usa la correlación de Pearson entre data[0:n-lag] y data[lag:n].
+// Si lag es inválido, retorna 0.
+func Autocorrelation(data []float64, lag int) float64 {
+	n := len(data)
+	if lag < 0 || lag >= n {
+		return 0
+	}
+	return Pearson(data[:n-lag], data[lag:])
+}
@@ -0,0 +1,21 @@
+---
+name: autocorrelation
+kind: function
+lang: go
+domain: datascience
+version: "1.0.0"
+purity: pure
+signature: "func Autocorrelation(data []float64, lag int) float64"
+description: "Calcula la autocorrelación de una serie temporal con un desfase (lag) dado, usando correlación de Pearson."
+tags: [datascience, statistics, autocorrelation, timeseries]
+uses_functions: [pearson_go_datascience]
+uses_types: []
+returns: []
+returns_optional: false
+error_type: ""
+imports: []
+tested: false
+tests: []
+test_file_path: ""
+file_path: "functions/datascience/autocorrelation.go"
+---
@@ -0,0 +1,17 @@
+package datascience
+
// Clip returns a new slice in which every value of data is limited to the
// range [min, max]: values below min become min, values above max become max,
// and all other values are copied unchanged. The input slice is not modified.
func Clip(data []float64, min, max float64) []float64 {
	clipped := make([]float64, 0, len(data))
	for _, value := range data {
		if value < min {
			value = min
		} else if value > max {
			value = max
		}
		clipped = append(clipped, value)
	}
	return clipped
}
@@ -0,0 +1,21 @@
+---
+name: clip
+kind: function
+lang: go
+domain: datascience
+version: "1.0.0"
+purity: pure
+signature: "func Clip(data []float64, min, max float64) []float64"
+description: "Recorta cada valor del slice para que quede dentro del rango [min, max]."
+tags: [datascience, clamp, clip, range]
+uses_functions: []
+uses_types: []
+returns: []
+returns_optional: false
+error_type: ""
+imports: []
+tested: false
+tests: []
+test_file_path: ""
+file_path: "functions/datascience/clip.go"
+---
@@ -0,0 +1,38 @@
+package datascience
+
+import "math"
+
// DetectOutliers marks the values of data that are outliers by z-score.
//
// The returned slice has the same length as data; element i is true when
// |data[i]-mean| / stddev is strictly greater than threshold. The standard
// deviation is the population one (sum of squared deviations divided by n).
// An empty input yields an empty slice, and when the standard deviation is 0
// no value is flagged.
func DetectOutliers(data []float64, threshold float64) []bool {
	count := len(data)
	if count == 0 {
		return []bool{}
	}

	total := 0.0
	for i := 0; i < count; i++ {
		total += data[i]
	}
	avg := total / float64(count)

	sumSquares := 0.0
	for i := 0; i < count; i++ {
		dev := data[i] - avg
		sumSquares += dev * dev
	}
	sigma := math.Sqrt(sumSquares / float64(count))

	flags := make([]bool, count)
	if sigma != 0 {
		for i := range flags {
			flags[i] = math.Abs((data[i]-avg)/sigma) > threshold
		}
	}
	return flags
}
@@ -0,0 +1,21 @@
+---
+name: detect_outliers
+kind: function
+lang: go
+domain: datascience
+version: "1.0.0"
+purity: pure
+signature: "func DetectOutliers(data []float64, threshold float64) []bool"
+description: "Detecta outliers en un slice de float64 usando z-score. Devuelve true para valores cuyo |z-score| supera el umbral."
+tags: [datascience, statistics, outlier, anomaly]
+uses_functions: []
+uses_types: []
+returns: []
+returns_optional: false
+error_type: ""
+imports: ["math"]
+tested: false
+tests: []
+test_file_path: ""
+file_path: "functions/datascience/detect_outliers.go"
+---
@@ -0,0 +1,8 @@
+package datascience
+
+import "fmt"
+
// FetchDataFrame is meant to run the SQL query against the database identified
// by dsn and return the result rows as a slice of column -> value maps.
//
// It is currently an unimplemented stub: both arguments are ignored and the
// call always returns a nil slice together with a "not implemented" error.
func FetchDataFrame(dsn, query string) ([]map[string]any, error) {
	var rows []map[string]any
	return rows, fmt.Errorf("not implemented")
}
@@ -0,0 +1,21 @@
+---
+name: fetch_data_frame
+kind: function
+lang: go
+domain: datascience
+version: "1.0.0"
+purity: impure
+signature: "func FetchDataFrame(dsn, query string) ([]map[string]any, error)"
+description: "Ejecuta una consulta SQL contra un DSN y retorna los resultados como slice de mapas columna-valor."
+tags: [datascience, io, bigquery, fetch]
+uses_functions: []
+uses_types: []
+returns: []
+returns_optional: false
+error_type: "error_go_core"
+imports: ["fmt"]
+tested: false
+tests: []
+test_file_path: ""
+file_path: "functions/datascience/fetch_data_frame.go"
+---
@@ -0,0 +1,61 @@
+package datascience
+
+import (
+	"math"
+	"math/cmplx"
+)
+
+// FFT calcula la Fast Fourier Transform usando el algoritmo Cooley-Tukey radix-2.
+// Si la longitud de data no es potencia de 2, se rellena con ceros (zero-padding).
+func FFT(data []float64) []complex128 {
+	n := len(data)
+	if n == 0 {
+		return []complex128{}
+	}
+
+	// Calcular la siguiente potencia de 2.
+	size := nextPow2(n)
+
+	// Convertir a complex128 con zero-padding.
+	x := make([]complex128, size)
+	for i := 0; i < n; i++ {
+		x[i] = complex(data[i], 0)
+	}
+
+	fftRecursive(x)
+	return x
+}
+
// nextPow2 returns the smallest power of two that is greater than or equal
// to n. For n <= 1 the result is 1.
func nextPow2(n int) int {
	pow := 1
	for {
		if pow >= n {
			return pow
		}
		pow *= 2
	}
}
+
// fftRecursive applies the radix-2 decimation-in-time Cooley-Tukey transform
// to x, overwriting x with the result.
//
// The input is split into its even- and odd-indexed samples, each half is
// transformed recursively, and the halves are recombined with the twiddle
// factors exp(-2*pi*i*k/n). Slices of length 0 or 1 are left unchanged.
// Intended for power-of-two lengths (FFT pads its input accordingly).
func fftRecursive(x []complex128) {
	size := len(x)
	if size <= 1 {
		return
	}
	half := size / 2

	// De-interleave into two scratch slices so x can be overwritten below.
	evens := make([]complex128, half)
	odds := make([]complex128, half)
	for i := range evens {
		evens[i], odds[i] = x[2*i], x[2*i+1]
	}

	fftRecursive(evens)
	fftRecursive(odds)

	// Butterfly: combine the two half-size transforms.
	for k := range evens {
		angle := -2 * math.Pi * float64(k) / float64(size)
		twiddled := cmplx.Rect(1, angle) * odds[k]
		x[k], x[k+half] = evens[k]+twiddled, evens[k]-twiddled
	}
}
@@ -0,0 +1,21 @@
+---
+name: fft
+kind: function
+lang: go
+domain: datascience
+version: "1.0.0"
+purity: pure
+signature: "func FFT(data []float64) []complex128"
+description: "Calcula la Transformada Rápida de Fourier (FFT) usando el algoritmo Cooley-Tukey radix-2. Aplica zero-padding si la longitud no es potencia de 2."
+tags: [datascience, dsp, fft, fourier, frequency]
+uses_functions: []
+uses_types: []
+returns: []
+returns_optional: false
+error_type: ""
+imports: ["math", "math/cmplx"]
+tested: false
+tests: []
+test_file_path: ""
+file_path: "functions/datascience/fft.go"
+---
@@ -0,0 +1,11 @@
+package datascience
+
// GroupBy partitions xs into groups keyed by the value keyFn returns for each
// element. Within a group, elements keep the order they had in xs. The result
// is always a non-nil map, empty when xs is empty.
func GroupBy[T any, K comparable](xs []T, keyFn func(T) K) map[K][]T {
	byKey := map[K][]T{}
	for i := range xs {
		key := keyFn(xs[i])
		byKey[key] = append(byKey[key], xs[i])
	}
	return byKey
}
@@ -0,0 +1,21 @@
+---
+name: group_by
+kind: function
+lang: go
+domain: datascience
+version: "1.0.0"
+purity: pure
+signature: "func GroupBy[T any, K comparable](xs []T, keyFn func(T) K) map[K][]T"
+description: "Agrupa los elementos de un slice según una función clave, devolviendo un mapa de clave a slice de elementos."
+tags: [datascience, group, aggregate, generic]
+uses_functions: []
+uses_types: []
+returns: []
+returns_optional: false
+error_type: ""
+imports: []
+tested: false
+tests: []
+test_file_path: ""
+file_path: "functions/datascience/group_by.go"
+---
@@ -0,0 +1,39 @@
+package datascience
+
+import "math"
+
// Histogram counts how the values of data fall into buckets equally spaced
// intervals spanning [min(data), max(data)].
//
// The result has length buckets; element i is the number of values in the
// i-th interval, and the maximum value is counted in the last bucket.
//
// Edge cases:
//   - buckets <= 0 returns an empty slice (a negative count used to reach
//     make and panic).
//   - Empty data returns a slice of buckets zeros.
//   - NaN and ±Inf values cannot be placed in a finite interval and are
//     ignored; min and max are taken over the finite values only.
//   - When all finite values are equal they are all counted in bucket 0.
func Histogram(data []float64, buckets int) []int {
	if buckets <= 0 {
		return []int{}
	}
	counts := make([]int, buckets)

	minVal := math.Inf(1)
	maxVal := math.Inf(-1)
	finite := 0
	for _, v := range data {
		if math.IsNaN(v) || math.IsInf(v, 0) {
			continue
		}
		finite++
		if v < minVal {
			minVal = v
		}
		if v > maxVal {
			maxVal = v
		}
	}
	if finite == 0 {
		return counts
	}

	rang := maxVal - minVal
	if rang == 0 {
		// Zero-width range: every finite value goes into the first bucket.
		counts[0] = finite
		return counts
	}

	for _, v := range data {
		if math.IsNaN(v) || math.IsInf(v, 0) {
			continue
		}
		idx := int(float64(buckets) * (v - minVal) / rang)
		// v == maxVal maps to idx == buckets; fold it into the last bucket.
		if idx >= buckets {
			idx = buckets - 1
		}
		// Defensive: if the arithmetic above overflowed to a non-finite
		// value, the float->int conversion result is implementation-defined.
		if idx < 0 {
			idx = 0
		}
		counts[idx]++
	}
	return counts
}
@@ -0,0 +1,21 @@
+---
+name: histogram
+kind: function
+lang: go
+domain: datascience
+version: "1.0.0"
+purity: pure
+signature: "func Histogram(data []float64, buckets int) []int"
+description: "Calcula las frecuencias de un slice de float64 distribuidas en un número dado de buckets equiespaciados."
+tags: [datascience, statistics, histogram, frequency]
+uses_functions: []
+uses_types: []
+returns: []
+returns_optional: false
+error_type: ""
+imports: ["math"]
+tested: false
+tests: []
+test_file_path: ""
+file_path: "functions/datascience/histogram.go"
+---
@@ -0,0 +1,18 @@
+package datascience
+
+import "math"
+
// Impute fills missing values (NaN) in data by forward-fill and returns the
// result as a new slice of the same length.
//
// Each NaN is replaced with the most recent preceding non-NaN value. NaNs
// that appear before any valid value have nothing to copy from and stay NaN.
func Impute(data []float64) []float64 {
	filled := make([]float64, 0, len(data))
	carry := math.NaN()
	for _, value := range data {
		if math.IsNaN(value) {
			filled = append(filled, carry)
			continue
		}
		carry = value
		filled = append(filled, value)
	}
	return filled
}
@@ -0,0 +1,21 @@
+---
+name: impute
+kind: function
+lang: go
+domain: datascience
+version: "1.0.0"
+purity: pure
+signature: "func Impute(data []float64) []float64"
+description: "Rellena valores NaN en un slice de float64 usando forward-fill, reemplazando cada NaN con el último valor válido anterior."
+tags: [datascience, impute, missing, fill]
+uses_functions: []
+uses_types: []
+returns: []
+returns_optional: false
+error_type: ""
+imports: ["math"]
+tested: false
+tests: []
+test_file_path: ""
+file_path: "functions/datascience/impute.go"
+---
@@ -0,0 +1,8 @@
+package datascience
+
+import "fmt"
+
// LoadCSV is meant to load the CSV file at path and return its rows as a
// slice of column -> value maps.
//
// It is currently an unimplemented stub: path is ignored and the call always
// returns a nil slice together with a "not implemented" error.
func LoadCSV(path string) ([]map[string]string, error) {
	var rows []map[string]string
	return rows, fmt.Errorf("not implemented")
}
@@ -0,0 +1,21 @@
+---
+name: load_csv
+kind: function
+lang: go
+domain: datascience
+version: "1.0.0"
+purity: impure
+signature: "func LoadCSV(path string) ([]map[string]string, error)"
+description: "Carga un archivo CSV desde disco y lo retorna como slice de mapas columna-valor."
+tags: [datascience, io, csv, load]
+uses_functions: []
+uses_types: []
+returns: []
+returns_optional: false
+error_type: "error_go_core"
+imports: ["fmt"]
+tested: false
+tests: []
+test_file_path: ""
+file_path: "functions/datascience/load_csv.go"
+---
@@ -0,0 +1,8 @@
+package datascience
+
+import "fmt"
+
// LoadParquet is meant to load the Parquet file at path and return its rows
// as a slice of maps.
//
// It is currently an unimplemented stub: path is ignored and the call always
// returns a nil slice together with a "not implemented" error.
func LoadParquet(path string) ([]map[string]any, error) {
	var rows []map[string]any
	return rows, fmt.Errorf("not implemented")
}
@@ -0,0 +1,21 @@
+---
+name: load_parquet
+kind: function
+lang: go
+domain: datascience
+version: "1.0.0"
+purity: impure
+signature: "func LoadParquet(path string) ([]map[string]any, error)"
+description: "Carga un archivo Parquet desde disco y lo retorna como slice de mapas columna-valor."
+tags: [datascience, io, parquet, load]
+uses_functions: []
+uses_types: []
+returns: []
+returns_optional: false
+error_type: "error_go_core"
+imports: ["fmt"]
+tested: false
+tests: []
+test_file_path: ""
+file_path: "functions/datascience/load_parquet.go"
+---
@@ -0,0 +1,33 @@
+package datascience
+
+import "math"
+
// MinMaxScale rescales data to the range [0, 1] with min-max normalization:
// each value v becomes (v - min) / (max - min).
//
// The result is a new slice of the same length. An empty input yields an
// empty slice, and when min == max every output value is 0.
func MinMaxScale(data []float64) []float64 {
	if len(data) == 0 {
		return []float64{}
	}

	lo, hi := math.Inf(1), math.Inf(-1)
	for i := range data {
		if data[i] < lo {
			lo = data[i]
		}
		if data[i] > hi {
			hi = data[i]
		}
	}

	scaled := make([]float64, len(data))
	span := hi - lo
	if span != 0 {
		for i := range data {
			scaled[i] = (data[i] - lo) / span
		}
	}
	return scaled
}
@@ -0,0 +1,21 @@
+---
+name: min_max_scale
+kind: function
+lang: go
+domain: datascience
+version: "1.0.0"
+purity: pure
+signature: "func MinMaxScale(data []float64) []float64"
+description: "Escala los valores de un slice al rango [0, 1] usando normalización min-max."
+tags: [datascience, statistics, normalize, scale]
+uses_functions: []
+uses_types: []
+returns: []
+returns_optional: false
+error_type: ""
+imports: ["math"]
+tested: false
+tests: []
+test_file_path: ""
+file_path: "functions/datascience/min_max_scale.go"
+---
@@ -0,0 +1,39 @@
+package datascience
+
+import "math"
+
// Pearson returns the Pearson correlation coefficient between xs and ys.
//
// When the slices differ in length only the first min(len(xs), len(ys))
// elements of each are used. The result is 0 when there are no elements to
// compare or when either series has zero variance; otherwise it lies in
// [-1, 1].
func Pearson(xs, ys []float64) float64 {
	n := len(xs)
	if len(ys) < n {
		n = len(ys)
	}
	if n == 0 {
		return 0
	}

	var sumX, sumY float64
	for i := 0; i < n; i++ {
		sumX += xs[i]
		sumY += ys[i]
	}
	meanX := sumX / float64(n)
	meanY := sumY / float64(n)

	// cov, varX and varY are the un-normalized co-moment and second moments;
	// the common 1/n factor cancels in the final ratio.
	var cov, varX, varY float64
	for i := 0; i < n; i++ {
		dx := xs[i] - meanX
		dy := ys[i] - meanY
		cov += dx * dy
		varX += dx * dx
		varY += dy * dy
	}

	if varX == 0 || varY == 0 {
		return 0
	}

	// Take the square roots separately: the product varX*varY can underflow
	// to 0 or overflow to +Inf for very small or very large magnitudes even
	// though each factor is representable, which would wrongly report 0.
	r := cov / (math.Sqrt(varX) * math.Sqrt(varY))

	// Rounding can push the ratio marginally outside the valid range.
	if r > 1 {
		r = 1
	} else if r < -1 {
		r = -1
	}
	return r
}
@@ -0,0 +1,21 @@
+---
+name: pearson
+kind: function
+lang: go
+domain: datascience
+version: "1.0.0"
+purity: pure
+signature: "func Pearson(xs, ys []float64) float64"
+description: "Calcula el coeficiente de correlación de Pearson entre dos slices de float64."
+tags: [datascience, statistics, correlation, pearson]
+uses_functions: []
+uses_types: []
+returns: []
+returns_optional: false
+error_type: ""
+imports: ["math"]
+tested: false
+tests: []
+test_file_path: ""
+file_path: "functions/datascience/pearson.go"
+---
@@ -0,0 +1,17 @@
+package datascience
+
// RollingWindow returns every contiguous window of length size over xs, in
// order of starting index; there are len(xs)-size+1 of them.
//
// Each window is an independent copy, so modifying a window does not affect
// xs or the other windows. If size <= 0 or size > len(xs) the result is nil.
func RollingWindow[T any](xs []T, size int) [][]T {
	if size <= 0 || len(xs) < size {
		return nil
	}
	out := make([][]T, len(xs)-size+1)
	for start := range out {
		win := make([]T, size)
		copy(win, xs[start:start+size])
		out[start] = win
	}
	return out
}
@@ -0,0 +1,21 @@
+---
+name: rolling_window
+kind: function
+lang: go
+domain: datascience
+version: "1.0.0"
+purity: pure
+signature: "func RollingWindow[T any](xs []T, size int) [][]T"
+description: "Genera ventanas deslizantes de tamaño fijo sobre un slice genérico."
+tags: [datascience, window, rolling, sliding, generic]
+uses_functions: []
+uses_types: []
+returns: []
+returns_optional: false
+error_type: ""
+imports: []
+tested: false
+tests: []
+test_file_path: ""
+file_path: "functions/datascience/rolling_window.go"
+---
@@ -0,0 +1,35 @@
+package datascience
+
+import "math"
+
// Standardize applies z-score normalization to data and returns the result
// as a new slice: each value x becomes (x - mean) / stddev.
//
// The standard deviation is the population one (sum of squared deviations
// divided by n). An empty input yields an empty slice, and when the standard
// deviation is 0 every output value is 0.
func Standardize(data []float64) []float64 {
	count := len(data)
	if count == 0 {
		return []float64{}
	}

	total := 0.0
	for i := 0; i < count; i++ {
		total += data[i]
	}
	avg := total / float64(count)

	sumSquares := 0.0
	for i := 0; i < count; i++ {
		dev := data[i] - avg
		sumSquares += dev * dev
	}
	sigma := math.Sqrt(sumSquares / float64(count))

	scores := make([]float64, count)
	if sigma != 0 {
		for i := range scores {
			scores[i] = (data[i] - avg) / sigma
		}
	}
	return scores
}
@@ -0,0 +1,21 @@
+---
+name: standardize
+kind: function
+lang: go
+domain: datascience
+version: "1.0.0"
+purity: pure
+signature: "func Standardize(data []float64) []float64"
+description: "Aplica Z-score normalización a un slice de float64, transformando cada valor a (x - media) / desviación estándar."
+tags: [datascience, statistics, normalize, zscore]
+uses_functions: []
+uses_types: []
+returns: []
+returns_optional: false
+error_type: ""
+imports: ["math"]
+tested: false
+tests: []
+test_file_path: ""
+file_path: "functions/datascience/standardize.go"
+---
@@ -0,0 +1,15 @@
+package datascience
+
// ZipSlices pairs up the elements of as and bs position by position, returning
// one [2]float64{as[i], bs[i]} per index. The result is as long as the shorter
// of the two inputs; surplus elements of the longer one are dropped.
func ZipSlices(as, bs []float64) [][2]float64 {
	shorter := as
	if len(bs) < len(as) {
		shorter = bs
	}
	pairs := make([][2]float64, len(shorter))
	for i := range pairs {
		pairs[i][0], pairs[i][1] = as[i], bs[i]
	}
	return pairs
}
@@ -0,0 +1,21 @@
+---
+name: zip_slices
+kind: function
+lang: go
+domain: datascience
+version: "1.0.0"
+purity: pure
+signature: "func ZipSlices(as, bs []float64) [][2]float64"
+description: "Combina dos slices de float64 en un slice de pares [2]float64, truncando al más corto."
+tags: [datascience, zip, combine, pair]
+uses_functions: []
+uses_types: []
+returns: []
+returns_optional: false
+error_type: ""
+imports: []
+tested: false
+tests: []
+test_file_path: ""
+file_path: "functions/datascience/zip_slices.go"
+---