feat: 15 funciones datascience — estadística, DSP e IO de datos

12 funciones puras con implementación real: Standardize, MinMaxScale, Clip, RollingWindow, ZipSlices, GroupBy, Histogram, Pearson, Autocorrelation, FFT (Cooley-Tukey), DetectOutliers, Impute. 3 funciones impuras (stubs): LoadCSV, LoadParquet, FetchDataFrame. Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-03-28 02:23:36 +01:00
parent 113c6dfd71
commit fc734029c1
30 changed files with 674 additions and 0 deletions
@@ -0,0 +1,12 @@
 package datascience
 // Autocorrelation returns the autocorrelation of data at the given lag.
 // It is computed as the Pearson correlation between data[:n-lag] and
 // data[lag:], where n is len(data).
 // A lag that is negative, or not smaller than n, is treated as invalid and
 // yields 0.
 func Autocorrelation(data []float64, lag int) float64 {
 	if lag >= 0 && lag < len(data) {
 		// head is the series without its last lag samples; tail is the same
 		// series shifted forward by lag. Both have length n-lag.
 		head := data[:len(data)-lag]
 		tail := data[lag:]
 		return Pearson(head, tail)
 	}
 	return 0
 }
@@ -0,0 +1,21 @@
 ---
 name: autocorrelation
 kind: function
 lang: go
 domain: datascience
 version: "1.0.0"
 purity: pure
 signature: "func Autocorrelation(data []float64, lag int) float64"
 description: "Calcula la autocorrelación de una serie temporal con un desfase (lag) dado, usando correlación de Pearson."
 tags: [datascience, statistics, autocorrelation, timeseries]
 uses_functions: [pearson_go_datascience]
 uses_types: []
 returns: []
 returns_optional: false
 error_type: ""
 imports: []
 tested: false
 tests: []
 test_file_path: ""
 file_path: "functions/datascience/autocorrelation.go"
 ---
@@ -0,0 +1,17 @@
 package datascience
 // Clip returns a new slice in which every value of data is limited to the
 // range [min, max]: values below min become min and values above max become
 // max. The lower bound is checked first. The input slice is not modified.
 func Clip(data []float64, min, max float64) []float64 {
 	clipped := make([]float64, len(data))
 	copy(clipped, data)
 	for i := range clipped {
 		if clipped[i] < min {
 			clipped[i] = min
 		} else if clipped[i] > max {
 			clipped[i] = max
 		}
 	}
 	return clipped
 }
@@ -0,0 +1,21 @@
 ---
 name: clip
 kind: function
 lang: go
 domain: datascience
 version: "1.0.0"
 purity: pure
 signature: "func Clip(data []float64, min, max float64) []float64"
 description: "Recorta cada valor del slice para que quede dentro del rango [min, max]."
 tags: [datascience, clamp, clip, range]
 uses_functions: []
 uses_types: []
 returns: []
 returns_optional: false
 error_type: ""
 imports: []
 tested: false
 tests: []
 test_file_path: ""
 file_path: "functions/datascience/clip.go"
 ---
@@ -0,0 +1,38 @@
 package datascience
 import "math"
 // DetectOutliers reports, for each value of data, whether it is an outlier
 // according to its z-score: element i of the result is true when
 // |data[i]-mean| / stddev is strictly greater than threshold.
 // The standard deviation is the population one (divides by n).
 // An empty input yields an empty slice; when the standard deviation is 0
 // every entry is false.
 func DetectOutliers(data []float64, threshold float64) []bool {
 	if len(data) == 0 {
 		return []bool{}
 	}
 	count := float64(len(data))
 	total := 0.0
 	for i := range data {
 		total += data[i]
 	}
 	mu := total / count
 	sumSq := 0.0
 	for i := range data {
 		dev := data[i] - mu
 		sumSq += dev * dev
 	}
 	sigma := math.Sqrt(sumSq / count)
 	flags := make([]bool, len(data))
 	// With zero spread no z-score is defined, so nothing is flagged.
 	if sigma != 0 {
 		for i := range data {
 			flags[i] = math.Abs((data[i]-mu)/sigma) > threshold
 		}
 	}
 	return flags
 }
@@ -0,0 +1,21 @@
 ---
 name: detect_outliers
 kind: function
 lang: go
 domain: datascience
 version: "1.0.0"
 purity: pure
 signature: "func DetectOutliers(data []float64, threshold float64) []bool"
 description: "Detecta outliers en un slice de float64 usando z-score. Devuelve true para valores cuyo |z-score| supera el umbral."
 tags: [datascience, statistics, outlier, anomaly]
 uses_functions: []
 uses_types: []
 returns: []
 returns_optional: false
 error_type: ""
 imports: ["math"]
 tested: false
 tests: []
 test_file_path: ""
 file_path: "functions/datascience/detect_outliers.go"
 ---
@@ -0,0 +1,8 @@
 package datascience
 import "fmt"
 // FetchDataFrame is meant to run a SQL query against the given DSN and return
 // the rows as a slice of column-to-value maps.
 // It is currently a stub: both arguments are ignored and the call always
 // returns a nil slice together with a "not implemented" error.
 func FetchDataFrame(dsn, query string) ([]map[string]any, error) {
 	var rows []map[string]any
 	return rows, fmt.Errorf("not implemented")
 }
@@ -0,0 +1,21 @@
 ---
 name: fetch_data_frame
 kind: function
 lang: go
 domain: datascience
 version: "1.0.0"
 purity: impure
 signature: "func FetchDataFrame(dsn, query string) ([]map[string]any, error)"
 description: "Ejecuta una consulta SQL contra un DSN y retorna los resultados como slice de mapas columna-valor."
 tags: [datascience, io, bigquery, fetch]
 uses_functions: []
 uses_types: []
 returns: []
 returns_optional: false
 error_type: "error_go_core"
 imports: ["fmt"]
 tested: false
 tests: []
 test_file_path: ""
 file_path: "functions/datascience/fetch_data_frame.go"
 ---
@@ -0,0 +1,61 @@
 package datascience
 import (
 	"math"
 	"math/cmplx"
 )
 // FFT computes the Fast Fourier Transform of data with the radix-2
 // Cooley-Tukey algorithm.
 // When len(data) is not a power of two the input is zero-padded up to the next
 // power of two, so the result has that padded length. An empty input yields an
 // empty slice.
 func FFT(data []float64) []complex128 {
 	if len(data) == 0 {
 		return []complex128{}
 	}
 	// make zero-fills the buffer, so every slot past len(data) is already the
 	// zero padding.
 	spectrum := make([]complex128, nextPow2(len(data)))
 	for i, sample := range data {
 		spectrum[i] = complex(sample, 0)
 	}
 	fftRecursive(spectrum)
 	return spectrum
 }
 // nextPow2 returns the smallest power of two that is greater than or equal
 // to n. Any n <= 1 yields 1.
 func nextPow2(n int) int {
 	for pow := 1; ; pow *= 2 {
 		if pow >= n {
 			return pow
 		}
 	}
 }
 // fftRecursive applies the radix-2 decimation-in-time Cooley-Tukey transform
 // to x, overwriting x with the result. Slices of length 0 or 1 are left as
 // they are.
 // NOTE(review): the split uses len(x)/2, so the length is presumably expected
 // to be a power of two (FFT pads its input accordingly) — confirm for any
 // other caller.
 func fftRecursive(x []complex128) {
 	size := len(x)
 	if size < 2 {
 		return
 	}
 	half := size / 2
 	// Split the samples by index parity and transform each half on its own.
 	evens := make([]complex128, half)
 	odds := make([]complex128, half)
 	for i := range evens {
 		evens[i], odds[i] = x[2*i], x[2*i+1]
 	}
 	fftRecursive(evens)
 	fftRecursive(odds)
 	// Butterfly step: combine the two half-size transforms using the twiddle
 	// factor exp(-2*pi*i*k/size).
 	for k, e := range evens {
 		angle := -2 * math.Pi * float64(k) / float64(size)
 		t := cmplx.Rect(1, angle) * odds[k]
 		x[k], x[k+half] = e+t, e-t
 	}
 }
@@ -0,0 +1,21 @@
 ---
 name: fft
 kind: function
 lang: go
 domain: datascience
 version: "1.0.0"
 purity: pure
 signature: "func FFT(data []float64) []complex128"
 description: "Calcula la Transformada Rápida de Fourier (FFT) usando el algoritmo Cooley-Tukey radix-2. Aplica zero-padding si la longitud no es potencia de 2."
 tags: [datascience, dsp, fft, fourier, frequency]
 uses_functions: []
 uses_types: []
 returns: []
 returns_optional: false
 error_type: ""
 imports: ["math", "math/cmplx"]
 tested: false
 tests: []
 test_file_path: ""
 file_path: "functions/datascience/fft.go"
 ---
@@ -0,0 +1,11 @@
 package datascience
 // GroupBy partitions xs into groups keyed by the value keyFn returns for each
 // element. Within every group the elements keep the order they had in xs.
 // The returned map is never nil, even when xs is empty.
 func GroupBy[T any, K comparable](xs []T, keyFn func(T) K) map[K][]T {
 	byKey := map[K][]T{}
 	for i := range xs {
 		item := xs[i]
 		key := keyFn(item)
 		byKey[key] = append(byKey[key], item)
 	}
 	return byKey
 }
@@ -0,0 +1,21 @@
 ---
 name: group_by
 kind: function
 lang: go
 domain: datascience
 version: "1.0.0"
 purity: pure
 signature: "func GroupBy[T any, K comparable](xs []T, keyFn func(T) K) map[K][]T"
 description: "Agrupa los elementos de un slice según una función clave, devolviendo un mapa de clave a slice de elementos."
 tags: [datascience, group, aggregate, generic]
 uses_functions: []
 uses_types: []
 returns: []
 returns_optional: false
 error_type: ""
 imports: []
 tested: false
 tests: []
 test_file_path: ""
 file_path: "functions/datascience/group_by.go"
 ---
@@ -0,0 +1,39 @@
 package datascience
 import "math"
 // Histogram counts how many values of data fall into each of buckets
 // equal-width intervals spanning [min(data), max(data)].
 //
 // The result has length buckets; the maximum value is counted in the last
 // bucket. If buckets <= 0 an empty slice is returned. If data is empty the
 // result is all zeros. If all finite values are equal they are all counted in
 // the first bucket.
 //
 // NaN and ±Inf values cannot be placed on a finite equal-width grid, so they
 // are ignored: they neither influence the range nor get counted.
 func Histogram(data []float64, buckets int) []int {
 	if buckets <= 0 {
 		// make([]int, buckets) panics at run time for a negative length, so
 		// return an empty slice explicitly instead.
 		return []int{}
 	}
 	counts := make([]int, buckets)
 	minVal := math.Inf(1)
 	maxVal := math.Inf(-1)
 	finite := 0
 	for _, v := range data {
 		if math.IsNaN(v) || math.IsInf(v, 0) {
 			continue
 		}
 		finite++
 		if v < minVal {
 			minVal = v
 		}
 		if v > maxVal {
 			maxVal = v
 		}
 	}
 	if finite == 0 {
 		return counts
 	}
 	rang := maxVal - minVal
 	if rang == 0 {
 		// All finite values are equal; put them all in the first bucket.
 		counts[0] = finite
 		return counts
 	}
 	for _, v := range data {
 		if math.IsNaN(v) || math.IsInf(v, 0) {
 			continue
 		}
 		pos := float64(buckets) * (v - minVal) / rang
 		// Default to the last bucket. This covers v == maxVal (pos == buckets)
 		// and any pos that is not a usable number because the range
 		// arithmetic overflowed; converting such a value to int is
 		// implementation-defined and could index out of bounds.
 		idx := buckets - 1
 		if pos < float64(buckets) {
 			idx = int(pos)
 		}
 		counts[idx]++
 	}
 	return counts
 }
@@ -0,0 +1,21 @@
 ---
 name: histogram
 kind: function
 lang: go
 domain: datascience
 version: "1.0.0"
 purity: pure
 signature: "func Histogram(data []float64, buckets int) []int"
 description: "Calcula las frecuencias de un slice de float64 distribuidas en un número dado de buckets equiespaciados."
 tags: [datascience, statistics, histogram, frequency]
 uses_functions: []
 uses_types: []
 returns: []
 returns_optional: false
 error_type: ""
 imports: ["math"]
 tested: false
 tests: []
 test_file_path: ""
 file_path: "functions/datascience/histogram.go"
 ---
@@ -0,0 +1,18 @@
 package datascience
 import "math"
 // Impute fills NaN values using forward-fill: every NaN is replaced by the
 // most recent preceding non-NaN value. Leading NaNs, which have no earlier
 // valid value to copy, stay NaN. The input slice is not modified.
 func Impute(data []float64) []float64 {
 	filled := make([]float64, 0, len(data))
 	// carry holds the last valid value seen so far; NaN until one appears.
 	carry := math.NaN()
 	for i := range data {
 		if cur := data[i]; !math.IsNaN(cur) {
 			carry = cur
 		}
 		filled = append(filled, carry)
 	}
 	return filled
 }
@@ -0,0 +1,21 @@
 ---
 name: impute
 kind: function
 lang: go
 domain: datascience
 version: "1.0.0"
 purity: pure
 signature: "func Impute(data []float64) []float64"
 description: "Rellena valores NaN en un slice de float64 usando forward-fill, reemplazando cada NaN con el último valor válido anterior."
 tags: [datascience, impute, missing, fill]
 uses_functions: []
 uses_types: []
 returns: []
 returns_optional: false
 error_type: ""
 imports: ["math"]
 tested: false
 tests: []
 test_file_path: ""
 file_path: "functions/datascience/impute.go"
 ---
@@ -0,0 +1,8 @@
 package datascience
 import "fmt"
 // LoadCSV is meant to load the CSV file at path and return it as a slice of
 // column-to-value maps.
 // It is currently a stub: path is ignored and the call always returns a nil
 // slice together with a "not implemented" error.
 func LoadCSV(path string) ([]map[string]string, error) {
 	var rows []map[string]string
 	return rows, fmt.Errorf("not implemented")
 }
@@ -0,0 +1,21 @@
 ---
 name: load_csv
 kind: function
 lang: go
 domain: datascience
 version: "1.0.0"
 purity: impure
 signature: "func LoadCSV(path string) ([]map[string]string, error)"
 description: "Carga un archivo CSV desde disco y lo retorna como slice de mapas columna-valor."
 tags: [datascience, io, csv, load]
 uses_functions: []
 uses_types: []
 returns: []
 returns_optional: false
 error_type: "error_go_core"
 imports: ["fmt"]
 tested: false
 tests: []
 test_file_path: ""
 file_path: "functions/datascience/load_csv.go"
 ---
@@ -0,0 +1,8 @@
 package datascience
 import "fmt"
 // LoadParquet is meant to load the Parquet file at path and return it as a
 // slice of maps.
 // It is currently a stub: path is ignored and the call always returns a nil
 // slice together with a "not implemented" error.
 func LoadParquet(path string) ([]map[string]any, error) {
 	var rows []map[string]any
 	return rows, fmt.Errorf("not implemented")
 }
@@ -0,0 +1,21 @@
 ---
 name: load_parquet
 kind: function
 lang: go
 domain: datascience
 version: "1.0.0"
 purity: impure
 signature: "func LoadParquet(path string) ([]map[string]any, error)"
 description: "Carga un archivo Parquet desde disco y lo retorna como slice de mapas columna-valor."
 tags: [datascience, io, parquet, load]
 uses_functions: []
 uses_types: []
 returns: []
 returns_optional: false
 error_type: "error_go_core"
 imports: ["fmt"]
 tested: false
 tests: []
 test_file_path: ""
 file_path: "functions/datascience/load_parquet.go"
 ---
@@ -0,0 +1,33 @@
 package datascience
 import "math"
 // MinMaxScale rescales data to the range [0, 1] with min-max normalization:
 // each value x becomes (x - min) / (max - min).
 // An empty input yields an empty slice; when min == max the result is a slice
 // of zeros. The input slice is not modified.
 func MinMaxScale(data []float64) []float64 {
 	if len(data) == 0 {
 		return []float64{}
 	}
 	lo, hi := math.Inf(1), math.Inf(-1)
 	for i := range data {
 		x := data[i]
 		if x < lo {
 			lo = x
 		}
 		if x > hi {
 			hi = x
 		}
 	}
 	scaled := make([]float64, len(data))
 	// A zero span means every value is identical; keep the zero-filled slice
 	// rather than dividing by zero.
 	if span := hi - lo; span != 0 {
 		for i := range data {
 			scaled[i] = (data[i] - lo) / span
 		}
 	}
 	return scaled
 }
@@ -0,0 +1,21 @@
 ---
 name: min_max_scale
 kind: function
 lang: go
 domain: datascience
 version: "1.0.0"
 purity: pure
 signature: "func MinMaxScale(data []float64) []float64"
 description: "Escala los valores de un slice al rango [0, 1] usando normalización min-max."
 tags: [datascience, statistics, normalize, scale]
 uses_functions: []
 uses_types: []
 returns: []
 returns_optional: false
 error_type: ""
 imports: ["math"]
 tested: false
 tests: []
 test_file_path: ""
 file_path: "functions/datascience/min_max_scale.go"
 ---
@@ -0,0 +1,39 @@
 package datascience
 import "math"
 // Pearson returns the Pearson correlation coefficient between xs and ys.
 //
 // If the slices differ in length, only the first min(len(xs), len(ys))
 // elements of each are used. The result is 0 when there are no elements or
 // when either series has zero variance. Otherwise it lies in [-1, 1].
 func Pearson(xs, ys []float64) float64 {
 	n := len(xs)
 	if len(ys) < n {
 		n = len(ys)
 	}
 	if n == 0 {
 		return 0
 	}
 	xs, ys = xs[:n], ys[:n]
 	var sumX, sumY float64
 	for i := range xs {
 		sumX += xs[i]
 		sumY += ys[i]
 	}
 	meanX := sumX / float64(n)
 	meanY := sumY / float64(n)
 	var num, denomX, denomY float64
 	for i := range xs {
 		dx := xs[i] - meanX
 		dy := ys[i] - meanY
 		num += dx * dy
 		denomX += dx * dx
 		denomY += dy * dy
 	}
 	if denomX == 0 || denomY == 0 {
 		return 0
 	}
 	// Take the square roots separately: the product denomX*denomY can
 	// overflow to +Inf or underflow to 0 even when each factor is
 	// representable, which would wrongly collapse the result to 0.
 	r := num / (math.Sqrt(denomX) * math.Sqrt(denomY))
 	// The exact coefficient is bounded by 1 in magnitude; clamp the small
 	// excess that floating-point rounding can introduce.
 	if r > 1 {
 		return 1
 	}
 	if r < -1 {
 		return -1
 	}
 	return r
 }
@@ -0,0 +1,21 @@
 ---
 name: pearson
 kind: function
 lang: go
 domain: datascience
 version: "1.0.0"
 purity: pure
 signature: "func Pearson(xs, ys []float64) float64"
 description: "Calcula el coeficiente de correlación de Pearson entre dos slices de float64."
 tags: [datascience, statistics, correlation, pearson]
 uses_functions: []
 uses_types: []
 returns: []
 returns_optional: false
 error_type: ""
 imports: ["math"]
 tested: false
 tests: []
 test_file_path: ""
 file_path: "functions/datascience/pearson.go"
 ---
@@ -0,0 +1,17 @@
 package datascience
 // RollingWindow returns every contiguous window of length size over xs, in
 // order, advancing one element at a time. Each window is an independent copy,
 // so changing it does not affect xs or the other windows.
 // It returns nil when size <= 0 or size > len(xs).
 func RollingWindow[T any](xs []T, size int) [][]T {
 	if size <= 0 || len(xs) < size {
 		return nil
 	}
 	windows := make([][]T, len(xs)-size+1)
 	for start := range windows {
 		win := make([]T, size)
 		// copy stops after len(win) == size elements.
 		copy(win, xs[start:])
 		windows[start] = win
 	}
 	return windows
 }
@@ -0,0 +1,21 @@
 ---
 name: rolling_window
 kind: function
 lang: go
 domain: datascience
 version: "1.0.0"
 purity: pure
 signature: "func RollingWindow[T any](xs []T, size int) [][]T"
 description: "Genera ventanas deslizantes de tamaño fijo sobre un slice genérico."
 tags: [datascience, window, rolling, sliding, generic]
 uses_functions: []
 uses_types: []
 returns: []
 returns_optional: false
 error_type: ""
 imports: []
 tested: false
 tests: []
 test_file_path: ""
 file_path: "functions/datascience/rolling_window.go"
 ---
@@ -0,0 +1,35 @@
 package datascience
 import "math"
 // Standardize applies z-score normalization to data: every value x becomes
 // (x - mean) / stddev, where stddev is the population standard deviation
 // (divides by n).
 // An empty input yields an empty slice; when stddev is 0 the result is a
 // slice of zeros. The input slice is not modified.
 func Standardize(data []float64) []float64 {
 	if len(data) == 0 {
 		return []float64{}
 	}
 	count := float64(len(data))
 	total := 0.0
 	for i := range data {
 		total += data[i]
 	}
 	mu := total / count
 	sumSq := 0.0
 	for i := range data {
 		dev := data[i] - mu
 		sumSq += dev * dev
 	}
 	sigma := math.Sqrt(sumSq / count)
 	scores := make([]float64, len(data))
 	// With zero spread the z-score is undefined; keep the zero-filled slice.
 	if sigma != 0 {
 		for i := range data {
 			scores[i] = (data[i] - mu) / sigma
 		}
 	}
 	return scores
 }
@@ -0,0 +1,21 @@
 ---
 name: standardize
 kind: function
 lang: go
 domain: datascience
 version: "1.0.0"
 purity: pure
 signature: "func Standardize(data []float64) []float64"
 description: "Aplica Z-score normalización a un slice de float64, transformando cada valor a (x - media) / desviación estándar."
 tags: [datascience, statistics, normalize, zscore]
 uses_functions: []
 uses_types: []
 returns: []
 returns_optional: false
 error_type: ""
 imports: ["math"]
 tested: false
 tests: []
 test_file_path: ""
 file_path: "functions/datascience/standardize.go"
 ---
@@ -0,0 +1,15 @@
 package datascience
 // ZipSlices pairs up the elements of as and bs position by position,
 // returning a slice of [2]float64 where element i is {as[i], bs[i]}.
 // The result is as long as the shorter of the two inputs.
 func ZipSlices(as, bs []float64) [][2]float64 {
 	shorter := len(bs)
 	if len(as) <= shorter {
 		shorter = len(as)
 	}
 	pairs := make([][2]float64, shorter)
 	for i := range pairs {
 		pairs[i][0] = as[i]
 		pairs[i][1] = bs[i]
 	}
 	return pairs
 }
@@ -0,0 +1,21 @@
 ---
 name: zip_slices
 kind: function
 lang: go
 domain: datascience
 version: "1.0.0"
 purity: pure
 signature: "func ZipSlices(as, bs []float64) [][2]float64"
 description: "Combina dos slices de float64 en un slice de pares [2]float64, truncando al más corto."
 tags: [datascience, zip, combine, pair]
 uses_functions: []
 uses_types: []
 returns: []
 returns_optional: false
 error_type: ""
 imports: []
 tested: false
 tests: []
 test_file_path: ""
 file_path: "functions/datascience/zip_slices.go"
 ---