From fc734029c1bfeeb104fab94045d079016dc81f08 Mon Sep 17 00:00:00 2001 From: Egutierrez Date: Sat, 28 Mar 2026 02:23:36 +0100 Subject: [PATCH] =?UTF-8?q?feat:=2015=20funciones=20datascience=20?= =?UTF-8?q?=E2=80=94=20estad=C3=ADstica,=20DSP=20e=20IO=20de=20datos?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 12 funciones puras con implementación real: Standardize, MinMaxScale, Clip, RollingWindow, ZipSlices, GroupBy, Histogram, Pearson, Autocorrelation, FFT (Cooley-Tukey), DetectOutliers, Impute 3 funciones impuras (stubs): LoadCSV, LoadParquet, FetchDataFrame Co-Authored-By: Claude Opus 4.6 (1M context) --- functions/datascience/autocorrelation.go | 12 +++++ functions/datascience/autocorrelation.md | 21 ++++++++ functions/datascience/clip.go | 17 +++++++ functions/datascience/clip.md | 21 ++++++++ functions/datascience/detect_outliers.go | 38 ++++++++++++++ functions/datascience/detect_outliers.md | 21 ++++++++ functions/datascience/fetch_data_frame.go | 8 +++ functions/datascience/fetch_data_frame.md | 21 ++++++++ functions/datascience/fft.go | 61 +++++++++++++++++++++++ functions/datascience/fft.md | 21 ++++++++ functions/datascience/group_by.go | 11 ++++ functions/datascience/group_by.md | 21 ++++++++ functions/datascience/histogram.go | 39 +++++++++++++++ functions/datascience/histogram.md | 21 ++++++++ functions/datascience/impute.go | 18 +++++++ functions/datascience/impute.md | 21 ++++++++ functions/datascience/load_csv.go | 8 +++ functions/datascience/load_csv.md | 21 ++++++++ functions/datascience/load_parquet.go | 8 +++ functions/datascience/load_parquet.md | 21 ++++++++ functions/datascience/min_max_scale.go | 33 ++++++++++++ functions/datascience/min_max_scale.md | 21 ++++++++ functions/datascience/pearson.go | 39 +++++++++++++++ functions/datascience/pearson.md | 21 ++++++++ functions/datascience/rolling_window.go | 17 +++++++ functions/datascience/rolling_window.md | 21 ++++++++ 
functions/datascience/standardize.go | 35 +++++++++++++ functions/datascience/standardize.md | 21 ++++++++ functions/datascience/zip_slices.go | 15 ++++++ functions/datascience/zip_slices.md | 21 ++++++++ 30 files changed, 674 insertions(+) create mode 100644 functions/datascience/autocorrelation.go create mode 100644 functions/datascience/autocorrelation.md create mode 100644 functions/datascience/clip.go create mode 100644 functions/datascience/clip.md create mode 100644 functions/datascience/detect_outliers.go create mode 100644 functions/datascience/detect_outliers.md create mode 100644 functions/datascience/fetch_data_frame.go create mode 100644 functions/datascience/fetch_data_frame.md create mode 100644 functions/datascience/fft.go create mode 100644 functions/datascience/fft.md create mode 100644 functions/datascience/group_by.go create mode 100644 functions/datascience/group_by.md create mode 100644 functions/datascience/histogram.go create mode 100644 functions/datascience/histogram.md create mode 100644 functions/datascience/impute.go create mode 100644 functions/datascience/impute.md create mode 100644 functions/datascience/load_csv.go create mode 100644 functions/datascience/load_csv.md create mode 100644 functions/datascience/load_parquet.go create mode 100644 functions/datascience/load_parquet.md create mode 100644 functions/datascience/min_max_scale.go create mode 100644 functions/datascience/min_max_scale.md create mode 100644 functions/datascience/pearson.go create mode 100644 functions/datascience/pearson.md create mode 100644 functions/datascience/rolling_window.go create mode 100644 functions/datascience/rolling_window.md create mode 100644 functions/datascience/standardize.go create mode 100644 functions/datascience/standardize.md create mode 100644 functions/datascience/zip_slices.go create mode 100644 functions/datascience/zip_slices.md diff --git a/functions/datascience/autocorrelation.go b/functions/datascience/autocorrelation.go new file 
mode 100644 index 00000000..ca322ced --- /dev/null +++ b/functions/datascience/autocorrelation.go @@ -0,0 +1,12 @@ +package datascience + +// Autocorrelation calcula la autocorrelación de data con el desfase (lag) dado. +// Usa la correlación de Pearson entre data[0:n-lag] y data[lag:n]. +// Si lag es inválido, retorna 0. +func Autocorrelation(data []float64, lag int) float64 { + n := len(data) + if lag < 0 || lag >= n { + return 0 + } + return Pearson(data[:n-lag], data[lag:]) +} diff --git a/functions/datascience/autocorrelation.md b/functions/datascience/autocorrelation.md new file mode 100644 index 00000000..20020767 --- /dev/null +++ b/functions/datascience/autocorrelation.md @@ -0,0 +1,21 @@ +--- +name: autocorrelation +kind: function +lang: go +domain: datascience +version: "1.0.0" +purity: pure +signature: "func Autocorrelation(data []float64, lag int) float64" +description: "Calcula la autocorrelación de una serie temporal con un desfase (lag) dado, usando correlación de Pearson." +tags: [datascience, statistics, autocorrelation, timeseries] +uses_functions: [pearson_go_datascience] +uses_types: [] +returns: [] +returns_optional: false +error_type: "" +imports: [] +tested: false +tests: [] +test_file_path: "" +file_path: "functions/datascience/autocorrelation.go" +--- diff --git a/functions/datascience/clip.go b/functions/datascience/clip.go new file mode 100644 index 00000000..947a80ee --- /dev/null +++ b/functions/datascience/clip.go @@ -0,0 +1,17 @@ +package datascience + +// Clip recorta cada valor del slice para que quede dentro del rango [min, max]. 
// Clip returns a new slice in which every value of data is limited to the
// closed range [min, max]: values below min become min, values above max
// become max, and everything else (including NaN) is copied unchanged.
// The input slice is not modified.
func Clip(data []float64, min, max float64) []float64 {
	clipped := make([]float64, len(data))
	for idx := range data {
		val := data[idx]
		if val < min {
			val = min
		} else if val > max {
			val = max
		}
		clipped[idx] = val
	}
	return clipped
}
// DetectOutliers flags the z-score outliers of data.
//
// The result has one entry per input value; an entry is true when the
// absolute z-score of the corresponding value, computed with the population
// standard deviation (divisor n), is strictly greater than threshold.
// An empty input yields an empty, non-nil slice. When the standard deviation
// is 0 (all values equal) no value is flagged.
func DetectOutliers(data []float64, threshold float64) []bool {
	count := len(data)
	if count == 0 {
		return []bool{}
	}

	total := 0.0
	for i := 0; i < count; i++ {
		total += data[i]
	}
	mean := total / float64(count)

	sumSq := 0.0
	for i := 0; i < count; i++ {
		diff := data[i] - mean
		sumSq += diff * diff
	}
	stddev := math.Sqrt(sumSq / float64(count))

	flags := make([]bool, count)
	if stddev != 0 {
		for i := range flags {
			flags[i] = math.Abs((data[i]-mean)/stddev) > threshold
		}
	}
	return flags
}
// FetchDataFrame is intended to run a SQL query against a DSN and return the
// rows as a slice of column-to-value maps. It is currently a stub: both
// arguments are ignored and it always returns a nil slice together with a
// "not implemented" error.
func FetchDataFrame(dsn, query string) ([]map[string]any, error) {
	err := fmt.Errorf("not implemented")
	return nil, err
}

// FFT computes the discrete Fourier transform of data with the radix-2
// Cooley-Tukey algorithm.
//
// When len(data) is not a power of two the input is zero-padded up to the
// next power of two, so the returned spectrum has nextPow2(len(data))
// entries. An empty input yields an empty, non-nil slice.
func FFT(data []float64) []complex128 {
	if len(data) == 0 {
		return []complex128{}
	}

	// Freshly allocated entries are already 0+0i, which provides the padding.
	spectrum := make([]complex128, nextPow2(len(data)))
	for i, sample := range data {
		spectrum[i] = complex(sample, 0)
	}

	fftRecursive(spectrum)
	return spectrum
}

// nextPow2 returns the smallest power of two that is >= n (1 for n <= 1).
func nextPow2(n int) int {
	pow := 1
	for ; pow < n; pow *= 2 {
	}
	return pow
}

// fftRecursive applies the radix-2 decimation-in-time Cooley-Tukey transform
// to x, overwriting x with the result. Each level allocates two scratch
// slices for the even- and odd-indexed samples.
func fftRecursive(x []complex128) {
	size := len(x)
	if size < 2 {
		return
	}
	half := size / 2

	// Split into even-indexed and odd-indexed samples.
	evens := make([]complex128, 0, half)
	odds := make([]complex128, 0, half)
	for i := 0; i+1 < size; i += 2 {
		evens = append(evens, x[i])
		odds = append(odds, x[i+1])
	}

	fftRecursive(evens)
	fftRecursive(odds)

	// Butterfly: combine the two half-size transforms using the twiddle
	// factor exp(-2*pi*i*k/size).
	for k := 0; k < half; k++ {
		twiddled := cmplx.Rect(1, -2*math.Pi*float64(k)/float64(size)) * odds[k]
		x[k] = evens[k] + twiddled
		x[k+half] = evens[k] - twiddled
	}
}
// GroupBy partitions xs by the key that keyFn returns for each element.
//
// The result maps every distinct key to the elements that produced it, in
// the order they appear in xs. An empty input yields an empty, non-nil map.
func GroupBy[T any, K comparable](xs []T, keyFn func(T) K) map[K][]T {
	groups := make(map[K][]T)
	for _, x := range xs {
		k := keyFn(x)
		groups[k] = append(groups[k], x)
	}
	return groups
}

// Histogram counts how many values of data fall into each of `buckets`
// equally wide intervals spanning [min(data), max(data)].
//
// The returned slice has length buckets; the maximum value is counted in the
// last bucket. Special cases:
//   - buckets <= 0 returns an empty, non-nil slice (a negative count used to
//     panic inside make).
//   - NaN and ±Inf values are ignored: they have no well-defined bucket and
//     previously caused an out-of-range float-to-int conversion.
//   - If no finite value is present, all counts are 0.
//   - If all finite values are equal, they are all counted in bucket 0.
func Histogram(data []float64, buckets int) []int {
	if buckets <= 0 {
		return []int{}
	}
	counts := make([]int, buckets)

	minVal, maxVal := math.Inf(1), math.Inf(-1)
	finite := 0
	for _, v := range data {
		if math.IsNaN(v) || math.IsInf(v, 0) {
			continue
		}
		finite++
		if v < minVal {
			minVal = v
		}
		if v > maxVal {
			maxVal = v
		}
	}
	if finite == 0 {
		return counts
	}

	span := maxVal - minVal
	if span == 0 {
		// Zero-width range: there is only one meaningful bucket.
		counts[0] = finite
		return counts
	}

	for _, v := range data {
		if math.IsNaN(v) || math.IsInf(v, 0) {
			continue
		}
		// Clamp in floating point BEFORE converting to int: the maximum
		// value maps to exactly `buckets`, and an overflowing span can make
		// pos Inf or NaN, neither of which may be converted to int safely.
		idx := buckets - 1
		if pos := float64(buckets) * (v - minVal) / span; pos < float64(buckets) {
			idx = int(pos)
		}
		counts[idx]++
	}
	return counts
}
// Impute fills NaN values by forward-fill and returns the result as a new
// slice; the input is not modified.
//
// Every NaN is replaced with the most recent non-NaN value that precedes it.
// NaN values at the start of the series, which have no predecessor, remain
// NaN.
func Impute(data []float64) []float64 {
	filled := make([]float64, len(data))
	carry := math.NaN()
	for i := range data {
		if cur := data[i]; !math.IsNaN(cur) {
			carry = cur
		}
		filled[i] = carry
	}
	return filled
}

// LoadCSV is intended to load a CSV file and return it as a slice of
// column-to-value maps. It is currently a stub: path is ignored and it always
// returns a nil slice together with a "not implemented" error.
func LoadCSV(path string) ([]map[string]string, error) {
	var records []map[string]string
	return records, fmt.Errorf("not implemented")
}
// LoadParquet is intended to load a Parquet file and return it as a slice of
// maps. It is currently a stub: path is ignored and it always returns a nil
// slice together with a "not implemented" error.
func LoadParquet(path string) ([]map[string]any, error) {
	var rows []map[string]any
	return rows, fmt.Errorf("not implemented")
}
// MinMaxScale rescales data to the range [0, 1] with min-max normalization:
// each value v becomes (v - min) / (max - min).
//
// An empty input yields an empty, non-nil slice. When min == max the result
// is a slice of zeros with the same length as data. The input is not
// modified.
func MinMaxScale(data []float64) []float64 {
	if len(data) == 0 {
		return []float64{}
	}

	lo, hi := math.Inf(1), math.Inf(-1)
	for i := range data {
		if data[i] < lo {
			lo = data[i]
		}
		if data[i] > hi {
			hi = data[i]
		}
	}

	scaled := make([]float64, len(data))
	span := hi - lo
	if span != 0 {
		for i := range data {
			scaled[i] = (data[i] - lo) / span
		}
	}
	return scaled
}
// Pearson returns the Pearson correlation coefficient between xs and ys.
//
// When the slices differ in length only the first min(len(xs), len(ys))
// pairs are used. It returns 0 when there are no pairs or when either series
// has zero variance. The result is clamped to [-1, 1] so that floating-point
// rounding cannot produce a value slightly outside the mathematical range.
func Pearson(xs, ys []float64) float64 {
	n := len(xs)
	if len(ys) < n {
		n = len(ys)
	}
	if n == 0 {
		return 0
	}

	var sumX, sumY float64
	for i := 0; i < n; i++ {
		sumX += xs[i]
		sumY += ys[i]
	}
	meanX := sumX / float64(n)
	meanY := sumY / float64(n)

	var num, sqX, sqY float64
	for i := 0; i < n; i++ {
		dx := xs[i] - meanX
		dy := ys[i] - meanY
		num += dx * dy
		sqX += dx * dx
		sqY += dy * dy
	}

	// Test the sums of squares directly: a zero here means zero variance.
	if sqX == 0 || sqY == 0 {
		return 0
	}

	// Take the square roots separately. sqrt(sqX*sqY) overflows to +Inf (or
	// underflows to 0) when the product leaves the float64 range even though
	// each factor is representable, which used to turn a perfect correlation
	// of large or tiny values into 0.
	r := num / (math.Sqrt(sqX) * math.Sqrt(sqY))
	switch {
	case r > 1:
		return 1
	case r < -1:
		return -1
	}
	return r
}
// RollingWindow returns every contiguous window of length size over xs, in
// order of their starting index.
//
// Each window is an independent copy, so changing a window does not affect
// xs or the other windows. It returns nil when size <= 0 or size > len(xs).
func RollingWindow[T any](xs []T, size int) [][]T {
	if size <= 0 || len(xs) < size {
		return nil
	}

	count := len(xs) - size + 1
	out := make([][]T, count)
	for start := range out {
		win := make([]T, size)
		// copy stops after len(win) == size elements.
		copy(win, xs[start:])
		out[start] = win
	}
	return out
}
// Standardize applies z-score normalization to data: each value x becomes
// (x - mean) / stddev, where stddev is the population standard deviation
// (divisor n).
//
// An empty input yields an empty, non-nil slice. When stddev is 0 the result
// is a slice of zeros with the same length as data. The input is not
// modified.
func Standardize(data []float64) []float64 {
	count := len(data)
	if count == 0 {
		return []float64{}
	}

	total := 0.0
	for i := 0; i < count; i++ {
		total += data[i]
	}
	mean := total / float64(count)

	sumSq := 0.0
	for i := 0; i < count; i++ {
		diff := data[i] - mean
		sumSq += diff * diff
	}
	stddev := math.Sqrt(sumSq / float64(count))

	scores := make([]float64, count)
	if stddev != 0 {
		for i := range scores {
			scores[i] = (data[i] - mean) / stddev
		}
	}
	return scores
}
// ZipSlices pairs up the elements of as and bs position by position, each
// pair being a [2]float64{as[i], bs[i]}.
//
// The result is as long as the shorter input; extra elements of the longer
// one are dropped. The result is never nil.
func ZipSlices(as, bs []float64) [][2]float64 {
	limit := len(bs)
	if len(as) < limit {
		limit = len(as)
	}

	pairs := make([][2]float64, 0, limit)
	for i := 0; i < limit; i++ {
		pairs = append(pairs, [2]float64{as[i], bs[i]})
	}
	return pairs
}