feat: funciones Go — core (cron, join_by_key, validate_struct), datascience (pivot, diff_entities), infra (http, cache, cron_ticker)

Nuevas funciones Go con tests en tres dominios: - core: parse_cron_expr, next_cron_time, join_by_key, validate_struct_fields + tipo CronSchedule - datascience: pivot (tabla dinámica), diff_entities (comparación de entidades) - infra: http_get_json, http_post_json, http_download_file, cache_to_sqlite, cron_ticker Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-04-05 17:11:12 +02:00
parent bee3b0d946
commit 9c0d24d3ef
35 changed files with 3042 additions and 0 deletions
@@ -0,0 +1,96 @@
+package datascience
+
+import "fmt"
+
+// DiffEntities compares two snapshots of entities and returns entity- and
+// field-level differences.
+//
+// Entities are matched by the string form (fmt "%v") of the value stored under
+// key. Entities that do not contain key are ignored. When several entities in
+// the same snapshot share a key, the last one is the one that is compared.
+//
+// ignoreFields lists fields excluded from the comparison; nil selects the
+// default ["created_at", "updated_at"], while an empty non-nil slice ignores
+// nothing. The key field itself is never compared.
+//
+// Field values are compared through their "%v" formatting, so values of
+// different dynamic types that print identically (for example 1 and "1") are
+// considered equal, and a missing field is equal to an explicit nil.
+//
+// The returned map has these entries:
+//   - "added":     []map[string]any, entities only present in after
+//   - "removed":   []map[string]any, entities only present in before
+//   - "modified":  []map[string]any, each {"key": string, "changes": map[string]any}
+//     where changes maps a field name to {"old": value, "new": value}
+//   - "unchanged": int, number of entities present in both without changes
+//   - "summary":   string, "<n> added, <n> removed, <n> modified, <n> unchanged"
+//
+// The slices are ordered deterministically: "added" follows the first
+// appearance of each key in after, "removed" and "modified" follow the first
+// appearance of each key in before.
+func DiffEntities(before, after []map[string]any, key string, ignoreFields []string) map[string]any {
+	if ignoreFields == nil {
+		ignoreFields = []string{"created_at", "updated_at"}
+	}
+	ignoreSet := make(map[string]bool, len(ignoreFields))
+	for _, f := range ignoreFields {
+		ignoreSet[f] = true
+	}
+
+	beforeMap, beforeKeys := indexEntitiesByKey(before, key)
+	afterMap, afterKeys := indexEntitiesByKey(after, key)
+
+	// Non-nil empty slices are kept on purpose: callers type-assert the
+	// results and JSON encoders render them as [] instead of null.
+	added := []map[string]any{}
+	for _, k := range afterKeys {
+		if _, exists := beforeMap[k]; !exists {
+			added = append(added, afterMap[k])
+		}
+	}
+
+	removed := []map[string]any{}
+	modified := []map[string]any{}
+	unchanged := 0
+	for _, k := range beforeKeys {
+		b := beforeMap[k]
+		a, exists := afterMap[k]
+		if !exists {
+			removed = append(removed, b)
+			continue
+		}
+		changes := diffEntityFields(b, a, key, ignoreSet)
+		if len(changes) == 0 {
+			unchanged++
+			continue
+		}
+		modified = append(modified, map[string]any{"key": k, "changes": changes})
+	}
+
+	summary := fmt.Sprintf("%d added, %d removed, %d modified, %d unchanged",
+		len(added), len(removed), len(modified), unchanged)
+
+	return map[string]any{
+		"added":     added,
+		"removed":   removed,
+		"modified":  modified,
+		"unchanged": unchanged,
+		"summary":   summary,
+	}
+}
+
+// indexEntitiesByKey maps the "%v" form of each entity's key value to the
+// entity (last duplicate wins) and returns the distinct keys in order of first
+// appearance. Entities without the key field are skipped.
+func indexEntitiesByKey(entities []map[string]any, key string) (map[string]map[string]any, []string) {
+	byKey := make(map[string]map[string]any, len(entities))
+	order := make([]string, 0, len(entities))
+	for _, e := range entities {
+		raw, ok := e[key]
+		if !ok {
+			continue
+		}
+		k := fmt.Sprintf("%v", raw)
+		if _, seen := byKey[k]; !seen {
+			order = append(order, k)
+		}
+		byKey[k] = e
+	}
+	return byKey, order
+}
+
+// diffEntityFields returns, for every field present in either entity (except
+// key and the ignored ones), an {"old", "new"} entry when the "%v" forms of
+// the two values differ.
+func diffEntityFields(b, a map[string]any, key string, ignoreSet map[string]bool) map[string]any {
+	changes := map[string]any{}
+	compare := func(field string) {
+		if field == key || ignoreSet[field] {
+			return
+		}
+		oldVal, newVal := b[field], a[field]
+		if fmt.Sprintf("%v", oldVal) != fmt.Sprintf("%v", newVal) {
+			changes[field] = map[string]any{"old": oldVal, "new": newVal}
+		}
+	}
+	for field := range b {
+		compare(field)
+	}
+	for field := range a {
+		// Fields shared with b were already handled by the loop above.
+		if _, shared := b[field]; !shared {
+			compare(field)
+		}
+	}
+	return changes
+}
@@ -0,0 +1,52 @@
+---
+name: diff_entities
+kind: function
+lang: go
+domain: datascience
+version: "1.0.0"
+purity: pure
+signature: "func DiffEntities(before, after []map[string]any, key string, ignoreFields []string) map[string]any"
+description: "Compara dos snapshots de entities y devuelve diferencias campo a campo. Detecta añadidas, eliminadas, modificadas e inalteradas. Ignora created_at y updated_at por defecto (pasar nil para usar defaults)."
+tags: [datascience, diff, entities, operations, snapshot, comparison]
+uses_functions: []
+uses_types: []
+returns: []
+returns_optional: false
+error_type: ""
+imports: ["fmt"]
+tested: true
+tests:
+  - "entity añadida"
+  - "entity eliminada"
+  - "entity modificada con detalle de campos"
+  - "entities identicas → unchanged"
+  - "ignore_fields funciona"
+  - "lista vacia vs lista con datos"
+  - "summary format correcto"
+test_file_path: "functions/datascience/diff_entities_test.go"
+file_path: "functions/datascience/diff_entities.go"
+---
+
+## Ejemplo
+
+```go
+before := []map[string]any{
+    {"id": "1", "name": "Alice", "status": "active"},
+    {"id": "2", "name": "Bob"},
+}
+after := []map[string]any{
+    {"id": "1", "name": "Alice", "status": "inactive"},
+    {"id": "3", "name": "Carol"},
+}
+result := DiffEntities(before, after, "id", nil)
+// result["summary"] = "1 added, 1 removed, 1 modified, 0 unchanged"
+// result["added"] = [{"id": "3", "name": "Carol"}]
+// result["removed"] = [{"id": "2", "name": "Bob"}]
+// result["modified"] = [{"key": "1", "changes": {"status": {"old": "active", "new": "inactive"}}}]
+```
+
+## Notas
+
+Función pura. Compara valores con fmt.Sprintf("%v", ...) para manejar tipos heterogéneos en map[string]any.
+ignoreFields nil usa los defaults ["created_at", "updated_at"]. Para no ignorar ningún campo, pasar []string{}.
+Semántica idéntica a diff_entities_py_datascience, permite comparar resultados entre ejecuciones del mismo pipeline.
@@ -0,0 +1,138 @@
+package datascience
+
+import (
+	"testing"
+)
+
+// TestDiffEntities exercises DiffEntities over added, removed, modified and
+// unchanged entities, the ignoreFields handling and the summary string.
+// Length checks that guard a later index access use Fatalf so a wrong length
+// is reported as a failure instead of an index-out-of-range panic.
+func TestDiffEntities(t *testing.T) {
+	t.Run("entity añadida", func(t *testing.T) {
+		before := []map[string]any{
+			{"id": "1", "name": "Alice"},
+		}
+		after := []map[string]any{
+			{"id": "1", "name": "Alice"},
+			{"id": "2", "name": "Bob"},
+		}
+		result := DiffEntities(before, after, "id", nil)
+		added := result["added"].([]map[string]any)
+		if len(added) != 1 {
+			t.Fatalf("expected 1 added, got %d", len(added))
+		}
+		if added[0]["id"] != "2" {
+			t.Errorf("expected added id=2, got %v", added[0]["id"])
+		}
+		if result["unchanged"].(int) != 1 {
+			t.Errorf("expected 1 unchanged, got %v", result["unchanged"])
+		}
+	})
+
+	t.Run("entity eliminada", func(t *testing.T) {
+		before := []map[string]any{
+			{"id": "1", "name": "Alice"},
+			{"id": "2", "name": "Bob"},
+		}
+		after := []map[string]any{
+			{"id": "1", "name": "Alice"},
+		}
+		result := DiffEntities(before, after, "id", nil)
+		removed := result["removed"].([]map[string]any)
+		if len(removed) != 1 {
+			t.Fatalf("expected 1 removed, got %d", len(removed))
+		}
+		if removed[0]["id"] != "2" {
+			t.Errorf("expected removed id=2, got %v", removed[0]["id"])
+		}
+	})
+
+	t.Run("entity modificada con detalle de campos", func(t *testing.T) {
+		before := []map[string]any{
+			{"id": "1", "name": "Alice", "status": "active"},
+		}
+		after := []map[string]any{
+			{"id": "1", "name": "Alice", "status": "inactive"},
+		}
+		result := DiffEntities(before, after, "id", nil)
+		modified := result["modified"].([]map[string]any)
+		if len(modified) != 1 {
+			t.Fatalf("expected 1 modified, got %d", len(modified))
+		}
+		changes, ok := modified[0]["changes"].(map[string]any)
+		if !ok {
+			t.Fatalf("expected changes map, got %v", modified[0]["changes"])
+		}
+		statusChange, ok := changes["status"].(map[string]any)
+		if !ok {
+			t.Fatalf("expected status change, got %v", changes)
+		}
+		if statusChange["old"] != "active" {
+			t.Errorf("expected old=active, got %v", statusChange["old"])
+		}
+		if statusChange["new"] != "inactive" {
+			t.Errorf("expected new=inactive, got %v", statusChange["new"])
+		}
+	})
+
+	t.Run("entities identicas → unchanged", func(t *testing.T) {
+		entities := []map[string]any{
+			{"id": "1", "name": "Alice"},
+			{"id": "2", "name": "Bob"},
+		}
+		result := DiffEntities(entities, entities, "id", nil)
+		if result["unchanged"].(int) != 2 {
+			t.Errorf("expected 2 unchanged, got %v", result["unchanged"])
+		}
+		if len(result["added"].([]map[string]any)) != 0 {
+			t.Errorf("expected 0 added")
+		}
+		if len(result["modified"].([]map[string]any)) != 0 {
+			t.Errorf("expected 0 modified")
+		}
+	})
+
+	t.Run("ignore_fields funciona", func(t *testing.T) {
+		before := []map[string]any{
+			{"id": "1", "name": "Alice", "updated_at": "2024-01-01"},
+		}
+		after := []map[string]any{
+			{"id": "1", "name": "Alice", "updated_at": "2024-06-01"},
+		}
+		// nil selects the defaults, which include updated_at.
+		result := DiffEntities(before, after, "id", nil)
+		if result["unchanged"].(int) != 1 {
+			t.Errorf("expected 1 unchanged (updated_at ignored), got %v", result["unchanged"])
+		}
+		modified := result["modified"].([]map[string]any)
+		if len(modified) != 0 {
+			t.Errorf("expected 0 modified when updated_at is ignored, got %d", len(modified))
+		}
+
+		// An empty, non-nil list ignores nothing, so updated_at is reported.
+		result = DiffEntities(before, after, "id", []string{})
+		modified = result["modified"].([]map[string]any)
+		if len(modified) != 1 {
+			t.Fatalf("expected 1 modified with empty ignore list, got %d", len(modified))
+		}
+		changes := modified[0]["changes"].(map[string]any)
+		if _, ok := changes["updated_at"]; !ok {
+			t.Errorf("expected updated_at change, got %v", changes)
+		}
+
+		// A custom list replaces the defaults.
+		result = DiffEntities(before, after, "id", []string{"updated_at"})
+		if result["unchanged"].(int) != 1 {
+			t.Errorf("expected 1 unchanged with custom ignore list, got %v", result["unchanged"])
+		}
+	})
+
+	t.Run("lista vacia vs lista con datos", func(t *testing.T) {
+		before := []map[string]any{}
+		after := []map[string]any{
+			{"id": "1", "name": "Alice"},
+		}
+		result := DiffEntities(before, after, "id", nil)
+		added := result["added"].([]map[string]any)
+		if len(added) != 1 {
+			t.Errorf("expected 1 added, got %d", len(added))
+		}
+		if result["unchanged"].(int) != 0 {
+			t.Errorf("expected 0 unchanged")
+		}
+	})
+
+	t.Run("summary format correcto", func(t *testing.T) {
+		before := []map[string]any{
+			{"id": "1", "name": "Alice"},
+			{"id": "3", "name": "Carol"},
+		}
+		after := []map[string]any{
+			{"id": "1", "name": "Alice Changed"},
+			{"id": "2", "name": "Bob"},
+		}
+		result := DiffEntities(before, after, "id", nil)
+		summary := result["summary"].(string)
+		expected := "1 added, 1 removed, 1 modified, 0 unchanged"
+		if summary != expected {
+			t.Errorf("expected summary %q, got %q", expected, summary)
+		}
+	})
+}
@@ -0,0 +1,110 @@
+package datascience
+
+import "fmt"
+
+// Pivot reshapes rows from long format to wide format (a pivot table).
+//
+// Rows are grouped by the value stored under index. Every distinct value found
+// under columns becomes an output column, and the non-nil values found under
+// values are combined per (index, column) cell with the aggregation agg.
+//
+// Supported aggregations: "sum", "count", "mean", "min", "max", "first",
+// "last". Any other name behaves like "sum". "count" returns an int, "first"
+// and "last" return the stored value unchanged, and the numeric aggregations
+// return float64. Values that are not a built-in integer or float type count
+// as 0 in the numeric aggregations. Cells without any value are filled with
+// the int 0.
+//
+// String column values are used as column names as-is; any other non-nil
+// value is rendered with fmt.Sprint. Rows whose column value is missing or nil
+// still contribute their index row but add no column.
+//
+// Output rows follow the first appearance of each index value in rows. Index
+// values are used as map keys, so they must be comparable.
+func Pivot(rows []map[string]any, index, columns, values, agg string) []map[string]any {
+	type cell struct {
+		idx any
+		col string
+	}
+
+	var (
+		indexOrder []any
+		colOrder   []string
+	)
+	seenIndex := map[any]bool{}
+	seenCols := map[string]bool{}
+	groups := map[cell][]any{}
+
+	// Single pass: record first-appearance order and collect cell values.
+	for _, row := range rows {
+		idx := row[index]
+		if !seenIndex[idx] {
+			seenIndex[idx] = true
+			indexOrder = append(indexOrder, idx)
+		}
+
+		col, ok := pivotColumnName(row[columns])
+		if !ok {
+			continue
+		}
+		if !seenCols[col] {
+			seenCols[col] = true
+			colOrder = append(colOrder, col)
+		}
+
+		if val := row[values]; val != nil {
+			k := cell{idx, col}
+			groups[k] = append(groups[k], val)
+		}
+	}
+
+	result := make([]map[string]any, 0, len(indexOrder))
+	for _, idx := range indexOrder {
+		record := make(map[string]any, len(colOrder)+1)
+		record[index] = idx
+		for _, col := range colOrder {
+			if vals := groups[cell{idx, col}]; len(vals) > 0 {
+				record[col] = pivotAggregate(vals, agg)
+			} else {
+				record[col] = 0
+			}
+		}
+		result = append(result, record)
+	}
+	return result
+}
+
+// pivotColumnName converts a raw column value into an output column name. It
+// reports false for nil, which cannot name a column.
+func pivotColumnName(v any) (string, bool) {
+	switch c := v.(type) {
+	case nil:
+		return "", false
+	case string:
+		return c, true
+	default:
+		return fmt.Sprint(c), true
+	}
+}
+
+// pivotAggregate combines vals with the aggregation named fn; unknown names
+// fall back to the sum. An empty vals yields the int 0.
+func pivotAggregate(vals []any, fn string) any {
+	if len(vals) == 0 {
+		return 0
+	}
+	switch fn {
+	case "count":
+		return len(vals)
+	case "first":
+		return vals[0]
+	case "last":
+		return vals[len(vals)-1]
+	}
+
+	first := pivotToFloat(vals[0])
+	sum, lo, hi := 0.0, first, first
+	for _, v := range vals {
+		f := pivotToFloat(v)
+		sum += f
+		if f < lo {
+			lo = f
+		}
+		if f > hi {
+			hi = f
+		}
+	}
+
+	switch fn {
+	case "mean":
+		return sum / float64(len(vals))
+	case "min":
+		return lo
+	case "max":
+		return hi
+	default:
+		return sum
+	}
+}
+
+// pivotToFloat converts the built-in integer and float types to float64 and
+// returns 0 for every other type.
+func pivotToFloat(v any) float64 {
+	switch n := v.(type) {
+	case float64:
+		return n
+	case float32:
+		return float64(n)
+	case int:
+		return float64(n)
+	case int8:
+		return float64(n)
+	case int16:
+		return float64(n)
+	case int32:
+		return float64(n)
+	case int64:
+		return float64(n)
+	case uint:
+		return float64(n)
+	case uint8:
+		return float64(n)
+	case uint16:
+		return float64(n)
+	case uint32:
+		return float64(n)
+	case uint64:
+		return float64(n)
+	}
+	return 0
+}
@@ -0,0 +1,43 @@
+---
+name: pivot
+kind: function
+lang: go
+domain: datascience
+version: "1.0.0"
+purity: pure
+signature: "func Pivot(rows []map[string]any, index, columns, values, agg string) []map[string]any"
+description: "Pivot table sin dependencias. Agrupa por index, expande valores unicos de columns como nuevas columnas y agrega values con la funcion indicada (sum, count, mean, min, max, first, last). Valores faltantes se rellenan con 0."
+tags: [datascience, tabular, pivot, transform, aggregation, go]
+uses_functions: []
+uses_types: []
+returns: []
+returns_optional: false
+error_type: ""
+imports: []
+tested: true
+tests:
+  - "Pivot basico con sum"
+  - "Pivot con count y mean"
+  - "Valores faltantes rellenados con 0"
+  - "Una sola fila"
+  - "Multiples valores por celda requieren agregacion"
+test_file_path: "functions/datascience/pivot_test.go"
+file_path: "functions/datascience/pivot.go"
+---
+
+## Ejemplo
+
+```go
+rows := []map[string]any{
+    {"region": "US", "product": "A", "sales": 10},
+    {"region": "US", "product": "B", "sales": 20},
+    {"region": "EU", "product": "A", "sales": 15},
+}
+result := Pivot(rows, "region", "product", "sales", "sum")
+// [{"region": "US", "A": 10.0, "B": 20.0}, {"region": "EU", "A": 15.0, "B": 0}]
+```
+
+## Notas
+
+Función pura sin dependencias externas. Usa map[string]any para trabajar con datos JSON/SQL deserializados.
+Las agregaciones numéricas (sum, mean, min, max) convierten valores a float64 vía type assertion.
@@ -0,0 +1,111 @@
+package datascience
+
+import (
+	"testing"
+)
+
+func TestPivot(t *testing.T) {
+	t.Run("Pivot basico con sum", func(t *testing.T) {
+		rows := []map[string]any{
+			{"region": "US", "product": "A", "sales": 10},
+			{"region": "US", "product": "B", "sales": 20},
+			{"region": "EU", "product": "A", "sales": 15},
+		}
+		result := Pivot(rows, "region", "product", "sales", "sum")
+		if len(result) != 2 {
+			t.Fatalf("got %d rows, want 2", len(result))
+		}
+		var us, eu map[string]any
+		for _, r := range result {
+			if r["region"] == "US" {
+				us = r
+			} else {
+				eu = r
+			}
+		}
+		if us["A"] != 10 {
+			t.Errorf("US.A: got %v, want 10", us["A"])
+		}
+		if us["B"] != 20 {
+			t.Errorf("US.B: got %v, want 20", us["B"])
+		}
+		if eu["A"] != 15 {
+			t.Errorf("EU.A: got %v, want 15", eu["A"])
+		}
+		if eu["B"] != 0 {
+			t.Errorf("EU.B: got %v, want 0", eu["B"])
+		}
+	})
+
+	t.Run("Pivot con count y mean", func(t *testing.T) {
+		rows := []map[string]any{
+			{"region": "US", "product": "A", "sales": 10},
+			{"region": "US", "product": "A", "sales": 20},
+			{"region": "EU", "product": "A", "sales": 15},
+		}
+		resultCount := Pivot(rows, "region", "product", "sales", "count")
+		for _, r := range resultCount {
+			if r["region"] == "US" && r["A"] != 2 {
+				t.Errorf("count US.A: got %v, want 2", r["A"])
+			}
+		}
+
+		resultMean := Pivot(rows, "region", "product", "sales", "mean")
+		for _, r := range resultMean {
+			if r["region"] == "US" {
+				mean, ok := r["A"].(float64)
+				if !ok || mean != 15.0 {
+					t.Errorf("mean US.A: got %v, want 15.0", r["A"])
+				}
+			}
+		}
+	})
+
+	t.Run("Valores faltantes rellenados con 0", func(t *testing.T) {
+		rows := []map[string]any{
+			{"region": "US", "product": "A", "sales": 5},
+			{"region": "EU", "product": "B", "sales": 8},
+		}
+		result := Pivot(rows, "region", "product", "sales", "sum")
+		for _, r := range result {
+			if r["region"] == "US" && r["B"] != 0 {
+				t.Errorf("US.B: got %v, want 0", r["B"])
+			}
+			if r["region"] == "EU" && r["A"] != 0 {
+				t.Errorf("EU.A: got %v, want 0", r["A"])
+			}
+		}
+	})
+
+	t.Run("Una sola fila", func(t *testing.T) {
+		rows := []map[string]any{
+			{"region": "US", "product": "A", "sales": 42},
+		}
+		result := Pivot(rows, "region", "product", "sales", "sum")
+		if len(result) != 1 {
+			t.Fatalf("got %d rows, want 1", len(result))
+		}
+		if result[0]["A"] != 42 {
+			t.Errorf("got %v, want 42", result[0]["A"])
+		}
+	})
+
+	t.Run("Multiples valores por celda requieren agregacion", func(t *testing.T) {
+		rows := []map[string]any{
+			{"region": "US", "product": "A", "sales": 10},
+			{"region": "US", "product": "A", "sales": 30},
+		}
+		resultSum := Pivot(rows, "region", "product", "sales", "sum")
+		if resultSum[0]["A"] != 40.0 {
+			t.Errorf("sum: got %v, want 40.0", resultSum[0]["A"])
+		}
+		resultMin := Pivot(rows, "region", "product", "sales", "min")
+		if resultMin[0]["A"] != 10.0 {
+			t.Errorf("min: got %v, want 10.0", resultMin[0]["A"])
+		}
+		resultMax := Pivot(rows, "region", "product", "sales", "max")
+		if resultMax[0]["A"] != 30.0 {
+			t.Errorf("max: got %v, want 30.0", resultMax[0]["A"])
+		}
+	})
+}