feat: funciones Go — core (cron, join_by_key, validate_struct), datascience (pivot, diff_entities), infra (http, cache, cron_ticker)
Nuevas funciones Go con tests en tres dominios: - core: parse_cron_expr, next_cron_time, join_by_key, validate_struct_fields + tipo CronSchedule - datascience: pivot (tabla dinámica), diff_entities (comparación de entidades) - infra: http_get_json, http_post_json, http_download_file, cache_to_sqlite, cron_ticker Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -0,0 +1,96 @@
|
||||
package datascience
|
||||
|
||||
import "fmt"
|
||||
|
||||
// DiffEntities compares two snapshots of entities and returns field-level differences.
//
// Entities are matched by the "%v" string form of their key field. Entities that
// lack the key field are skipped, and when a key repeats inside one snapshot the
// last occurrence wins.
//
// ignoreFields lists fields excluded from the comparison. A nil slice selects the
// defaults ["created_at", "updated_at"]; pass an empty non-nil slice to compare
// every field. The key field itself is never reported as a change.
//
// The returned map contains:
//   - "added":     []map[string]any, entities present only in after
//   - "removed":   []map[string]any, entities present only in before
//   - "modified":  []map[string]any, each {"key": string, "changes": map[string]any}
//     where changes maps a field name to {"old": ..., "new": ...}
//   - "unchanged": int, number of entities present in both without differences
//   - "summary":   string, "<n> added, <n> removed, <n> modified, <n> unchanged"
//
// The output order is deterministic: "added" follows the order in which keys first
// appear in after, while "removed" and "modified" follow the order in which keys
// first appear in before.
func DiffEntities(before, after []map[string]any, key string, ignoreFields []string) map[string]any {
	if ignoreFields == nil {
		ignoreFields = []string{"created_at", "updated_at"}
	}
	ignoreSet := make(map[string]bool, len(ignoreFields))
	for _, f := range ignoreFields {
		ignoreSet[f] = true
	}

	beforeMap, beforeOrder := indexEntitiesByKey(before, key)
	afterMap, afterOrder := indexEntitiesByKey(after, key)

	// Non-nil empty slices are kept on purpose so callers can type-assert the
	// result and so it serializes as [] rather than null.
	added := []map[string]any{}
	for _, k := range afterOrder {
		if _, exists := beforeMap[k]; !exists {
			added = append(added, afterMap[k])
		}
	}

	removed := []map[string]any{}
	modified := []map[string]any{}
	unchanged := 0
	for _, k := range beforeOrder {
		b := beforeMap[k]
		a, exists := afterMap[k]
		if !exists {
			removed = append(removed, b)
			continue
		}
		changes := diffEntityFields(b, a, key, ignoreSet)
		if len(changes) > 0 {
			modified = append(modified, map[string]any{"key": k, "changes": changes})
		} else {
			unchanged++
		}
	}

	summary := fmt.Sprintf("%d added, %d removed, %d modified, %d unchanged",
		len(added), len(removed), len(modified), unchanged)

	return map[string]any{
		"added":     added,
		"removed":   removed,
		"modified":  modified,
		"unchanged": unchanged,
		"summary":   summary,
	}
}

// indexEntitiesByKey maps each entity by the "%v" form of its key field (last
// occurrence wins) and returns the keys in order of first appearance.
func indexEntitiesByKey(entities []map[string]any, key string) (map[string]map[string]any, []string) {
	byKey := make(map[string]map[string]any, len(entities))
	order := make([]string, 0, len(entities))
	for _, e := range entities {
		raw, ok := e[key]
		if !ok {
			continue
		}
		k := fmt.Sprintf("%v", raw)
		if _, seen := byKey[k]; !seen {
			order = append(order, k)
		}
		byKey[k] = e
	}
	return byKey, order
}

// diffEntityFields returns the fields whose "%v" representation differs between
// b and a, skipping the key field and every field in ignoreSet.
func diffEntityFields(b, a map[string]any, key string, ignoreSet map[string]bool) map[string]any {
	// Union of field names so that fields present on only one side are compared too.
	allFields := make(map[string]bool, len(b)+len(a))
	for f := range b {
		allFields[f] = true
	}
	for f := range a {
		allFields[f] = true
	}

	changes := map[string]any{}
	for field := range allFields {
		if ignoreSet[field] || field == key {
			continue
		}
		// A missing field reads as nil and is compared through its "%v" form.
		oldVal := b[field]
		newVal := a[field]
		if fmt.Sprintf("%v", oldVal) != fmt.Sprintf("%v", newVal) {
			changes[field] = map[string]any{"old": oldVal, "new": newVal}
		}
	}
	return changes
}
|
||||
@@ -0,0 +1,52 @@
|
||||
---
|
||||
name: diff_entities
|
||||
kind: function
|
||||
lang: go
|
||||
domain: datascience
|
||||
version: "1.0.0"
|
||||
purity: pure
|
||||
signature: "func DiffEntities(before, after []map[string]any, key string, ignoreFields []string) map[string]any"
|
||||
description: "Compara dos snapshots de entities y devuelve diferencias campo a campo. Detecta añadidas, eliminadas, modificadas e inalteradas. Ignora created_at y updated_at por defecto (pasar nil para usar defaults)."
|
||||
tags: [datascience, diff, entities, operations, snapshot, comparison]
|
||||
uses_functions: []
|
||||
uses_types: []
|
||||
returns: []
|
||||
returns_optional: false
|
||||
error_type: ""
|
||||
imports: ["fmt"]
|
||||
tested: true
|
||||
tests:
|
||||
- "entity añadida"
|
||||
- "entity eliminada"
|
||||
- "entity modificada con detalle de campos"
|
||||
- "entities identicas → unchanged"
|
||||
- "ignore_fields funciona"
|
||||
- "lista vacia vs lista con datos"
|
||||
- "summary format correcto"
|
||||
test_file_path: "functions/datascience/diff_entities_test.go"
|
||||
file_path: "functions/datascience/diff_entities.go"
|
||||
---
|
||||
|
||||
## Ejemplo
|
||||
|
||||
```go
|
||||
before := []map[string]any{
|
||||
{"id": "1", "name": "Alice", "status": "active"},
|
||||
{"id": "2", "name": "Bob"},
|
||||
}
|
||||
after := []map[string]any{
|
||||
{"id": "1", "name": "Alice", "status": "inactive"},
|
||||
{"id": "3", "name": "Carol"},
|
||||
}
|
||||
result := DiffEntities(before, after, "id", nil)
|
||||
// result["summary"] = "1 added, 1 removed, 1 modified, 0 unchanged"
|
||||
// result["added"] = [{"id": "3", "name": "Carol"}]
|
||||
// result["removed"] = [{"id": "2", "name": "Bob"}]
|
||||
// result["modified"] = [{"key": "1", "changes": {"status": {"old": "active", "new": "inactive"}}}]
|
||||
```
|
||||
|
||||
## Notas
|
||||
|
||||
Función pura. Compara valores con fmt.Sprintf("%v", ...) para manejar tipos heterogéneos en map[string]any; por tanto, valores con la misma representación textual (p. ej. 1 y "1") se consideran iguales.
|
||||
ignoreFields nil usa los defaults ["created_at", "updated_at"]. Para no ignorar ningún campo, pasar []string{}.
|
||||
Semántica idéntica a diff_entities_py_datascience; permite comparar resultados entre ejecuciones del mismo pipeline.
|
||||
@@ -0,0 +1,138 @@
|
||||
package datascience
|
||||
|
||||
import (
|
||||
"testing"
|
||||
)
|
||||
|
||||
func TestDiffEntities(t *testing.T) {
|
||||
t.Run("entity añadida", func(t *testing.T) {
|
||||
before := []map[string]any{
|
||||
{"id": "1", "name": "Alice"},
|
||||
}
|
||||
after := []map[string]any{
|
||||
{"id": "1", "name": "Alice"},
|
||||
{"id": "2", "name": "Bob"},
|
||||
}
|
||||
result := DiffEntities(before, after, "id", nil)
|
||||
added := result["added"].([]map[string]any)
|
||||
if len(added) != 1 {
|
||||
t.Errorf("expected 1 added, got %d", len(added))
|
||||
}
|
||||
if added[0]["id"] != "2" {
|
||||
t.Errorf("expected added id=2, got %v", added[0]["id"])
|
||||
}
|
||||
if result["unchanged"].(int) != 1 {
|
||||
t.Errorf("expected 1 unchanged, got %v", result["unchanged"])
|
||||
}
|
||||
})
|
||||
|
||||
t.Run("entity eliminada", func(t *testing.T) {
|
||||
before := []map[string]any{
|
||||
{"id": "1", "name": "Alice"},
|
||||
{"id": "2", "name": "Bob"},
|
||||
}
|
||||
after := []map[string]any{
|
||||
{"id": "1", "name": "Alice"},
|
||||
}
|
||||
result := DiffEntities(before, after, "id", nil)
|
||||
removed := result["removed"].([]map[string]any)
|
||||
if len(removed) != 1 {
|
||||
t.Errorf("expected 1 removed, got %d", len(removed))
|
||||
}
|
||||
if removed[0]["id"] != "2" {
|
||||
t.Errorf("expected removed id=2, got %v", removed[0]["id"])
|
||||
}
|
||||
})
|
||||
|
||||
t.Run("entity modificada con detalle de campos", func(t *testing.T) {
|
||||
before := []map[string]any{
|
||||
{"id": "1", "name": "Alice", "status": "active"},
|
||||
}
|
||||
after := []map[string]any{
|
||||
{"id": "1", "name": "Alice", "status": "inactive"},
|
||||
}
|
||||
result := DiffEntities(before, after, "id", nil)
|
||||
modified := result["modified"].([]map[string]any)
|
||||
if len(modified) != 1 {
|
||||
t.Errorf("expected 1 modified, got %d", len(modified))
|
||||
}
|
||||
changes := modified[0]["changes"].(map[string]any)
|
||||
statusChange, ok := changes["status"].(map[string]any)
|
||||
if !ok {
|
||||
t.Fatalf("expected status change, got %v", changes)
|
||||
}
|
||||
if statusChange["old"] != "active" {
|
||||
t.Errorf("expected old=active, got %v", statusChange["old"])
|
||||
}
|
||||
if statusChange["new"] != "inactive" {
|
||||
t.Errorf("expected new=inactive, got %v", statusChange["new"])
|
||||
}
|
||||
})
|
||||
|
||||
t.Run("entities identicas → unchanged", func(t *testing.T) {
|
||||
entities := []map[string]any{
|
||||
{"id": "1", "name": "Alice"},
|
||||
{"id": "2", "name": "Bob"},
|
||||
}
|
||||
result := DiffEntities(entities, entities, "id", nil)
|
||||
if result["unchanged"].(int) != 2 {
|
||||
t.Errorf("expected 2 unchanged, got %v", result["unchanged"])
|
||||
}
|
||||
if len(result["added"].([]map[string]any)) != 0 {
|
||||
t.Errorf("expected 0 added")
|
||||
}
|
||||
if len(result["modified"].([]map[string]any)) != 0 {
|
||||
t.Errorf("expected 0 modified")
|
||||
}
|
||||
})
|
||||
|
||||
t.Run("ignore_fields funciona", func(t *testing.T) {
|
||||
before := []map[string]any{
|
||||
{"id": "1", "name": "Alice", "updated_at": "2024-01-01"},
|
||||
}
|
||||
after := []map[string]any{
|
||||
{"id": "1", "name": "Alice", "updated_at": "2024-06-01"},
|
||||
}
|
||||
// Default ignores updated_at
|
||||
result := DiffEntities(before, after, "id", nil)
|
||||
if result["unchanged"].(int) != 1 {
|
||||
t.Errorf("expected 1 unchanged (updated_at ignored), got %v", result["unchanged"])
|
||||
}
|
||||
modified := result["modified"].([]map[string]any)
|
||||
if len(modified) != 0 {
|
||||
t.Errorf("expected 0 modified when updated_at is ignored, got %d", len(modified))
|
||||
}
|
||||
})
|
||||
|
||||
t.Run("lista vacia vs lista con datos", func(t *testing.T) {
|
||||
before := []map[string]any{}
|
||||
after := []map[string]any{
|
||||
{"id": "1", "name": "Alice"},
|
||||
}
|
||||
result := DiffEntities(before, after, "id", nil)
|
||||
added := result["added"].([]map[string]any)
|
||||
if len(added) != 1 {
|
||||
t.Errorf("expected 1 added, got %d", len(added))
|
||||
}
|
||||
if result["unchanged"].(int) != 0 {
|
||||
t.Errorf("expected 0 unchanged")
|
||||
}
|
||||
})
|
||||
|
||||
t.Run("summary format correcto", func(t *testing.T) {
|
||||
before := []map[string]any{
|
||||
{"id": "1", "name": "Alice"},
|
||||
{"id": "3", "name": "Carol"},
|
||||
}
|
||||
after := []map[string]any{
|
||||
{"id": "1", "name": "Alice Changed"},
|
||||
{"id": "2", "name": "Bob"},
|
||||
}
|
||||
result := DiffEntities(before, after, "id", nil)
|
||||
summary := result["summary"].(string)
|
||||
expected := "1 added, 1 removed, 1 modified, 0 unchanged"
|
||||
if summary != expected {
|
||||
t.Errorf("expected summary %q, got %q", expected, summary)
|
||||
}
|
||||
})
|
||||
}
|
||||
@@ -0,0 +1,110 @@
|
||||
package datascience
|
||||
|
||||
// Pivot reshapes rows from long format to wide format (a pivot table).
//
// Rows are grouped by the value of the index field; every distinct string found
// in the columns field becomes an output column, and the values field of the
// rows falling in each (index, column) cell is reduced with agg.
//
// Supported agg names are "sum", "count", "mean", "min", "max", "first" and
// "last"; any other name behaves like "sum". The numeric aggregations return
// float64, "count" returns int, and "first"/"last" return the stored value
// unchanged. Rows whose value is nil (or missing) do not contribute to a cell,
// and cells without any value are filled with the int 0.
//
// Output rows and columns follow order of first appearance in rows. Rows whose
// column label is missing, nil or not a string are skipped, because the label
// has to be usable as a key of the resulting map[string]any.
func Pivot(rows []map[string]any, index, columns, values, agg string) []map[string]any {
	type cell struct {
		idx any
		col string
	}

	indexOrder := []any{}
	seenIndex := map[any]bool{}
	colOrder := []string{}
	seenCols := map[string]bool{}
	groups := map[cell][]any{}

	for _, row := range rows {
		// The comma-ok form also rejects a nil/missing label without panicking.
		col, ok := row[columns].(string)
		if !ok {
			continue
		}
		idx := row[index]
		if !seenIndex[idx] {
			seenIndex[idx] = true
			indexOrder = append(indexOrder, idx)
		}
		if !seenCols[col] {
			seenCols[col] = true
			colOrder = append(colOrder, col)
		}
		if val := row[values]; val != nil {
			k := cell{idx, col}
			groups[k] = append(groups[k], val)
		}
	}

	result := make([]map[string]any, 0, len(indexOrder))
	for _, idx := range indexOrder {
		record := map[string]any{index: idx}
		for _, col := range colOrder {
			if vals := groups[cell{idx, col}]; len(vals) > 0 {
				record[col] = pivotAggregate(vals, agg)
			} else {
				record[col] = 0
			}
		}
		result = append(result, record)
	}
	return result
}

// pivotAggregate reduces vals with the aggregation named fn; unknown names fall
// back to the sum, and an empty vals yields the int 0.
func pivotAggregate(vals []any, fn string) any {
	if len(vals) == 0 {
		return 0
	}
	switch fn {
	case "count":
		return len(vals)
	case "first":
		return vals[0]
	case "last":
		return vals[len(vals)-1]
	}

	// Numeric aggregations: sum, mean, min, max.
	sum := 0.0
	lo := pivotToFloat(vals[0])
	hi := lo
	for _, v := range vals {
		f := pivotToFloat(v)
		sum += f
		if f < lo {
			lo = f
		}
		if f > hi {
			hi = f
		}
	}
	switch fn {
	case "mean":
		return sum / float64(len(vals))
	case "min":
		return lo
	case "max":
		return hi
	}
	return sum
}

// pivotToFloat converts the built-in integer and float kinds to float64; any
// other dynamic type counts as 0.
func pivotToFloat(v any) float64 {
	switch n := v.(type) {
	case float64:
		return n
	case float32:
		return float64(n)
	case int:
		return float64(n)
	case int64:
		return float64(n)
	case int32:
		return float64(n)
	case int16:
		return float64(n)
	case int8:
		return float64(n)
	case uint:
		return float64(n)
	case uint64:
		return float64(n)
	case uint32:
		return float64(n)
	case uint16:
		return float64(n)
	case uint8:
		return float64(n)
	}
	return 0
}
|
||||
@@ -0,0 +1,43 @@
|
||||
---
|
||||
name: pivot
|
||||
kind: function
|
||||
lang: go
|
||||
domain: datascience
|
||||
version: "1.0.0"
|
||||
purity: pure
|
||||
signature: "func Pivot(rows []map[string]any, index, columns, values, agg string) []map[string]any"
|
||||
description: "Pivot table sin dependencias. Agrupa por index, expande valores unicos de columns como nuevas columnas y agrega values con la funcion indicada (sum, count, mean, min, max, first, last). Valores faltantes se rellenan con 0."
|
||||
tags: [datascience, tabular, pivot, transform, aggregation, go]
|
||||
uses_functions: []
|
||||
uses_types: []
|
||||
returns: []
|
||||
returns_optional: false
|
||||
error_type: ""
|
||||
imports: []
|
||||
tested: true
|
||||
tests:
|
||||
- "Pivot basico con sum"
|
||||
- "Pivot con count y mean"
|
||||
- "Valores faltantes rellenados con 0"
|
||||
- "Una sola fila"
|
||||
- "Multiples valores por celda requieren agregacion"
|
||||
test_file_path: "functions/datascience/pivot_test.go"
|
||||
file_path: "functions/datascience/pivot.go"
|
||||
---
|
||||
|
||||
## Ejemplo
|
||||
|
||||
```go
|
||||
rows := []map[string]any{
|
||||
{"region": "US", "product": "A", "sales": 10},
|
||||
{"region": "US", "product": "B", "sales": 20},
|
||||
{"region": "EU", "product": "A", "sales": 15},
|
||||
}
|
||||
result := Pivot(rows, "region", "product", "sales", "sum")
|
||||
// [{"region": "US", "A": 10.0, "B": 20.0}, {"region": "EU", "A": 15.0, "B": 0}]
|
||||
```
|
||||
|
||||
## Notas
|
||||
|
||||
Función pura sin dependencias externas. Usa map[string]any para trabajar con datos JSON/SQL deserializados.
|
||||
Las agregaciones numéricas (sum, mean, min, max) convierten los valores a float64 mediante un type switch; los valores de tipos no numéricos cuentan como 0.
|
||||
@@ -0,0 +1,111 @@
|
||||
package datascience
|
||||
|
||||
import (
|
||||
"testing"
|
||||
)
|
||||
|
||||
func TestPivot(t *testing.T) {
|
||||
t.Run("Pivot basico con sum", func(t *testing.T) {
|
||||
rows := []map[string]any{
|
||||
{"region": "US", "product": "A", "sales": 10},
|
||||
{"region": "US", "product": "B", "sales": 20},
|
||||
{"region": "EU", "product": "A", "sales": 15},
|
||||
}
|
||||
result := Pivot(rows, "region", "product", "sales", "sum")
|
||||
if len(result) != 2 {
|
||||
t.Fatalf("got %d rows, want 2", len(result))
|
||||
}
|
||||
var us, eu map[string]any
|
||||
for _, r := range result {
|
||||
if r["region"] == "US" {
|
||||
us = r
|
||||
} else {
|
||||
eu = r
|
||||
}
|
||||
}
|
||||
if us["A"] != 10 {
|
||||
t.Errorf("US.A: got %v, want 10", us["A"])
|
||||
}
|
||||
if us["B"] != 20 {
|
||||
t.Errorf("US.B: got %v, want 20", us["B"])
|
||||
}
|
||||
if eu["A"] != 15 {
|
||||
t.Errorf("EU.A: got %v, want 15", eu["A"])
|
||||
}
|
||||
if eu["B"] != 0 {
|
||||
t.Errorf("EU.B: got %v, want 0", eu["B"])
|
||||
}
|
||||
})
|
||||
|
||||
t.Run("Pivot con count y mean", func(t *testing.T) {
|
||||
rows := []map[string]any{
|
||||
{"region": "US", "product": "A", "sales": 10},
|
||||
{"region": "US", "product": "A", "sales": 20},
|
||||
{"region": "EU", "product": "A", "sales": 15},
|
||||
}
|
||||
resultCount := Pivot(rows, "region", "product", "sales", "count")
|
||||
for _, r := range resultCount {
|
||||
if r["region"] == "US" && r["A"] != 2 {
|
||||
t.Errorf("count US.A: got %v, want 2", r["A"])
|
||||
}
|
||||
}
|
||||
|
||||
resultMean := Pivot(rows, "region", "product", "sales", "mean")
|
||||
for _, r := range resultMean {
|
||||
if r["region"] == "US" {
|
||||
mean, ok := r["A"].(float64)
|
||||
if !ok || mean != 15.0 {
|
||||
t.Errorf("mean US.A: got %v, want 15.0", r["A"])
|
||||
}
|
||||
}
|
||||
}
|
||||
})
|
||||
|
||||
t.Run("Valores faltantes rellenados con 0", func(t *testing.T) {
|
||||
rows := []map[string]any{
|
||||
{"region": "US", "product": "A", "sales": 5},
|
||||
{"region": "EU", "product": "B", "sales": 8},
|
||||
}
|
||||
result := Pivot(rows, "region", "product", "sales", "sum")
|
||||
for _, r := range result {
|
||||
if r["region"] == "US" && r["B"] != 0 {
|
||||
t.Errorf("US.B: got %v, want 0", r["B"])
|
||||
}
|
||||
if r["region"] == "EU" && r["A"] != 0 {
|
||||
t.Errorf("EU.A: got %v, want 0", r["A"])
|
||||
}
|
||||
}
|
||||
})
|
||||
|
||||
t.Run("Una sola fila", func(t *testing.T) {
|
||||
rows := []map[string]any{
|
||||
{"region": "US", "product": "A", "sales": 42},
|
||||
}
|
||||
result := Pivot(rows, "region", "product", "sales", "sum")
|
||||
if len(result) != 1 {
|
||||
t.Fatalf("got %d rows, want 1", len(result))
|
||||
}
|
||||
if result[0]["A"] != 42 {
|
||||
t.Errorf("got %v, want 42", result[0]["A"])
|
||||
}
|
||||
})
|
||||
|
||||
t.Run("Multiples valores por celda requieren agregacion", func(t *testing.T) {
|
||||
rows := []map[string]any{
|
||||
{"region": "US", "product": "A", "sales": 10},
|
||||
{"region": "US", "product": "A", "sales": 30},
|
||||
}
|
||||
resultSum := Pivot(rows, "region", "product", "sales", "sum")
|
||||
if resultSum[0]["A"] != 40.0 {
|
||||
t.Errorf("sum: got %v, want 40.0", resultSum[0]["A"])
|
||||
}
|
||||
resultMin := Pivot(rows, "region", "product", "sales", "min")
|
||||
if resultMin[0]["A"] != 10.0 {
|
||||
t.Errorf("min: got %v, want 10.0", resultMin[0]["A"])
|
||||
}
|
||||
resultMax := Pivot(rows, "region", "product", "sales", "max")
|
||||
if resultMax[0]["A"] != 30.0 {
|
||||
t.Errorf("max: got %v, want 30.0", resultMax[0]["A"])
|
||||
}
|
||||
})
|
||||
}
|
||||
Reference in New Issue
Block a user