Files
fn_registry/registry/indexer.go
T
egutierrez e3bb9c3b38 feat: content hash y timestamps inteligentes en registry
Agrega content_hash a functions, types y apps para detectar cambios reales
entre reindexaciones. Los timestamps created_at se preservan si el contenido
no cambió, y updated_at solo se actualiza cuando hay cambios efectivos.
Incluye migración 005, hash.go con SHA256 determinístico, y ajustes en
store/indexer/models para el nuevo flujo de timestamps.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-03-30 14:23:45 +02:00

202 lines
5.7 KiB
Go

package registry
import (
"fmt"
"os"
"path/filepath"
"strings"
"time"
)
// IndexResult summarizes the outcome of a single Index run.
type IndexResult struct {
	// Functions, Types and Apps count the entries of each kind that were
	// inserted into the database without error.
	Functions, Types, Apps int

	// ValidationErrors collects the messages of entries rejected by
	// validation; Errors collects parse and insert failure messages.
	ValidationErrors, Errors []string
}
// Index walks the registry root, parses all .md files, validates integrity,
// and populates the database. It uses two passes:
//  1. Parse all entries and collect known IDs
//  2. Validate references against known IDs, then insert valid entries
//
// Scans functions/ and types/ at the root level, plus any language-specific
// directories (e.g. python/functions/, python/types/), and apps/*/app.md.
//
// The database is purged before it is repopulated. The timestamps of the
// previous index are loaded first so applyTimestamps can carry them over.
//
// A non-nil error is returned only when loading the old timestamps or purging
// the database fails. Every other failure (unreadable directory, parse error,
// validation error, insert error) is recorded on the returned IndexResult and
// indexing continues with the remaining entries.
func Index(db *DB, root string) (*IndexResult, error) {
	// Load existing timestamps before purging so we can preserve created_at
	oldFuncs, oldTypes, oldApps, err := db.LoadTimestamps()
	if err != nil {
		return nil, fmt.Errorf("loading timestamps: %w", err)
	}
	if err := db.Purge(); err != nil {
		return nil, fmt.Errorf("purging database: %w", err)
	}

	result := &IndexResult{}

	// Pass 1: parse everything from all source directories
	var functions []*Function
	var types []*Type

	// Directories to scan for functions and types.
	// Base dirs + language-specific dirs discovered automatically.
	funcDirs := []string{filepath.Join(root, "functions")}
	typeDirs := []string{filepath.Join(root, "types")}

	// Discover language-specific directories (e.g. python/functions/, python/types/).
	// A read failure is reported instead of being dropped; os.ReadDir still
	// returns the entries it managed to read, so those are processed anyway.
	entries, err := os.ReadDir(root)
	if err != nil {
		result.Errors = append(result.Errors, fmt.Sprintf("read dir %s: %v", root, err))
	}
	for _, e := range entries {
		if !e.IsDir() {
			continue
		}
		langFuncs := filepath.Join(root, e.Name(), "functions")
		if fi, err := os.Stat(langFuncs); err == nil && fi.IsDir() {
			funcDirs = append(funcDirs, langFuncs)
		}
		langTypes := filepath.Join(root, e.Name(), "types")
		if fi, err := os.Stat(langTypes); err == nil && fi.IsDir() {
			typeDirs = append(typeDirs, langTypes)
		}
	}

	for _, dir := range funcDirs {
		walkMD(dir, func(path string) {
			f, err := ParseFunctionMD(path, root)
			if err != nil {
				result.Errors = append(result.Errors, fmt.Sprintf("parse %s: %v", path, err))
				return
			}
			functions = append(functions, f)
		})
	}
	for _, dir := range typeDirs {
		walkMD(dir, func(path string) {
			t, err := ParseTypeMD(path, root)
			if err != nil {
				result.Errors = append(result.Errors, fmt.Sprintf("parse %s: %v", path, err))
				return
			}
			types = append(types, t)
		})
	}

	// Parse apps from apps/*/app.md
	var apps []*App
	appsDir := filepath.Join(root, "apps")
	if fi, err := os.Stat(appsDir); err == nil && fi.IsDir() {
		appEntries, err := os.ReadDir(appsDir)
		if err != nil {
			// Report the failure; any partially read entries are still handled below.
			result.Errors = append(result.Errors, fmt.Sprintf("read dir %s: %v", appsDir, err))
		}
		for _, e := range appEntries {
			if !e.IsDir() {
				continue
			}
			appMD := filepath.Join(appsDir, e.Name(), "app.md")
			if _, err := os.Stat(appMD); err != nil {
				// App directories without an app.md are skipped.
				continue
			}
			a, err := ParseAppMD(appMD, root)
			if err != nil {
				result.Errors = append(result.Errors, fmt.Sprintf("parse %s: %v", appMD, err))
				continue
			}
			apps = append(apps, a)
		}
	}

	// Build known ID sets. They contain every parsed entry, including entries
	// that fail validation in pass 2.
	knownFunctions := make(map[string]bool, len(functions))
	for _, f := range functions {
		knownFunctions[f.ID] = true
	}
	knownTypes := make(map[string]bool, len(types))
	for _, t := range types {
		knownTypes[t.ID] = true
	}

	// A single instant is shared by every entry touched in this run.
	now := time.Now().UTC()

	// Pass 2: validate, assign timestamps via hash comparison, and insert.
	// Types go first, then functions, then apps.
	for _, t := range types {
		if verr := ValidateType(t, knownTypes); verr != nil {
			result.ValidationErrors = append(result.ValidationErrors, verr.Error())
			continue
		}
		t.ContentHash = ComputeTypeHash(t)
		applyTimestamps(&t.CreatedAt, &t.UpdatedAt, t.ContentHash, oldTypes[t.ID], now)
		if err := db.InsertType(t); err != nil {
			result.Errors = append(result.Errors, fmt.Sprintf("insert %s: %v", t.ID, err))
			continue
		}
		result.Types++
	}

	for _, f := range functions {
		if verr := ValidateFunction(f, knownFunctions, knownTypes); verr != nil {
			result.ValidationErrors = append(result.ValidationErrors, verr.Error())
			continue
		}
		f.ContentHash = ComputeFunctionHash(f)
		applyTimestamps(&f.CreatedAt, &f.UpdatedAt, f.ContentHash, oldFuncs[f.ID], now)
		if err := db.InsertFunction(f); err != nil {
			result.Errors = append(result.Errors, fmt.Sprintf("insert %s: %v", f.ID, err))
			continue
		}
		result.Functions++
	}

	for _, a := range apps {
		if verr := ValidateApp(a, knownFunctions, knownTypes); verr != nil {
			result.ValidationErrors = append(result.ValidationErrors, verr.Error())
			continue
		}
		a.ContentHash = ComputeAppHash(a)
		applyTimestamps(&a.CreatedAt, &a.UpdatedAt, a.ContentHash, oldApps[a.ID], now)
		if err := db.InsertApp(a); err != nil {
			result.Errors = append(result.Errors, fmt.Sprintf("insert %s: %v", a.ID, err))
			continue
		}
		result.Apps++
	}

	return result, nil
}
// applyTimestamps writes the created_at / updated_at pair for an entry into
// the two supplied pointers, based on its record from the previous index:
//   - old.CreatedAt is zero (treated as a new entry): both become now
//   - old.ContentHash equals newHash: both are copied from the old record
//   - otherwise (content changed): created_at is copied, updated_at becomes now
func applyTimestamps(createdAt, updatedAt *time.Time, newHash string, old timestampRecord, now time.Time) {
	// Start from the "existing entry whose content changed" outcome and
	// adjust it for the two other cases.
	created, updated := old.CreatedAt, now

	switch {
	case old.CreatedAt.IsZero():
		// No usable previous record: the entry is new.
		created = now
	case old.ContentHash == newHash:
		// Same content as before: leave updated_at untouched as well.
		updated = old.UpdatedAt
	}

	*createdAt = created
	*updatedAt = updated
}
// walkMD recursively visits dir and invokes fn with the path of every
// non-directory entry whose name ends in ".md". It is best-effort: if dir
// cannot be stat'ed nothing is visited, and entries that produce an error
// during the walk are skipped without being reported.
func walkMD(dir string, fn func(path string)) {
	_, statErr := os.Stat(dir)
	if statErr != nil {
		return
	}

	// The visitor always returns nil, so the walk is never cut short.
	visit := func(p string, fi os.FileInfo, walkErr error) error {
		switch {
		case walkErr != nil:
			// Unreadable entry: skip it (fi may be nil here).
		case fi.IsDir():
			// Directories are only descended into, never reported.
		case strings.HasSuffix(p, ".md"):
			fn(p)
		}
		return nil
	}

	filepath.Walk(dir, visit)
}