feat: content hash y timestamps inteligentes en registry

Agrega content_hash a functions, types y apps para detectar cambios reales
entre reindexaciones. Los timestamps created_at se preservan si el contenido
no cambió, y updated_at solo se actualiza cuando hay cambios efectivos.
Incluye migración 005, hash.go con SHA256 determinístico, y ajustes en
store/indexer/models para el nuevo flujo de timestamps.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
2026-03-30 14:23:45 +02:00
parent 48caec5665
commit e3bb9c3b38
5 changed files with 188 additions and 24 deletions
+101
View File
@@ -0,0 +1,101 @@
package registry
import (
"crypto/sha256"
"fmt"
"time"
)
// timestampRecord is the per-entry snapshot read back from the database: the
// stored creation/update times plus the content hash recorded with them.
type timestampRecord struct {
	// CreatedAt and UpdatedAt are the parsed created_at / updated_at values;
	// either is the zero time when no usable value was available.
	CreatedAt, UpdatedAt time.Time
	// ContentHash is the stored content_hash value for the entry.
	ContentHash string
}
// ComputeFunctionHash returns the lowercase hex SHA-256 digest of a Function's
// content fields, written in a fixed order with "|" between them. CreatedAt,
// UpdatedAt and ContentHash are not part of the digest, so the result depends
// only on the fields fed below.
func ComputeFunctionHash(f *Function) string {
	digest := sha256.New()
	// feed appends one formatted fragment to the running digest.
	feed := func(format string, args ...any) {
		fmt.Fprintf(digest, format, args...)
	}

	// Identity and headline metadata.
	feed("%s|%s|%s|%s|%s", f.ID, f.Name, f.Kind, f.Lang, f.Domain)
	feed("|%s|%s|%s|%s", f.Version, f.Purity, f.Signature, f.Description)

	// Tags, dependencies and return information.
	feed("|%s|%s|%s|%s",
		marshalStrings(f.Tags), marshalStrings(f.UsesFunctions),
		marshalStrings(f.UsesTypes), marshalStrings(f.Returns))
	feed("|%t|%s|%s", f.ReturnsOptional, f.ErrorType, marshalStrings(f.Imports))

	// Example, tests and file locations.
	feed("|%s|%t|%s", f.Example, f.Tested, marshalStrings(f.Tests))
	feed("|%s|%s", f.TestFilePath, f.FilePath)

	// Props, emitted events and the optional state flag. A nil HasState
	// contributes no segment at all (not even a separator).
	feed("|%s|%s", marshalProps(f.Props), marshalStrings(f.Emits))
	if f.HasState != nil {
		feed("|%t", *f.HasState)
	}
	feed("|%s|%s", f.Framework, marshalStrings(f.Variant))

	// Free-form text and provenance.
	feed("|%s|%s|%s", f.Notes, f.Documentation, f.Code)
	feed("|%s|%s|%s", f.SourceRepo, f.SourceLicense, f.SourceFile)

	return fmt.Sprintf("%x", digest.Sum(nil))
}
// ComputeTypeHash returns the lowercase hex SHA-256 digest of a Type's content
// fields, written in the fixed order listed below with "|" between them.
// CreatedAt, UpdatedAt and ContentHash are not part of the digest.
func ComputeTypeHash(t *Type) string {
	// The order of this list defines the digest; changing it changes every hash.
	fields := []any{
		t.ID, t.Name, t.Lang, t.Domain, t.Version, t.Algebraic, t.Definition, t.Description,
		marshalStrings(t.Tags),
		marshalStrings(t.UsesTypes),
		t.FilePath, t.Examples, t.Notes, t.Documentation, t.Code,
		t.SourceRepo, t.SourceLicense, t.SourceFile,
	}

	digest := sha256.New()
	for i, field := range fields {
		if i == 0 {
			// The first field carries no leading separator.
			fmt.Fprintf(digest, "%s", field)
			continue
		}
		fmt.Fprintf(digest, "|%s", field)
	}
	return fmt.Sprintf("%x", digest.Sum(nil))
}
// ComputeAppHash returns the lowercase hex SHA-256 digest of an App's content
// fields, written in the fixed order listed below with "|" between them.
// CreatedAt, UpdatedAt and ContentHash are not part of the digest.
func ComputeAppHash(a *App) string {
	// The order of this list defines the digest; changing it changes every hash.
	fields := []any{
		a.ID, a.Name, a.Lang, a.Domain, a.Description,
		marshalStrings(a.Tags),
		marshalStrings(a.UsesFunctions),
		marshalStrings(a.UsesTypes),
		a.Framework, a.EntryPoint, a.Documentation, a.Notes, a.DirPath,
	}

	digest := sha256.New()
	sep := "" // empty before the first field, "|" before every later one
	for _, field := range fields {
		fmt.Fprintf(digest, "%s%s", sep, field)
		sep = "|"
	}
	return fmt.Sprintf("%x", digest.Sum(nil))
}
// LoadTimestamps snapshots id → {created_at, updated_at, content_hash} for the
// functions, types and apps tables, in that order, stopping at the first table
// that fails to load. Index calls it before Purge so that dates can be carried
// across a reindex.
func (db *DB) LoadTimestamps() (funcs, types, apps map[string]timestampRecord, err error) {
	if funcs, err = loadTable(db, "functions"); err != nil {
		return funcs, nil, nil, err
	}
	if types, err = loadTable(db, "types"); err != nil {
		return funcs, types, nil, err
	}
	apps, err = loadTable(db, "apps")
	return funcs, types, apps, err
}
// loadTable reads id, created_at, updated_at and content_hash from every row of
// table and returns the rows as a map keyed by id.
//
// table is interpolated directly into the SQL text, so it must be a trusted,
// hard-coded table name; it must never come from user input.
//
// On any query, scan or iteration error the returned map is nil and the error
// is wrapped with the table name.
func loadTable(db *DB, table string) (map[string]timestampRecord, error) {
	rows, err := db.conn.Query(fmt.Sprintf("SELECT id, created_at, updated_at, content_hash FROM %s", table))
	if err != nil {
		return nil, fmt.Errorf("querying %s timestamps: %w", table, err)
	}
	defer rows.Close()

	m := make(map[string]timestampRecord)
	for rows.Next() {
		var id, ca, ua, ch string
		if err := rows.Scan(&id, &ca, &ua, &ch); err != nil {
			return nil, fmt.Errorf("scanning %s timestamps: %w", table, err)
		}
		rec := timestampRecord{ContentHash: ch}
		// Parse errors are deliberately ignored (best effort): a value that is
		// not RFC 3339 leaves the field at the zero time, and applyTimestamps
		// treats a zero CreatedAt as "no previous record" instead of failing
		// the whole reindex.
		rec.CreatedAt, _ = time.Parse(time.RFC3339, ca)
		rec.UpdatedAt, _ = time.Parse(time.RFC3339, ua)
		m[id] = rec
	}
	// An iteration error means the map may be incomplete; do not hand a
	// partial result back alongside the error.
	if err := rows.Err(); err != nil {
		return nil, fmt.Errorf("reading %s timestamps: %w", table, err)
	}
	return m, nil
}
+38 -1
View File
@@ -5,6 +5,7 @@ import (
"os"
"path/filepath"
"strings"
"time"
)
// IndexResult holds stats from an indexing run.
@@ -24,6 +25,12 @@ type IndexResult struct {
// Scans functions/ and types/ at the root level, plus any language-specific
// directories (e.g. python/functions/, python/types/).
func Index(db *DB, root string) (*IndexResult, error) {
// Load existing timestamps before purging so we can preserve created_at
oldFuncs, oldTypes, oldApps, err := db.LoadTimestamps()
if err != nil {
return nil, fmt.Errorf("loading timestamps: %w", err)
}
if err := db.Purge(); err != nil {
return nil, fmt.Errorf("purging database: %w", err)
}
@@ -109,12 +116,16 @@ func Index(db *DB, root string) (*IndexResult, error) {
knownTypes[t.ID] = true
}
// Pass 2: validate and insert
now := time.Now().UTC()
// Pass 2: validate, assign timestamps via hash comparison, and insert
for _, t := range types {
if verr := ValidateType(t, knownTypes); verr != nil {
result.ValidationErrors = append(result.ValidationErrors, verr.Error())
continue
}
t.ContentHash = ComputeTypeHash(t)
applyTimestamps(&t.CreatedAt, &t.UpdatedAt, t.ContentHash, oldTypes[t.ID], now)
if err := db.InsertType(t); err != nil {
result.Errors = append(result.Errors, fmt.Sprintf("insert %s: %v", t.ID, err))
continue
@@ -127,6 +138,8 @@ func Index(db *DB, root string) (*IndexResult, error) {
result.ValidationErrors = append(result.ValidationErrors, verr.Error())
continue
}
f.ContentHash = ComputeFunctionHash(f)
applyTimestamps(&f.CreatedAt, &f.UpdatedAt, f.ContentHash, oldFuncs[f.ID], now)
if err := db.InsertFunction(f); err != nil {
result.Errors = append(result.Errors, fmt.Sprintf("insert %s: %v", f.ID, err))
continue
@@ -139,6 +152,8 @@ func Index(db *DB, root string) (*IndexResult, error) {
result.ValidationErrors = append(result.ValidationErrors, verr.Error())
continue
}
a.ContentHash = ComputeAppHash(a)
applyTimestamps(&a.CreatedAt, &a.UpdatedAt, a.ContentHash, oldApps[a.ID], now)
if err := db.InsertApp(a); err != nil {
result.Errors = append(result.Errors, fmt.Sprintf("insert %s: %v", a.ID, err))
continue
@@ -149,6 +164,28 @@ func Index(db *DB, root string) (*IndexResult, error) {
return result, nil
}
// applyTimestamps writes the creation and update times for an entry through
// createdAt and updatedAt, choosing between the previous record and now:
//   - old.CreatedAt is zero (no previous record): both become now
//   - previous record, old.ContentHash equals newHash: both are taken from old
//   - previous record, hashes differ: created_at is taken from old, updated_at
//     becomes now
func applyTimestamps(createdAt, updatedAt *time.Time, newHash string, old timestampRecord, now time.Time) {
	// Start from the "new entry" outcome and override from the old record.
	created, updated := now, now
	if !old.CreatedAt.IsZero() {
		// The entry existed before: its creation time always survives.
		created = old.CreatedAt
		if old.ContentHash == newHash {
			// Content is unchanged, so the update time survives as well.
			updated = old.UpdatedAt
		}
	}
	*createdAt = created
	*updatedAt = updated
}
// walkMD walks a directory recursively and calls fn for each .md file found.
func walkMD(dir string, fn func(path string)) {
if _, err := os.Stat(dir); err != nil {
+4
View File
@@ -0,0 +1,4 @@
-- Add a content_hash column to functions, types and apps so that a reindex can
-- detect whether an entry's content changed and preserve its timestamps when it
-- did not.
-- The column is NOT NULL with a default of '' (empty string), which is the
-- value rows that already exist will carry until they are rewritten.
ALTER TABLE functions ADD COLUMN content_hash TEXT NOT NULL DEFAULT '';
ALTER TABLE types ADD COLUMN content_hash TEXT NOT NULL DEFAULT '';
ALTER TABLE apps ADD COLUMN content_hash TEXT NOT NULL DEFAULT '';
+9
View File
@@ -54,6 +54,10 @@ type Function struct {
Tests []string `json:"tests"`
TestFilePath string `json:"test_file_path"`
FilePath string `json:"file_path"`
ContentHash string `json:"content_hash"`
SourceRepo string `json:"source_repo"`
SourceLicense string `json:"source_license"`
SourceFile string `json:"source_file"`
CreatedAt time.Time `json:"created_at"`
UpdatedAt time.Time `json:"updated_at"`
@@ -90,6 +94,10 @@ type Type struct {
Documentation string `json:"documentation"`
Code string `json:"code"`
FilePath string `json:"file_path"`
ContentHash string `json:"content_hash"`
SourceRepo string `json:"source_repo"`
SourceLicense string `json:"source_license"`
SourceFile string `json:"source_file"`
CreatedAt time.Time `json:"created_at"`
UpdatedAt time.Time `json:"updated_at"`
}
@@ -109,6 +117,7 @@ type App struct {
Documentation string `json:"documentation"`
Notes string `json:"notes"`
DirPath string `json:"dir_path"`
ContentHash string `json:"content_hash"`
CreatedAt time.Time `json:"created_at"`
UpdatedAt time.Time `json:"updated_at"`
}
+36 -23
View File
@@ -57,11 +57,13 @@ func unmarshalProps(s string) []PropDef {
// InsertFunction inserts or replaces a function entry.
func (db *DB) InsertFunction(f *Function) error {
now := time.Now().UTC().Format(time.RFC3339)
now := time.Now().UTC()
if f.CreatedAt.IsZero() {
f.CreatedAt = time.Now().UTC()
f.CreatedAt = now
}
if f.UpdatedAt.IsZero() {
f.UpdatedAt = now
}
f.UpdatedAt = time.Now().UTC()
if f.ID == "" {
f.ID = GenerateID(f.Name, f.Lang, f.Domain)
@@ -81,34 +83,39 @@ func (db *DB) InsertFunction(f *Function) error {
id, name, kind, lang, domain, version, purity, signature,
description, tags, uses_functions, uses_types, returns,
returns_optional, error_type, imports, example, tested,
tests, test_file_path, file_path, created_at, updated_at,
tests, test_file_path, file_path, content_hash, created_at, updated_at,
props, emits, has_state, framework, variant,
notes, documentation, code
notes, documentation, code,
source_repo, source_license, source_file
) VALUES (
?, ?, ?, ?, ?, ?, ?, ?,
?, ?, ?, ?, ?,
?, ?, ?, ?, ?,
?, ?, ?, ?, ?, ?,
?, ?, ?, ?, ?,
?, ?, ?, ?, ?,
?, ?, ?,
?, ?, ?
)`,
f.ID, f.Name, string(f.Kind), f.Lang, f.Domain, f.Version, string(f.Purity), f.Signature,
f.Description, marshalStrings(f.Tags), marshalStrings(f.UsesFunctions), marshalStrings(f.UsesTypes), marshalStrings(f.Returns),
f.ReturnsOptional, f.ErrorType, marshalStrings(f.Imports), f.Example, f.Tested,
marshalStrings(f.Tests), f.TestFilePath, f.FilePath, f.CreatedAt.Format(time.RFC3339), now,
marshalStrings(f.Tests), f.TestFilePath, f.FilePath, f.ContentHash, f.CreatedAt.Format(time.RFC3339), f.UpdatedAt.Format(time.RFC3339),
marshalProps(f.Props), marshalStrings(f.Emits), hasState, f.Framework, marshalStrings(f.Variant),
f.Notes, f.Documentation, f.Code,
f.SourceRepo, f.SourceLicense, f.SourceFile,
)
return err
}
// InsertType inserts or replaces a type entry.
func (db *DB) InsertType(t *Type) error {
now := time.Now().UTC().Format(time.RFC3339)
now := time.Now().UTC()
if t.CreatedAt.IsZero() {
t.CreatedAt = time.Now().UTC()
t.CreatedAt = now
}
if t.UpdatedAt.IsZero() {
t.UpdatedAt = now
}
t.UpdatedAt = time.Now().UTC()
if t.ID == "" {
t.ID = GenerateID(t.Name, t.Lang, t.Domain)
@@ -118,13 +125,15 @@ func (db *DB) InsertType(t *Type) error {
INSERT OR REPLACE INTO types (
id, name, lang, domain, version, algebraic,
definition, description, tags, uses_types,
file_path, created_at, updated_at,
examples, notes, documentation, code
) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)`,
file_path, content_hash, created_at, updated_at,
examples, notes, documentation, code,
source_repo, source_license, source_file
) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)`,
t.ID, t.Name, t.Lang, t.Domain, t.Version, string(t.Algebraic),
t.Definition, t.Description, marshalStrings(t.Tags), marshalStrings(t.UsesTypes),
t.FilePath, t.CreatedAt.Format(time.RFC3339), now,
t.FilePath, t.ContentHash, t.CreatedAt.Format(time.RFC3339), t.UpdatedAt.Format(time.RFC3339),
t.Examples, t.Notes, t.Documentation, t.Code,
t.SourceRepo, t.SourceLicense, t.SourceFile,
)
return err
}
@@ -263,11 +272,13 @@ func (db *DB) DeleteType(id string) error {
// InsertApp inserts or replaces an app entry.
func (db *DB) InsertApp(a *App) error {
now := time.Now().UTC().Format(time.RFC3339)
now := time.Now().UTC()
if a.CreatedAt.IsZero() {
a.CreatedAt = time.Now().UTC()
a.CreatedAt = now
}
if a.UpdatedAt.IsZero() {
a.UpdatedAt = now
}
a.UpdatedAt = time.Now().UTC()
if a.ID == "" {
a.ID = GenerateID(a.Name, a.Lang, a.Domain)
@@ -277,11 +288,11 @@ func (db *DB) InsertApp(a *App) error {
INSERT OR REPLACE INTO apps (
id, name, lang, domain, description, tags,
uses_functions, uses_types, framework, entry_point,
documentation, notes, dir_path, created_at, updated_at
) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)`,
documentation, notes, dir_path, content_hash, created_at, updated_at
) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)`,
a.ID, a.Name, a.Lang, a.Domain, a.Description, marshalStrings(a.Tags),
marshalStrings(a.UsesFunctions), marshalStrings(a.UsesTypes), a.Framework, a.EntryPoint,
a.Documentation, a.Notes, a.DirPath, a.CreatedAt.Format(time.RFC3339), now,
a.Documentation, a.Notes, a.DirPath, a.ContentHash, a.CreatedAt.Format(time.RFC3339), a.UpdatedAt.Format(time.RFC3339),
)
return err
}
@@ -347,7 +358,7 @@ func scanApps(rows interface{ Next() bool; Scan(...any) error }) ([]App, error)
err := rows.Scan(
&a.ID, &a.Name, &a.Lang, &a.Domain, &a.Description, &tagsJSON,
&usesFnJSON, &usesTypJSON, &a.Framework, &a.EntryPoint,
&a.Documentation, &a.Notes, &a.DirPath, &createdAt, &updatedAt,
&a.Documentation, &a.Notes, &a.DirPath, &createdAt, &updatedAt, &a.ContentHash,
)
if err != nil {
return nil, fmt.Errorf("scanning app: %w", err)
@@ -391,7 +402,8 @@ func scanFunctions(rows interface{ Next() bool; Scan(...any) error }) ([]Functio
&f.ReturnsOptional, &f.ErrorType, &importsJSON, &f.Example, &f.Tested,
&testsJSON, &f.TestFilePath, &f.FilePath, &createdAt, &updatedAt,
&propsJSON, &emitsJSON, &hasState, &f.Framework, &variantJSON,
&f.Notes, &f.Documentation, &f.Code,
&f.Notes, &f.Documentation, &f.Code, &f.ContentHash,
&f.SourceRepo, &f.SourceLicense, &f.SourceFile,
)
if err != nil {
return nil, fmt.Errorf("scanning function: %w", err)
@@ -430,7 +442,8 @@ func scanTypes(rows interface{ Next() bool; Scan(...any) error }) ([]Type, error
&t.ID, &t.Name, &t.Lang, &t.Domain, &t.Version, &t.Algebraic,
&t.Definition, &t.Description, &tagsJSON, &usesTypJSON,
&t.FilePath, &createdAt, &updatedAt,
&t.Examples, &t.Notes, &t.Documentation, &t.Code,
&t.Examples, &t.Notes, &t.Documentation, &t.Code, &t.ContentHash,
&t.SourceRepo, &t.SourceLicense, &t.SourceFile,
)
if err != nil {
return nil, fmt.Errorf("scanning type: %w", err)