From e3bb9c3b389bfcce032796429ce3435ff90712a1 Mon Sep 17 00:00:00 2001 From: Egutierrez Date: Mon, 30 Mar 2026 14:23:45 +0200 Subject: [PATCH] feat: content hash y timestamps inteligentes en registry MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Agrega content_hash a functions, types y apps para detectar cambios reales entre reindexaciones. Los timestamps created_at se preservan si el contenido no cambió, y updated_at solo se actualiza cuando hay cambios efectivos. Incluye migración 005, hash.go con SHA256 determinístico, y ajustes en store/indexer/models para el nuevo flujo de timestamps. Co-Authored-By: Claude Opus 4.6 (1M context) --- registry/hash.go | 101 +++++++++++++++++++++++ registry/indexer.go | 39 ++++++++- registry/migrations/005_content_hash.sql | 4 + registry/models.go | 9 ++ registry/store.go | 59 +++++++------ 5 files changed, 188 insertions(+), 24 deletions(-) create mode 100644 registry/hash.go create mode 100644 registry/migrations/005_content_hash.sql diff --git a/registry/hash.go b/registry/hash.go new file mode 100644 index 00000000..25c8c885 --- /dev/null +++ b/registry/hash.go @@ -0,0 +1,101 @@ +package registry + +import ( + "crypto/sha256" + "fmt" + "time" +) + +// timestampRecord holds preserved timestamps and hash for an existing entry. +type timestampRecord struct { + CreatedAt time.Time + UpdatedAt time.Time + ContentHash string +} + +// ComputeFunctionHash computes a deterministic hash of all content fields of a Function +// (excluding created_at, updated_at, and content_hash itself). 
+//
+// Every text field is written with %q, so a '|' or quote inside a value can
+// never be read as a field boundary, and HasState always contributes a token
+// (nil, true or false), so the number of serialized fields is fixed. Hashes
+// produced by this encoding differ from hashes of a plain %s join of the same
+// fields.
+func ComputeFunctionHash(f *Function) string {
+	h := sha256.New()
+	fmt.Fprintf(h, "%q|%q|%q|%q|%q|%q|%q|%q|%q",
+		f.ID, f.Name, f.Kind, f.Lang, f.Domain, f.Version, f.Purity, f.Signature, f.Description)
+	fmt.Fprintf(h, "|%q", marshalStrings(f.Tags))
+	fmt.Fprintf(h, "|%q", marshalStrings(f.UsesFunctions))
+	fmt.Fprintf(h, "|%q", marshalStrings(f.UsesTypes))
+	fmt.Fprintf(h, "|%q", marshalStrings(f.Returns))
+	fmt.Fprintf(h, "|%t|%q", f.ReturnsOptional, f.ErrorType)
+	fmt.Fprintf(h, "|%q", marshalStrings(f.Imports))
+	fmt.Fprintf(h, "|%q|%t", f.Example, f.Tested)
+	fmt.Fprintf(h, "|%q", marshalStrings(f.Tests))
+	fmt.Fprintf(h, "|%q|%q", f.TestFilePath, f.FilePath)
+	fmt.Fprintf(h, "|%q", marshalProps(f.Props))
+	fmt.Fprintf(h, "|%q", marshalStrings(f.Emits))
+	// HasState is a pointer with three states. Writing an explicit token for
+	// nil keeps the field from being skipped, which would shift every later
+	// field one position to the left.
+	if f.HasState != nil {
+		fmt.Fprintf(h, "|%t", *f.HasState)
+	} else {
+		fmt.Fprint(h, "|nil")
+	}
+	fmt.Fprintf(h, "|%q", f.Framework)
+	fmt.Fprintf(h, "|%q", marshalStrings(f.Variant))
+	fmt.Fprintf(h, "|%q|%q|%q", f.Notes, f.Documentation, f.Code)
+	fmt.Fprintf(h, "|%q|%q|%q", f.SourceRepo, f.SourceLicense, f.SourceFile)
+	return fmt.Sprintf("%x", h.Sum(nil))
+}
+
+// ComputeTypeHash computes a deterministic hash of all content fields of a Type.
+// CreatedAt, UpdatedAt and ContentHash are not part of the input. Text fields
+// are written with %q so that a '|' inside a value cannot be mistaken for the
+// separator between two fields.
+func ComputeTypeHash(t *Type) string {
+	h := sha256.New()
+	fmt.Fprintf(h, "%q|%q|%q|%q|%q|%q|%q|%q",
+		t.ID, t.Name, t.Lang, t.Domain, t.Version, t.Algebraic, t.Definition, t.Description)
+	fmt.Fprintf(h, "|%q", marshalStrings(t.Tags))
+	fmt.Fprintf(h, "|%q", marshalStrings(t.UsesTypes))
+	fmt.Fprintf(h, "|%q|%q|%q|%q|%q", t.FilePath, t.Examples, t.Notes, t.Documentation, t.Code)
+	fmt.Fprintf(h, "|%q|%q|%q", t.SourceRepo, t.SourceLicense, t.SourceFile)
+	return fmt.Sprintf("%x", h.Sum(nil))
+}
+
+// ComputeAppHash computes a deterministic hash of all content fields of an App.
+// CreatedAt, UpdatedAt and ContentHash are not part of the input. Text fields
+// are written with %q so that a '|' inside a value cannot be mistaken for the
+// separator between two fields.
+func ComputeAppHash(a *App) string {
+	h := sha256.New()
+	fmt.Fprintf(h, "%q|%q|%q|%q|%q",
+		a.ID, a.Name, a.Lang, a.Domain, a.Description)
+	fmt.Fprintf(h, "|%q", marshalStrings(a.Tags))
+	fmt.Fprintf(h, "|%q", marshalStrings(a.UsesFunctions))
+	fmt.Fprintf(h, "|%q", marshalStrings(a.UsesTypes))
+	fmt.Fprintf(h, "|%q|%q|%q|%q|%q", a.Framework, a.EntryPoint, a.Documentation, a.Notes, a.DirPath)
+	return fmt.Sprintf("%x", h.Sum(nil))
+}
+
+// LoadTimestamps reads existing id → {created_at, updated_at, content_hash} from
+// the functions, types and apps tables. It is called before Purge so that dates
+// can be preserved across reindexing.
+//
+// On failure all three maps are nil and err names the table that could not be
+// read; no partially loaded result is returned.
+func (db *DB) LoadTimestamps() (funcs, types, apps map[string]timestampRecord, err error) {
+	if funcs, err = loadTable(db, "functions"); err != nil {
+		return nil, nil, nil, err
+	}
+	if types, err = loadTable(db, "types"); err != nil {
+		return nil, nil, nil, err
+	}
+	if apps, err = loadTable(db, "apps"); err != nil {
+		return nil, nil, nil, err
+	}
+	return funcs, types, apps, nil
+}
+
+// loadTable reads id, created_at, updated_at and content_hash for every row of
+// table and returns them keyed by id.
+//
+// table is interpolated into the SQL text, so it must be a trusted, hard-coded
+// table name; LoadTimestamps only passes string literals.
+func loadTable(db *DB, table string) (map[string]timestampRecord, error) {
+	rows, err := db.conn.Query(fmt.Sprintf("SELECT id, created_at, updated_at, content_hash FROM %s", table))
+	if err != nil {
+		return nil, fmt.Errorf("querying %s: %w", table, err)
+	}
+	defer rows.Close()
+
+	m := make(map[string]timestampRecord)
+	for rows.Next() {
+		var id, ca, ua, ch string
+		if err := rows.Scan(&id, &ca, &ua, &ch); err != nil {
+			return nil, fmt.Errorf("scanning %s row: %w", table, err)
+		}
+		rec := timestampRecord{ContentHash: ch}
+		// Best effort on purpose: a stored value that is not RFC 3339 leaves
+		// the zero time instead of aborting the whole load. applyTimestamps
+		// treats a zero CreatedAt as "no previous record", so such a row is
+		// simply re-stamped on the next index run.
+		rec.CreatedAt, _ = time.Parse(time.RFC3339, ca)
+		rec.UpdatedAt, _ = time.Parse(time.RFC3339, ua)
+		m[id] = rec
+	}
+	if err := rows.Err(); err != nil {
+		return nil, fmt.Errorf("iterating %s: %w", table, err)
+	}
+	return m, nil
+}
diff --git a/registry/indexer.go b/registry/indexer.go index 7dd6ba32..030a6aad 100644 --- a/registry/indexer.go +++ b/registry/indexer.go @@ -5,6 +5,7 @@ import ( "os" "path/filepath" "strings" + "time" ) // IndexResult holds stats from an indexing run. @@ -24,6 +25,12 @@ type IndexResult struct { // Scans functions/ and types/ at the root level, plus any language-specific // directories (e.g. python/functions/, python/types/). 
func Index(db *DB, root string) (*IndexResult, error) { + // Load existing timestamps before purging so we can preserve created_at + oldFuncs, oldTypes, oldApps, err := db.LoadTimestamps() + if err != nil { + return nil, fmt.Errorf("loading timestamps: %w", err) + } + if err := db.Purge(); err != nil { return nil, fmt.Errorf("purging database: %w", err) } @@ -109,12 +116,16 @@ func Index(db *DB, root string) (*IndexResult, error) { knownTypes[t.ID] = true } - // Pass 2: validate and insert + now := time.Now().UTC() + + // Pass 2: validate, assign timestamps via hash comparison, and insert for _, t := range types { if verr := ValidateType(t, knownTypes); verr != nil { result.ValidationErrors = append(result.ValidationErrors, verr.Error()) continue } + t.ContentHash = ComputeTypeHash(t) + applyTimestamps(&t.CreatedAt, &t.UpdatedAt, t.ContentHash, oldTypes[t.ID], now) if err := db.InsertType(t); err != nil { result.Errors = append(result.Errors, fmt.Sprintf("insert %s: %v", t.ID, err)) continue @@ -127,6 +138,8 @@ func Index(db *DB, root string) (*IndexResult, error) { result.ValidationErrors = append(result.ValidationErrors, verr.Error()) continue } + f.ContentHash = ComputeFunctionHash(f) + applyTimestamps(&f.CreatedAt, &f.UpdatedAt, f.ContentHash, oldFuncs[f.ID], now) if err := db.InsertFunction(f); err != nil { result.Errors = append(result.Errors, fmt.Sprintf("insert %s: %v", f.ID, err)) continue @@ -139,6 +152,8 @@ func Index(db *DB, root string) (*IndexResult, error) { result.ValidationErrors = append(result.ValidationErrors, verr.Error()) continue } + a.ContentHash = ComputeAppHash(a) + applyTimestamps(&a.CreatedAt, &a.UpdatedAt, a.ContentHash, oldApps[a.ID], now) if err := db.InsertApp(a); err != nil { result.Errors = append(result.Errors, fmt.Sprintf("insert %s: %v", a.ID, err)) continue @@ -149,6 +164,28 @@ func Index(db *DB, root string) (*IndexResult, error) { return result, nil } +// applyTimestamps sets created_at and updated_at based on whether the 
entry
+// already existed and whether its content hash changed:
+//   - no previous record (old.CreatedAt is zero): both are set to now
+//   - same hash: both are copied from the old record
+//   - different hash: created_at is copied, updated_at is set to now
+func applyTimestamps(createdAt, updatedAt *time.Time, newHash string, old timestampRecord, now time.Time) {
+	// Start from the brand-new case, then restore whatever the old record
+	// allows us to keep.
+	created, updated := now, now
+	if !old.CreatedAt.IsZero() {
+		created = old.CreatedAt
+		if newHash == old.ContentHash {
+			updated = old.UpdatedAt
+		}
+	}
+	*createdAt, *updatedAt = created, updated
+}
+
 // walkMD walks a directory recursively and calls fn for each .md file found. func walkMD(dir string, fn func(path string)) { if _, err := os.Stat(dir); err != nil { diff --git a/registry/migrations/005_content_hash.sql b/registry/migrations/005_content_hash.sql new file mode 100644 index 00000000..bbcc39ad --- /dev/null +++ b/registry/migrations/005_content_hash.sql @@ -0,0 +1,4 @@ +-- Add content_hash to detect changes across reindexing and preserve timestamps. 
+ALTER TABLE functions ADD COLUMN content_hash TEXT NOT NULL DEFAULT ''; +ALTER TABLE types ADD COLUMN content_hash TEXT NOT NULL DEFAULT ''; +ALTER TABLE apps ADD COLUMN content_hash TEXT NOT NULL DEFAULT ''; diff --git a/registry/models.go b/registry/models.go index b725f377..c8959ab6 100644 --- a/registry/models.go +++ b/registry/models.go @@ -54,6 +54,10 @@ type Function struct { Tests []string `json:"tests"` TestFilePath string `json:"test_file_path"` FilePath string `json:"file_path"` + ContentHash string `json:"content_hash"` + SourceRepo string `json:"source_repo"` + SourceLicense string `json:"source_license"` + SourceFile string `json:"source_file"` CreatedAt time.Time `json:"created_at"` UpdatedAt time.Time `json:"updated_at"` @@ -90,6 +94,10 @@ type Type struct { Documentation string `json:"documentation"` Code string `json:"code"` FilePath string `json:"file_path"` + ContentHash string `json:"content_hash"` + SourceRepo string `json:"source_repo"` + SourceLicense string `json:"source_license"` + SourceFile string `json:"source_file"` CreatedAt time.Time `json:"created_at"` UpdatedAt time.Time `json:"updated_at"` } @@ -109,6 +117,7 @@ type App struct { Documentation string `json:"documentation"` Notes string `json:"notes"` DirPath string `json:"dir_path"` + ContentHash string `json:"content_hash"` CreatedAt time.Time `json:"created_at"` UpdatedAt time.Time `json:"updated_at"` } diff --git a/registry/store.go b/registry/store.go index 947d33e7..fd010f5a 100644 --- a/registry/store.go +++ b/registry/store.go @@ -57,11 +57,13 @@ func unmarshalProps(s string) []PropDef { // InsertFunction inserts or replaces a function entry. 
func (db *DB) InsertFunction(f *Function) error { - now := time.Now().UTC().Format(time.RFC3339) + now := time.Now().UTC() if f.CreatedAt.IsZero() { - f.CreatedAt = time.Now().UTC() + f.CreatedAt = now + } + if f.UpdatedAt.IsZero() { + f.UpdatedAt = now } - f.UpdatedAt = time.Now().UTC() if f.ID == "" { f.ID = GenerateID(f.Name, f.Lang, f.Domain) @@ -81,34 +83,39 @@ func (db *DB) InsertFunction(f *Function) error { id, name, kind, lang, domain, version, purity, signature, description, tags, uses_functions, uses_types, returns, returns_optional, error_type, imports, example, tested, - tests, test_file_path, file_path, created_at, updated_at, + tests, test_file_path, file_path, content_hash, created_at, updated_at, props, emits, has_state, framework, variant, - notes, documentation, code + notes, documentation, code, + source_repo, source_license, source_file ) VALUES ( ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, + ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, - ?, ?, ?, ?, ?, + ?, ?, ?, ?, ?, ? )`, f.ID, f.Name, string(f.Kind), f.Lang, f.Domain, f.Version, string(f.Purity), f.Signature, f.Description, marshalStrings(f.Tags), marshalStrings(f.UsesFunctions), marshalStrings(f.UsesTypes), marshalStrings(f.Returns), f.ReturnsOptional, f.ErrorType, marshalStrings(f.Imports), f.Example, f.Tested, - marshalStrings(f.Tests), f.TestFilePath, f.FilePath, f.CreatedAt.Format(time.RFC3339), now, + marshalStrings(f.Tests), f.TestFilePath, f.FilePath, f.ContentHash, f.CreatedAt.Format(time.RFC3339), f.UpdatedAt.Format(time.RFC3339), marshalProps(f.Props), marshalStrings(f.Emits), hasState, f.Framework, marshalStrings(f.Variant), f.Notes, f.Documentation, f.Code, + f.SourceRepo, f.SourceLicense, f.SourceFile, ) return err } // InsertType inserts or replaces a type entry. 
func (db *DB) InsertType(t *Type) error { - now := time.Now().UTC().Format(time.RFC3339) + now := time.Now().UTC() if t.CreatedAt.IsZero() { - t.CreatedAt = time.Now().UTC() + t.CreatedAt = now + } + if t.UpdatedAt.IsZero() { + t.UpdatedAt = now } - t.UpdatedAt = time.Now().UTC() if t.ID == "" { t.ID = GenerateID(t.Name, t.Lang, t.Domain) @@ -118,13 +125,15 @@ func (db *DB) InsertType(t *Type) error { INSERT OR REPLACE INTO types ( id, name, lang, domain, version, algebraic, definition, description, tags, uses_types, - file_path, created_at, updated_at, - examples, notes, documentation, code - ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)`, + file_path, content_hash, created_at, updated_at, + examples, notes, documentation, code, + source_repo, source_license, source_file + ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)`, t.ID, t.Name, t.Lang, t.Domain, t.Version, string(t.Algebraic), t.Definition, t.Description, marshalStrings(t.Tags), marshalStrings(t.UsesTypes), - t.FilePath, t.CreatedAt.Format(time.RFC3339), now, + t.FilePath, t.ContentHash, t.CreatedAt.Format(time.RFC3339), t.UpdatedAt.Format(time.RFC3339), t.Examples, t.Notes, t.Documentation, t.Code, + t.SourceRepo, t.SourceLicense, t.SourceFile, ) return err } @@ -263,11 +272,13 @@ func (db *DB) DeleteType(id string) error { // InsertApp inserts or replaces an app entry. 
func (db *DB) InsertApp(a *App) error { - now := time.Now().UTC().Format(time.RFC3339) + now := time.Now().UTC() if a.CreatedAt.IsZero() { - a.CreatedAt = time.Now().UTC() + a.CreatedAt = now + } + if a.UpdatedAt.IsZero() { + a.UpdatedAt = now } - a.UpdatedAt = time.Now().UTC() if a.ID == "" { a.ID = GenerateID(a.Name, a.Lang, a.Domain) @@ -277,11 +288,11 @@ func (db *DB) InsertApp(a *App) error { INSERT OR REPLACE INTO apps ( id, name, lang, domain, description, tags, uses_functions, uses_types, framework, entry_point, - documentation, notes, dir_path, created_at, updated_at - ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)`, + documentation, notes, dir_path, content_hash, created_at, updated_at + ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)`, a.ID, a.Name, a.Lang, a.Domain, a.Description, marshalStrings(a.Tags), marshalStrings(a.UsesFunctions), marshalStrings(a.UsesTypes), a.Framework, a.EntryPoint, - a.Documentation, a.Notes, a.DirPath, a.CreatedAt.Format(time.RFC3339), now, + a.Documentation, a.Notes, a.DirPath, a.ContentHash, a.CreatedAt.Format(time.RFC3339), a.UpdatedAt.Format(time.RFC3339), ) return err } @@ -347,7 +358,7 @@ func scanApps(rows interface{ Next() bool; Scan(...any) error }) ([]App, error) err := rows.Scan( &a.ID, &a.Name, &a.Lang, &a.Domain, &a.Description, &tagsJSON, &usesFnJSON, &usesTypJSON, &a.Framework, &a.EntryPoint, - &a.Documentation, &a.Notes, &a.DirPath, &createdAt, &updatedAt, + &a.Documentation, &a.Notes, &a.DirPath, &createdAt, &updatedAt, &a.ContentHash, ) if err != nil { return nil, fmt.Errorf("scanning app: %w", err) @@ -391,7 +402,8 @@ func scanFunctions(rows interface{ Next() bool; Scan(...any) error }) ([]Functio &f.ReturnsOptional, &f.ErrorType, &importsJSON, &f.Example, &f.Tested, &testsJSON, &f.TestFilePath, &f.FilePath, &createdAt, &updatedAt, &propsJSON, &emitsJSON, &hasState, &f.Framework, &variantJSON, - &f.Notes, &f.Documentation, &f.Code, + &f.Notes, &f.Documentation, &f.Code, &f.ContentHash, + 
&f.SourceRepo, &f.SourceLicense, &f.SourceFile, ) if err != nil { return nil, fmt.Errorf("scanning function: %w", err) @@ -430,7 +442,8 @@ func scanTypes(rows interface{ Next() bool; Scan(...any) error }) ([]Type, error &t.ID, &t.Name, &t.Lang, &t.Domain, &t.Version, &t.Algebraic, &t.Definition, &t.Description, &tagsJSON, &usesTypJSON, &t.FilePath, &createdAt, &updatedAt, - &t.Examples, &t.Notes, &t.Documentation, &t.Code, + &t.Examples, &t.Notes, &t.Documentation, &t.Code, &t.ContentHash, + &t.SourceRepo, &t.SourceLicense, &t.SourceFile, ) if err != nil { return nil, fmt.Errorf("scanning type: %w", err)