feat: content hash y timestamps inteligentes en registry
Agrega content_hash a functions, types y apps para detectar cambios reales entre reindexaciones. Los timestamps created_at se preservan si el contenido no cambió, y updated_at solo se actualiza cuando hay cambios efectivos. Incluye migración 005, hash.go con SHA256 determinístico, y ajustes en store/indexer/models para el nuevo flujo de timestamps. Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -0,0 +1,101 @@
|
||||
package registry
|
||||
|
||||
import (
|
||||
"crypto/sha256"
|
||||
"fmt"
|
||||
"time"
|
||||
)
|
||||
|
||||
// timestampRecord is the snapshot kept for one already-stored entry: its two
// timestamps together with the content hash that was saved next to them.
type timestampRecord struct {
	// CreatedAt and UpdatedAt mirror the entry's created_at / updated_at values.
	CreatedAt, UpdatedAt time.Time
	// ContentHash mirrors the entry's stored content_hash value.
	ContentHash string
}
|
||||
|
||||
// ComputeFunctionHash computes a deterministic hash of all content fields of a Function
|
||||
// (excluding created_at, updated_at, and content_hash itself).
|
||||
func ComputeFunctionHash(f *Function) string {
|
||||
h := sha256.New()
|
||||
fmt.Fprintf(h, "%s|%s|%s|%s|%s|%s|%s|%s|%s",
|
||||
f.ID, f.Name, f.Kind, f.Lang, f.Domain, f.Version, f.Purity, f.Signature, f.Description)
|
||||
fmt.Fprintf(h, "|%s", marshalStrings(f.Tags))
|
||||
fmt.Fprintf(h, "|%s", marshalStrings(f.UsesFunctions))
|
||||
fmt.Fprintf(h, "|%s", marshalStrings(f.UsesTypes))
|
||||
fmt.Fprintf(h, "|%s", marshalStrings(f.Returns))
|
||||
fmt.Fprintf(h, "|%t|%s", f.ReturnsOptional, f.ErrorType)
|
||||
fmt.Fprintf(h, "|%s", marshalStrings(f.Imports))
|
||||
fmt.Fprintf(h, "|%s|%t", f.Example, f.Tested)
|
||||
fmt.Fprintf(h, "|%s", marshalStrings(f.Tests))
|
||||
fmt.Fprintf(h, "|%s|%s", f.TestFilePath, f.FilePath)
|
||||
fmt.Fprintf(h, "|%s", marshalProps(f.Props))
|
||||
fmt.Fprintf(h, "|%s", marshalStrings(f.Emits))
|
||||
if f.HasState != nil {
|
||||
fmt.Fprintf(h, "|%t", *f.HasState)
|
||||
}
|
||||
fmt.Fprintf(h, "|%s", f.Framework)
|
||||
fmt.Fprintf(h, "|%s", marshalStrings(f.Variant))
|
||||
fmt.Fprintf(h, "|%s|%s|%s", f.Notes, f.Documentation, f.Code)
|
||||
fmt.Fprintf(h, "|%s|%s|%s", f.SourceRepo, f.SourceLicense, f.SourceFile)
|
||||
return fmt.Sprintf("%x", h.Sum(nil))
|
||||
}
|
||||
|
||||
// ComputeTypeHash computes a deterministic hash of all content fields of a Type.
|
||||
func ComputeTypeHash(t *Type) string {
|
||||
h := sha256.New()
|
||||
fmt.Fprintf(h, "%s|%s|%s|%s|%s|%s|%s|%s",
|
||||
t.ID, t.Name, t.Lang, t.Domain, t.Version, t.Algebraic, t.Definition, t.Description)
|
||||
fmt.Fprintf(h, "|%s", marshalStrings(t.Tags))
|
||||
fmt.Fprintf(h, "|%s", marshalStrings(t.UsesTypes))
|
||||
fmt.Fprintf(h, "|%s|%s|%s|%s|%s", t.FilePath, t.Examples, t.Notes, t.Documentation, t.Code)
|
||||
fmt.Fprintf(h, "|%s|%s|%s", t.SourceRepo, t.SourceLicense, t.SourceFile)
|
||||
return fmt.Sprintf("%x", h.Sum(nil))
|
||||
}
|
||||
|
||||
// ComputeAppHash computes a deterministic hash of all content fields of an App.
|
||||
func ComputeAppHash(a *App) string {
|
||||
h := sha256.New()
|
||||
fmt.Fprintf(h, "%s|%s|%s|%s|%s",
|
||||
a.ID, a.Name, a.Lang, a.Domain, a.Description)
|
||||
fmt.Fprintf(h, "|%s", marshalStrings(a.Tags))
|
||||
fmt.Fprintf(h, "|%s", marshalStrings(a.UsesFunctions))
|
||||
fmt.Fprintf(h, "|%s", marshalStrings(a.UsesTypes))
|
||||
fmt.Fprintf(h, "|%s|%s|%s|%s|%s", a.Framework, a.EntryPoint, a.Documentation, a.Notes, a.DirPath)
|
||||
return fmt.Sprintf("%x", h.Sum(nil))
|
||||
}
|
||||
|
||||
// LoadTimestamps reads existing id → {created_at, updated_at, content_hash} from all tables.
|
||||
// Called before Purge so we can preserve dates across reindexing.
|
||||
func (db *DB) LoadTimestamps() (funcs, types, apps map[string]timestampRecord, err error) {
|
||||
funcs, err = loadTable(db, "functions")
|
||||
if err != nil {
|
||||
return
|
||||
}
|
||||
types, err = loadTable(db, "types")
|
||||
if err != nil {
|
||||
return
|
||||
}
|
||||
apps, err = loadTable(db, "apps")
|
||||
return
|
||||
}
|
||||
|
||||
func loadTable(db *DB, table string) (map[string]timestampRecord, error) {
|
||||
rows, err := db.conn.Query(fmt.Sprintf("SELECT id, created_at, updated_at, content_hash FROM %s", table))
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
defer rows.Close()
|
||||
|
||||
m := make(map[string]timestampRecord)
|
||||
for rows.Next() {
|
||||
var id, ca, ua, ch string
|
||||
if err := rows.Scan(&id, &ca, &ua, &ch); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
rec := timestampRecord{ContentHash: ch}
|
||||
rec.CreatedAt, _ = time.Parse(time.RFC3339, ca)
|
||||
rec.UpdatedAt, _ = time.Parse(time.RFC3339, ua)
|
||||
m[id] = rec
|
||||
}
|
||||
return m, rows.Err()
|
||||
}
|
||||
+38
-1
@@ -5,6 +5,7 @@ import (
|
||||
"os"
|
||||
"path/filepath"
|
||||
"strings"
|
||||
"time"
|
||||
)
|
||||
|
||||
// IndexResult holds stats from an indexing run.
|
||||
@@ -24,6 +25,12 @@ type IndexResult struct {
|
||||
// Scans functions/ and types/ at the root level, plus any language-specific
|
||||
// directories (e.g. python/functions/, python/types/).
|
||||
func Index(db *DB, root string) (*IndexResult, error) {
|
||||
// Load existing timestamps before purging so we can preserve created_at
|
||||
oldFuncs, oldTypes, oldApps, err := db.LoadTimestamps()
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("loading timestamps: %w", err)
|
||||
}
|
||||
|
||||
if err := db.Purge(); err != nil {
|
||||
return nil, fmt.Errorf("purging database: %w", err)
|
||||
}
|
||||
@@ -109,12 +116,16 @@ func Index(db *DB, root string) (*IndexResult, error) {
|
||||
knownTypes[t.ID] = true
|
||||
}
|
||||
|
||||
// Pass 2: validate and insert
|
||||
now := time.Now().UTC()
|
||||
|
||||
// Pass 2: validate, assign timestamps via hash comparison, and insert
|
||||
for _, t := range types {
|
||||
if verr := ValidateType(t, knownTypes); verr != nil {
|
||||
result.ValidationErrors = append(result.ValidationErrors, verr.Error())
|
||||
continue
|
||||
}
|
||||
t.ContentHash = ComputeTypeHash(t)
|
||||
applyTimestamps(&t.CreatedAt, &t.UpdatedAt, t.ContentHash, oldTypes[t.ID], now)
|
||||
if err := db.InsertType(t); err != nil {
|
||||
result.Errors = append(result.Errors, fmt.Sprintf("insert %s: %v", t.ID, err))
|
||||
continue
|
||||
@@ -127,6 +138,8 @@ func Index(db *DB, root string) (*IndexResult, error) {
|
||||
result.ValidationErrors = append(result.ValidationErrors, verr.Error())
|
||||
continue
|
||||
}
|
||||
f.ContentHash = ComputeFunctionHash(f)
|
||||
applyTimestamps(&f.CreatedAt, &f.UpdatedAt, f.ContentHash, oldFuncs[f.ID], now)
|
||||
if err := db.InsertFunction(f); err != nil {
|
||||
result.Errors = append(result.Errors, fmt.Sprintf("insert %s: %v", f.ID, err))
|
||||
continue
|
||||
@@ -139,6 +152,8 @@ func Index(db *DB, root string) (*IndexResult, error) {
|
||||
result.ValidationErrors = append(result.ValidationErrors, verr.Error())
|
||||
continue
|
||||
}
|
||||
a.ContentHash = ComputeAppHash(a)
|
||||
applyTimestamps(&a.CreatedAt, &a.UpdatedAt, a.ContentHash, oldApps[a.ID], now)
|
||||
if err := db.InsertApp(a); err != nil {
|
||||
result.Errors = append(result.Errors, fmt.Sprintf("insert %s: %v", a.ID, err))
|
||||
continue
|
||||
@@ -149,6 +164,28 @@ func Index(db *DB, root string) (*IndexResult, error) {
|
||||
return result, nil
|
||||
}
|
||||
|
||||
// applyTimestamps sets created_at and updated_at based on whether the entry
|
||||
// existed before and whether its content changed.
|
||||
// - New entry (no old record): both set to now
|
||||
// - Unchanged (hash matches): both preserved from old record
|
||||
// - Changed (hash differs): created_at preserved, updated_at set to now
|
||||
func applyTimestamps(createdAt, updatedAt *time.Time, newHash string, old timestampRecord, now time.Time) {
|
||||
if old.CreatedAt.IsZero() {
|
||||
// New entry
|
||||
*createdAt = now
|
||||
*updatedAt = now
|
||||
return
|
||||
}
|
||||
// Existing entry — always preserve created_at
|
||||
*createdAt = old.CreatedAt
|
||||
if old.ContentHash == newHash {
|
||||
// No changes — preserve updated_at too
|
||||
*updatedAt = old.UpdatedAt
|
||||
} else {
|
||||
*updatedAt = now
|
||||
}
|
||||
}
|
||||
|
||||
// walkMD walks a directory recursively and calls fn for each .md file found.
|
||||
func walkMD(dir string, fn func(path string)) {
|
||||
if _, err := os.Stat(dir); err != nil {
|
||||
|
||||
@@ -0,0 +1,4 @@
|
||||
-- Add a content_hash column to the functions, types and apps tables so that
-- changes can be detected across reindexing and timestamps preserved.
-- The column is NOT NULL with an empty-string default, so rows that already
-- exist when this migration runs hold '' until they are written again.
|
||||
ALTER TABLE functions ADD COLUMN content_hash TEXT NOT NULL DEFAULT '';
|
||||
ALTER TABLE types ADD COLUMN content_hash TEXT NOT NULL DEFAULT '';
|
||||
||||
ALTER TABLE apps ADD COLUMN content_hash TEXT NOT NULL DEFAULT '';
|
||||
@@ -54,6 +54,10 @@ type Function struct {
|
||||
Tests []string `json:"tests"`
|
||||
TestFilePath string `json:"test_file_path"`
|
||||
FilePath string `json:"file_path"`
|
||||
ContentHash string `json:"content_hash"`
|
||||
SourceRepo string `json:"source_repo"`
|
||||
SourceLicense string `json:"source_license"`
|
||||
SourceFile string `json:"source_file"`
|
||||
CreatedAt time.Time `json:"created_at"`
|
||||
UpdatedAt time.Time `json:"updated_at"`
|
||||
|
||||
@@ -90,6 +94,10 @@ type Type struct {
|
||||
Documentation string `json:"documentation"`
|
||||
Code string `json:"code"`
|
||||
FilePath string `json:"file_path"`
|
||||
ContentHash string `json:"content_hash"`
|
||||
SourceRepo string `json:"source_repo"`
|
||||
SourceLicense string `json:"source_license"`
|
||||
SourceFile string `json:"source_file"`
|
||||
CreatedAt time.Time `json:"created_at"`
|
||||
UpdatedAt time.Time `json:"updated_at"`
|
||||
}
|
||||
@@ -109,6 +117,7 @@ type App struct {
|
||||
Documentation string `json:"documentation"`
|
||||
Notes string `json:"notes"`
|
||||
DirPath string `json:"dir_path"`
|
||||
ContentHash string `json:"content_hash"`
|
||||
CreatedAt time.Time `json:"created_at"`
|
||||
UpdatedAt time.Time `json:"updated_at"`
|
||||
}
|
||||
|
||||
+36
-23
@@ -57,11 +57,13 @@ func unmarshalProps(s string) []PropDef {
|
||||
|
||||
// InsertFunction inserts or replaces a function entry.
|
||||
func (db *DB) InsertFunction(f *Function) error {
|
||||
now := time.Now().UTC().Format(time.RFC3339)
|
||||
now := time.Now().UTC()
|
||||
if f.CreatedAt.IsZero() {
|
||||
f.CreatedAt = time.Now().UTC()
|
||||
f.CreatedAt = now
|
||||
}
|
||||
if f.UpdatedAt.IsZero() {
|
||||
f.UpdatedAt = now
|
||||
}
|
||||
f.UpdatedAt = time.Now().UTC()
|
||||
|
||||
if f.ID == "" {
|
||||
f.ID = GenerateID(f.Name, f.Lang, f.Domain)
|
||||
@@ -81,34 +83,39 @@ func (db *DB) InsertFunction(f *Function) error {
|
||||
id, name, kind, lang, domain, version, purity, signature,
|
||||
description, tags, uses_functions, uses_types, returns,
|
||||
returns_optional, error_type, imports, example, tested,
|
||||
tests, test_file_path, file_path, created_at, updated_at,
|
||||
tests, test_file_path, file_path, content_hash, created_at, updated_at,
|
||||
props, emits, has_state, framework, variant,
|
||||
notes, documentation, code
|
||||
notes, documentation, code,
|
||||
source_repo, source_license, source_file
|
||||
) VALUES (
|
||||
?, ?, ?, ?, ?, ?, ?, ?,
|
||||
?, ?, ?, ?, ?,
|
||||
?, ?, ?, ?, ?,
|
||||
?, ?, ?, ?, ?, ?,
|
||||
?, ?, ?, ?, ?,
|
||||
?, ?, ?, ?, ?,
|
||||
?, ?, ?,
|
||||
?, ?, ?
|
||||
)`,
|
||||
f.ID, f.Name, string(f.Kind), f.Lang, f.Domain, f.Version, string(f.Purity), f.Signature,
|
||||
f.Description, marshalStrings(f.Tags), marshalStrings(f.UsesFunctions), marshalStrings(f.UsesTypes), marshalStrings(f.Returns),
|
||||
f.ReturnsOptional, f.ErrorType, marshalStrings(f.Imports), f.Example, f.Tested,
|
||||
marshalStrings(f.Tests), f.TestFilePath, f.FilePath, f.CreatedAt.Format(time.RFC3339), now,
|
||||
marshalStrings(f.Tests), f.TestFilePath, f.FilePath, f.ContentHash, f.CreatedAt.Format(time.RFC3339), f.UpdatedAt.Format(time.RFC3339),
|
||||
marshalProps(f.Props), marshalStrings(f.Emits), hasState, f.Framework, marshalStrings(f.Variant),
|
||||
f.Notes, f.Documentation, f.Code,
|
||||
f.SourceRepo, f.SourceLicense, f.SourceFile,
|
||||
)
|
||||
return err
|
||||
}
|
||||
|
||||
// InsertType inserts or replaces a type entry.
|
||||
func (db *DB) InsertType(t *Type) error {
|
||||
now := time.Now().UTC().Format(time.RFC3339)
|
||||
now := time.Now().UTC()
|
||||
if t.CreatedAt.IsZero() {
|
||||
t.CreatedAt = time.Now().UTC()
|
||||
t.CreatedAt = now
|
||||
}
|
||||
if t.UpdatedAt.IsZero() {
|
||||
t.UpdatedAt = now
|
||||
}
|
||||
t.UpdatedAt = time.Now().UTC()
|
||||
|
||||
if t.ID == "" {
|
||||
t.ID = GenerateID(t.Name, t.Lang, t.Domain)
|
||||
@@ -118,13 +125,15 @@ func (db *DB) InsertType(t *Type) error {
|
||||
INSERT OR REPLACE INTO types (
|
||||
id, name, lang, domain, version, algebraic,
|
||||
definition, description, tags, uses_types,
|
||||
file_path, created_at, updated_at,
|
||||
examples, notes, documentation, code
|
||||
) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)`,
|
||||
file_path, content_hash, created_at, updated_at,
|
||||
examples, notes, documentation, code,
|
||||
source_repo, source_license, source_file
|
||||
) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)`,
|
||||
t.ID, t.Name, t.Lang, t.Domain, t.Version, string(t.Algebraic),
|
||||
t.Definition, t.Description, marshalStrings(t.Tags), marshalStrings(t.UsesTypes),
|
||||
t.FilePath, t.CreatedAt.Format(time.RFC3339), now,
|
||||
t.FilePath, t.ContentHash, t.CreatedAt.Format(time.RFC3339), t.UpdatedAt.Format(time.RFC3339),
|
||||
t.Examples, t.Notes, t.Documentation, t.Code,
|
||||
t.SourceRepo, t.SourceLicense, t.SourceFile,
|
||||
)
|
||||
return err
|
||||
}
|
||||
@@ -263,11 +272,13 @@ func (db *DB) DeleteType(id string) error {
|
||||
|
||||
// InsertApp inserts or replaces an app entry.
|
||||
func (db *DB) InsertApp(a *App) error {
|
||||
now := time.Now().UTC().Format(time.RFC3339)
|
||||
now := time.Now().UTC()
|
||||
if a.CreatedAt.IsZero() {
|
||||
a.CreatedAt = time.Now().UTC()
|
||||
a.CreatedAt = now
|
||||
}
|
||||
if a.UpdatedAt.IsZero() {
|
||||
a.UpdatedAt = now
|
||||
}
|
||||
a.UpdatedAt = time.Now().UTC()
|
||||
|
||||
if a.ID == "" {
|
||||
a.ID = GenerateID(a.Name, a.Lang, a.Domain)
|
||||
@@ -277,11 +288,11 @@ func (db *DB) InsertApp(a *App) error {
|
||||
INSERT OR REPLACE INTO apps (
|
||||
id, name, lang, domain, description, tags,
|
||||
uses_functions, uses_types, framework, entry_point,
|
||||
documentation, notes, dir_path, created_at, updated_at
|
||||
) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)`,
|
||||
documentation, notes, dir_path, content_hash, created_at, updated_at
|
||||
) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)`,
|
||||
a.ID, a.Name, a.Lang, a.Domain, a.Description, marshalStrings(a.Tags),
|
||||
marshalStrings(a.UsesFunctions), marshalStrings(a.UsesTypes), a.Framework, a.EntryPoint,
|
||||
a.Documentation, a.Notes, a.DirPath, a.CreatedAt.Format(time.RFC3339), now,
|
||||
a.Documentation, a.Notes, a.DirPath, a.ContentHash, a.CreatedAt.Format(time.RFC3339), a.UpdatedAt.Format(time.RFC3339),
|
||||
)
|
||||
return err
|
||||
}
|
||||
@@ -347,7 +358,7 @@ func scanApps(rows interface{ Next() bool; Scan(...any) error }) ([]App, error)
|
||||
err := rows.Scan(
|
||||
&a.ID, &a.Name, &a.Lang, &a.Domain, &a.Description, &tagsJSON,
|
||||
&usesFnJSON, &usesTypJSON, &a.Framework, &a.EntryPoint,
|
||||
&a.Documentation, &a.Notes, &a.DirPath, &createdAt, &updatedAt,
|
||||
&a.Documentation, &a.Notes, &a.DirPath, &createdAt, &updatedAt, &a.ContentHash,
|
||||
)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("scanning app: %w", err)
|
||||
@@ -391,7 +402,8 @@ func scanFunctions(rows interface{ Next() bool; Scan(...any) error }) ([]Functio
|
||||
&f.ReturnsOptional, &f.ErrorType, &importsJSON, &f.Example, &f.Tested,
|
||||
&testsJSON, &f.TestFilePath, &f.FilePath, &createdAt, &updatedAt,
|
||||
&propsJSON, &emitsJSON, &hasState, &f.Framework, &variantJSON,
|
||||
&f.Notes, &f.Documentation, &f.Code,
|
||||
&f.Notes, &f.Documentation, &f.Code, &f.ContentHash,
|
||||
&f.SourceRepo, &f.SourceLicense, &f.SourceFile,
|
||||
)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("scanning function: %w", err)
|
||||
@@ -430,7 +442,8 @@ func scanTypes(rows interface{ Next() bool; Scan(...any) error }) ([]Type, error
|
||||
&t.ID, &t.Name, &t.Lang, &t.Domain, &t.Version, &t.Algebraic,
|
||||
&t.Definition, &t.Description, &tagsJSON, &usesTypJSON,
|
||||
&t.FilePath, &createdAt, &updatedAt,
|
||||
&t.Examples, &t.Notes, &t.Documentation, &t.Code,
|
||||
&t.Examples, &t.Notes, &t.Documentation, &t.Code, &t.ContentHash,
|
||||
&t.SourceRepo, &t.SourceLicense, &t.SourceFile,
|
||||
)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("scanning type: %w", err)
|
||||
|
||||
Reference in New Issue
Block a user