From 2d87d6affc97973fc205f20c79584f57ccc0571c Mon Sep 17 00:00:00 2001 From: Egutierrez Date: Sat, 28 Mar 2026 02:13:44 +0100 Subject: [PATCH] feat: indexer con validacion en dos pasadas y CLI con output de errores MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Reescribe el indexer con estrategia de dos pasadas: 1. Parsea todos los .md y construye sets de IDs conocidos 2. Valida integridad contra IDs conocidos, inserta solo los válidos El CLI ahora muestra INVALID para errores de validación y ERROR para errores de inserción, separando claramente ambos. Añade test de integración que verifica que entradas inválidas se rechazan sin afectar a las válidas. Co-Authored-By: Claude Opus 4.6 (1M context) --- cmd/fn/main.go | 5 ++- registry/indexer.go | 94 +++++++++++++++++++++++------------------ registry/parser_test.go | 61 ++++++++++++++++++++++++++ 3 files changed, 119 insertions(+), 41 deletions(-) diff --git a/cmd/fn/main.go b/cmd/fn/main.go index 74f836ea..a3b928bf 100644 --- a/cmd/fn/main.go +++ b/cmd/fn/main.go @@ -94,8 +94,11 @@ func cmdIndex() { } fmt.Printf("Indexed %d functions, %d types\n", result.Functions, result.Types) + for _, e := range result.ValidationErrors { + fmt.Fprintf(os.Stderr, " INVALID: %s\n", e) + } for _, e := range result.Errors { - fmt.Fprintf(os.Stderr, " warn: %s\n", e) + fmt.Fprintf(os.Stderr, " ERROR: %s\n", e) } } diff --git a/registry/indexer.go b/registry/indexer.go index b2602d1b..6aee63bf 100644 --- a/registry/indexer.go +++ b/registry/indexer.go @@ -9,13 +9,16 @@ import ( // IndexResult holds stats from an indexing run. type IndexResult struct { - Functions int - Types int - Errors []string + Functions int + Types int + ValidationErrors []string + Errors []string } -// Index walks the registry root, parses all .md files, and populates the database. -// It purges existing data first to ensure a clean rebuild. 
+// Index walks the registry root, parses all .md files, validates integrity, +// and populates the database. It uses two passes: +// 1. Parse all entries and collect known IDs +// 2. Validate references against known IDs, then insert valid entries func Index(db *DB, root string) (*IndexResult, error) { if err := db.Purge(); err != nil { return nil, fmt.Errorf("purging database: %w", err) @@ -23,64 +26,75 @@ func Index(db *DB, root string) (*IndexResult, error) { result := &IndexResult{} - // Index functions + // Pass 1: parse everything + var functions []*Function + var types []*Type + functionsDir := filepath.Join(root, "functions") if _, err := os.Stat(functionsDir); err == nil { - err := filepath.Walk(functionsDir, func(path string, info os.FileInfo, err error) error { - if err != nil { + filepath.Walk(functionsDir, func(path string, info os.FileInfo, err error) error { + if err != nil || info.IsDir() || !strings.HasSuffix(path, ".md") { return nil } - if info.IsDir() || !strings.HasSuffix(path, ".md") { - return nil - } - f, err := ParseFunctionMD(path) if err != nil { - result.Errors = append(result.Errors, fmt.Sprintf("%s: %v", path, err)) + result.Errors = append(result.Errors, fmt.Sprintf("parse %s: %v", path, err)) return nil } - - if err := db.InsertFunction(f); err != nil { - result.Errors = append(result.Errors, fmt.Sprintf("insert %s: %v", f.ID, err)) - return nil - } - - result.Functions++ + functions = append(functions, f) return nil }) - if err != nil { - return nil, fmt.Errorf("walking functions: %w", err) - } } - // Index types typesDir := filepath.Join(root, "types") if _, err := os.Stat(typesDir); err == nil { - err := filepath.Walk(typesDir, func(path string, info os.FileInfo, err error) error { - if err != nil { + filepath.Walk(typesDir, func(path string, info os.FileInfo, err error) error { + if err != nil || info.IsDir() || !strings.HasSuffix(path, ".md") { return nil } - if info.IsDir() || !strings.HasSuffix(path, ".md") { - return nil - } 
- t, err := ParseTypeMD(path) if err != nil { - result.Errors = append(result.Errors, fmt.Sprintf("%s: %v", path, err)) + result.Errors = append(result.Errors, fmt.Sprintf("parse %s: %v", path, err)) return nil } - - if err := db.InsertType(t); err != nil { - result.Errors = append(result.Errors, fmt.Sprintf("insert %s: %v", t.ID, err)) - return nil - } - - result.Types++ + types = append(types, t) return nil }) - if err != nil { - return nil, fmt.Errorf("walking types: %w", err) + } + + // Build known ID sets + knownFunctions := make(map[string]bool, len(functions)) + for _, f := range functions { + knownFunctions[f.ID] = true + } + knownTypes := make(map[string]bool, len(types)) + for _, t := range types { + knownTypes[t.ID] = true + } + + // Pass 2: validate and insert + for _, t := range types { + if verr := ValidateType(t, knownTypes); verr != nil { + result.ValidationErrors = append(result.ValidationErrors, verr.Error()) + continue } + if err := db.InsertType(t); err != nil { + result.Errors = append(result.Errors, fmt.Sprintf("insert %s: %v", t.ID, err)) + continue + } + result.Types++ + } + + for _, f := range functions { + if verr := ValidateFunction(f, knownFunctions, knownTypes); verr != nil { + result.ValidationErrors = append(result.ValidationErrors, verr.Error()) + continue + } + if err := db.InsertFunction(f); err != nil { + result.Errors = append(result.Errors, fmt.Sprintf("insert %s: %v", f.ID, err)) + continue + } + result.Functions++ } return result, nil diff --git a/registry/parser_test.go b/registry/parser_test.go index 2cbbbaf8..f393c13f 100644 --- a/registry/parser_test.go +++ b/registry/parser_test.go @@ -209,6 +209,9 @@ func TestIndexFullCycle(t *testing.T) { if len(result.Errors) != 0 { t.Errorf("unexpected errors: %v", result.Errors) } + if len(result.ValidationErrors) != 0 { + t.Errorf("unexpected validation errors: %v", result.ValidationErrors) + } // Verify searchable fns, err := db.SearchFunctions("filter", "", "", "", "") @@ -236,3 
+239,61 @@ func TestIndexFullCycle(t *testing.T) { t.Error("re-index should produce same counts") } } + +const invalidPipelineMD = `--- +name: bad_pipeline +kind: pipeline +lang: go +domain: core +version: "1.0.0" +purity: pure +description: "Pipeline puro sin uses_functions — debe fallar." +tags: [] +uses_functions: [] +uses_types: [] +returns: [] +returns_optional: false +error_type: "" +imports: [] +tested: false +tests: [] +test_file_path: "" +file_path: "functions/pipelines/bad.go" +--- +` + +func TestIndexRejectsInvalid(t *testing.T) { + root := t.TempDir() + + // Valid function + writeTempFile(t, root, "functions/core/filter_slice.md", functionMD) + // Invalid pipeline (pure + empty uses_functions) + writeTempFile(t, root, "functions/pipelines/bad.md", invalidPipelineMD) + // Valid type + writeTempFile(t, root, "types/finance/ohlcv.md", typeMD) + + dbPath := filepath.Join(root, "registry.db") + db, err := Open(dbPath) + if err != nil { + t.Fatal(err) + } + defer db.Close() + + result, err := Index(db, root) + if err != nil { + t.Fatal(err) + } + + // Valid entries should be indexed + if result.Functions != 1 { + t.Errorf("functions: got %d, want 1 (only the valid one)", result.Functions) + } + if result.Types != 1 { + t.Errorf("types: got %d, want 1", result.Types) + } + + // Invalid pipeline should produce validation error + if len(result.ValidationErrors) == 0 { + t.Error("expected validation errors for invalid pipeline") + } +}