diff --git a/cmd/fn/main.go b/cmd/fn/main.go index 74f836ea..a3b928bf 100644 --- a/cmd/fn/main.go +++ b/cmd/fn/main.go @@ -94,8 +94,11 @@ func cmdIndex() { } fmt.Printf("Indexed %d functions, %d types\n", result.Functions, result.Types) + for _, e := range result.ValidationErrors { + fmt.Fprintf(os.Stderr, " INVALID: %s\n", e) + } for _, e := range result.Errors { - fmt.Fprintf(os.Stderr, " warn: %s\n", e) + fmt.Fprintf(os.Stderr, " ERROR: %s\n", e) } } diff --git a/registry/indexer.go b/registry/indexer.go index b2602d1b..6aee63bf 100644 --- a/registry/indexer.go +++ b/registry/indexer.go @@ -9,13 +9,16 @@ import ( // IndexResult holds stats from an indexing run. type IndexResult struct { - Functions int - Types int - Errors []string + Functions int + Types int + ValidationErrors []string + Errors []string } -// Index walks the registry root, parses all .md files, and populates the database. -// It purges existing data first to ensure a clean rebuild. +// Index walks the registry root, parses all .md files, validates integrity, +// and populates the database. It uses two passes: +// 1. Parse all entries and collect known IDs +// 2. Validate references against known IDs, then insert valid entries func Index(db *DB, root string) (*IndexResult, error) { if err := db.Purge(); err != nil { return nil, fmt.Errorf("purging database: %w", err) @@ -23,64 +26,75 @@ func Index(db *DB, root string) (*IndexResult, error) { result := &IndexResult{} - // Index functions + // Pass 1: parse everything + var functions []*Function + var types []*Type + functionsDir := filepath.Join(root, "functions") if _, err := os.Stat(functionsDir); err == nil { - err := filepath.Walk(functionsDir, func(path string, info os.FileInfo, err error) error { - if err != nil { + filepath.Walk(functionsDir, func(path string, info os.FileInfo, err error) error { + if err != nil || info.IsDir() || !strings.HasSuffix(path, ".md") { return nil } - if info.IsDir() || !strings.HasSuffix(path, ".md") { - return nil - } - f, err := ParseFunctionMD(path) if err != nil { - result.Errors = append(result.Errors, fmt.Sprintf("%s: %v", path, err)) + result.Errors = append(result.Errors, fmt.Sprintf("parse %s: %v", path, err)) return nil } - - if err := db.InsertFunction(f); err != nil { - result.Errors = append(result.Errors, fmt.Sprintf("insert %s: %v", f.ID, err)) - return nil - } - - result.Functions++ + functions = append(functions, f) return nil }) - if err != nil { - return nil, fmt.Errorf("walking functions: %w", err) - } } - // Index types typesDir := filepath.Join(root, "types") if _, err := os.Stat(typesDir); err == nil { - err := filepath.Walk(typesDir, func(path string, info os.FileInfo, err error) error { - if err != nil { + filepath.Walk(typesDir, func(path string, info os.FileInfo, err error) error { + if err != nil || info.IsDir() || !strings.HasSuffix(path, ".md") { return nil } - if info.IsDir() || !strings.HasSuffix(path, ".md") { - return nil - } - t, err := ParseTypeMD(path) if err != nil { - result.Errors = append(result.Errors, fmt.Sprintf("%s: %v", path, err)) + result.Errors = append(result.Errors, fmt.Sprintf("parse %s: %v", path, err)) return nil } - - if err := db.InsertType(t); err != nil { - result.Errors = append(result.Errors, fmt.Sprintf("insert %s: %v", t.ID, err)) - return nil - } - - result.Types++ + types = append(types, t) return nil }) - if err != nil { - return nil, fmt.Errorf("walking types: %w", err) + } + + // Build known ID sets + knownFunctions := make(map[string]bool, len(functions)) + for _, f := range functions { + knownFunctions[f.ID] = true + } + knownTypes := make(map[string]bool, len(types)) + for _, t := range types { + knownTypes[t.ID] = true + } + + // Pass 2: validate and insert + for _, t := range types { + if verr := ValidateType(t, knownTypes); verr != nil { + result.ValidationErrors = append(result.ValidationErrors, verr.Error()) + continue } + if err := db.InsertType(t); err != nil { + result.Errors = append(result.Errors, fmt.Sprintf("insert %s: %v", t.ID, err)) + continue + } + result.Types++ + } + + for _, f := range functions { + if verr := ValidateFunction(f, knownFunctions, knownTypes); verr != nil { + result.ValidationErrors = append(result.ValidationErrors, verr.Error()) + continue + } + if err := db.InsertFunction(f); err != nil { + result.Errors = append(result.Errors, fmt.Sprintf("insert %s: %v", f.ID, err)) + continue + } + result.Functions++ } return result, nil diff --git a/registry/parser_test.go b/registry/parser_test.go index 2cbbbaf8..f393c13f 100644 --- a/registry/parser_test.go +++ b/registry/parser_test.go @@ -209,6 +209,9 @@ func TestIndexFullCycle(t *testing.T) { if len(result.Errors) != 0 { t.Errorf("unexpected errors: %v", result.Errors) } + if len(result.ValidationErrors) != 0 { + t.Errorf("unexpected validation errors: %v", result.ValidationErrors) + } // Verify searchable fns, err := db.SearchFunctions("filter", "", "", "", "") @@ -236,3 +239,61 @@ func TestIndexFullCycle(t *testing.T) { t.Error("re-index should produce same counts") } } + +const invalidPipelineMD = `--- +name: bad_pipeline +kind: pipeline +lang: go +domain: core +version: "1.0.0" +purity: pure +description: "Pipeline puro sin uses_functions — debe fallar." +tags: [] +uses_functions: [] +uses_types: [] +returns: [] +returns_optional: false +error_type: "" +imports: [] +tested: false +tests: [] +test_file_path: "" +file_path: "functions/pipelines/bad.go" +--- +` + +func TestIndexRejectsInvalid(t *testing.T) { + root := t.TempDir() + + // Valid function + writeTempFile(t, root, "functions/core/filter_slice.md", functionMD) + // Invalid pipeline (pure + empty uses_functions) + writeTempFile(t, root, "functions/pipelines/bad.md", invalidPipelineMD) + // Valid type + writeTempFile(t, root, "types/finance/ohlcv.md", typeMD) + + dbPath := filepath.Join(root, "registry.db") + db, err := Open(dbPath) + if err != nil { + t.Fatal(err) + } + defer db.Close() + + result, err := Index(db, root) + if err != nil { + t.Fatal(err) + } + + // Valid entries should be indexed + if result.Functions != 1 { + t.Errorf("functions: got %d, want 1 (only the valid one)", result.Functions) + } + if result.Types != 1 { + t.Errorf("types: got %d, want 1", result.Types) + } + + // Invalid pipeline should produce validation error + if len(result.ValidationErrors) == 0 { + t.Error("expected validation errors for invalid pipeline") + } +} diff --git a/registry/validate.go b/registry/validate.go new file mode 100644 index 00000000..4d1b5e57 --- /dev/null +++ b/registry/validate.go @@ -0,0 +1,167 @@ +package registry + +import ( + "fmt" + "strings" +) + +// ValidationError represents one or more integrity violations. +type ValidationError struct { + ID string + Errors []string +} + +func (v *ValidationError) Error() string { + return fmt.Sprintf("%s: %s", v.ID, strings.Join(v.Errors, "; ")) +} + +// ValidateFunction checks integrity rules from docs/integrity.md. +// knownFunctions and knownTypes are sets of IDs that exist in the registry +// (including the current indexing batch). +func ValidateFunction(f *Function, knownFunctions, knownTypes map[string]bool) *ValidationError { + var errs []string + + // Required fields + if f.Name == "" { + errs = append(errs, "name is required") + } + if f.Kind == "" { + errs = append(errs, "kind is required") + } + if f.Lang == "" { + errs = append(errs, "lang is required") + } + if f.Domain == "" { + errs = append(errs, "domain is required") + } + if f.Description == "" { + errs = append(errs, "description is required") + } + + // Pipeline rules + if f.Kind == KindPipeline { + if f.Purity != PurityImpure { + errs = append(errs, "pipeline must be impure") + } + if len(f.UsesFunctions) == 0 { + errs = append(errs, "pipeline uses_functions cannot be empty") + } + } + + // Purity rules + if f.Purity == PurityPure { + if f.ReturnsOptional { + errs = append(errs, "pure function cannot have returns_optional: true (model as sum type)") + } + if f.ErrorType != "" { + errs = append(errs, "pure function cannot have error_type") + } + } + if f.Purity == PurityImpure && f.Kind != KindComponent { + if f.ErrorType == "" { + errs = append(errs, "impure function must declare error_type") + } + } + + // Tested rules + if f.Tested { + if f.TestFilePath == "" { + errs = append(errs, "tested: true requires test_file_path") + } + if len(f.Tests) == 0 { + errs = append(errs, "tested: true requires non-empty tests") + } + } else { + if len(f.Tests) > 0 { + errs = append(errs, "tested: false but tests is not empty") + } + if f.TestFilePath != "" { + errs = append(errs, "tested: false but test_file_path is set") + } + } + + // Component rules + if f.Kind == KindComponent { + if f.Framework == "" { + errs = append(errs, "component must declare framework") + } + if len(f.Returns) > 0 { + errs = append(errs, "component returns must be empty (use emits)") + } + if f.HasState != nil && *f.HasState && f.Purity != PurityImpure { + errs = append(errs, "component with has_state: true must be impure") + } + } + + // File path must be relative + if f.FilePath != "" && strings.HasPrefix(f.FilePath, "/") { + errs = append(errs, "file_path must be relative to registry root") + } + + // Reference validation + for _, ref := range f.UsesFunctions { + if !knownFunctions[ref] { + errs = append(errs, fmt.Sprintf("uses_functions references unknown function: %s", ref)) + } + } + for _, ref := range f.UsesTypes { + if !knownTypes[ref] { + errs = append(errs, fmt.Sprintf("uses_types references unknown type: %s", ref)) + } + } + for _, ref := range f.Returns { + if !knownTypes[ref] { + errs = append(errs, fmt.Sprintf("returns references unknown type: %s", ref)) + } + } + if f.ErrorType != "" { + if !knownTypes[f.ErrorType] { + errs = append(errs, fmt.Sprintf("error_type references unknown type: %s", f.ErrorType)) + } + } + + if len(errs) > 0 { + return &ValidationError{ID: f.ID, Errors: errs} + } + return nil +} + +// ValidateType checks integrity rules for types. +func ValidateType(t *Type, knownTypes map[string]bool) *ValidationError { + var errs []string + + if t.Name == "" { + errs = append(errs, "name is required") + } + if t.Lang == "" { + errs = append(errs, "lang is required") + } + if t.Domain == "" { + errs = append(errs, "domain is required") + } + if t.Description == "" { + errs = append(errs, "description is required") + } + if t.Algebraic != AlgebraicProduct && t.Algebraic != AlgebraicSum { + errs = append(errs, fmt.Sprintf("algebraic must be 'product' or 'sum', got %q", t.Algebraic)) + } + + if t.FilePath != "" && strings.HasPrefix(t.FilePath, "/") { + errs = append(errs, "file_path must be relative to registry root") + } + + // Self-reference check + for _, ref := range t.UsesTypes { + if ref == t.ID { + errs = append(errs, "type cannot reference itself in uses_types") + } + if !knownTypes[ref] { + errs = append(errs, fmt.Sprintf("uses_types references unknown type: %s", ref)) + } + } + + if len(errs) > 0 { + return &ValidationError{ID: t.ID, Errors: errs} + } + return nil +} diff --git a/registry/validate_test.go b/registry/validate_test.go new file mode 100644 index 00000000..92c0d7d9 --- /dev/null +++ b/registry/validate_test.go @@ -0,0 +1,258 @@ +package registry + +import ( + "strings" + "testing" +) + +func boolPtr(b bool) *bool { return &b } + +func knownFns(ids ...string) map[string]bool { + m := make(map[string]bool) + for _, id := range ids { + m[id] = true + } + return m +} + +func knownTps(ids ...string) map[string]bool { + return knownFns(ids...) +} + +func TestValidateFunction_Valid(t *testing.T) { + f := &Function{ + ID: "filter_slice_go_core", Name: "filter_slice", Kind: KindFunction, + Lang: "go", Domain: "core", Purity: PurityPure, + Description: "Filtra un slice", Version: "1.0.0", + } + if err := ValidateFunction(f, knownFns(), knownTps()); err != nil { + t.Errorf("expected valid, got: %v", err) + } +} + +func TestValidateFunction_PipelineMustBeImpure(t *testing.T) { + f := &Function{ + ID: "p_go_core", Name: "p", Kind: KindPipeline, + Lang: "go", Domain: "core", Purity: PurityPure, + Description: "bad pipeline", Version: "1.0.0", + UsesFunctions: []string{"filter_slice_go_core"}, + } + err := ValidateFunction(f, knownFns("filter_slice_go_core"), knownTps()) + if err == nil { + t.Fatal("expected error") + } + if !strings.Contains(err.Error(), "pipeline must be impure") { + t.Errorf("unexpected error: %v", err) + } +} + +func TestValidateFunction_PipelineNeedsUsesFunctions(t *testing.T) { + f := &Function{ + ID: "p_go_core", Name: "p", Kind: KindPipeline, + Lang: "go", Domain: "core", Purity: PurityImpure, + Description: "bad pipeline", Version: "1.0.0", + ErrorType: "error_go_core", + } + err := ValidateFunction(f, knownFns(), knownTps("error_go_core")) + if err == nil { + t.Fatal("expected error") + } + if !strings.Contains(err.Error(), "uses_functions cannot be empty") { + t.Errorf("unexpected error: %v", err) + } +} + +func TestValidateFunction_PureNoReturnsOptional(t *testing.T) { + f := &Function{ + ID: "f_go_core", Name: "f", Kind: KindFunction, + Lang: "go", Domain: "core", Purity: PurityPure, + Description: "bad", Version: "1.0.0", + ReturnsOptional: true, + } + err := ValidateFunction(f, knownFns(), knownTps()) + if err == nil { + t.Fatal("expected error") + } + if !strings.Contains(err.Error(), "returns_optional") { + t.Errorf("unexpected error: %v", err) + } +} + +func TestValidateFunction_PureNoErrorType(t *testing.T) { + f := &Function{ + ID: "f_go_core", Name: "f", Kind: KindFunction, + Lang: "go", Domain: "core", Purity: PurityPure, + Description: "bad", Version: "1.0.0", + ErrorType: "error_go_core", + } + err := ValidateFunction(f, knownFns(), knownTps("error_go_core")) + if err == nil { + t.Fatal("expected error") + } + if !strings.Contains(err.Error(), "pure function cannot have error_type") { + t.Errorf("unexpected error: %v", err) + } +} + +func TestValidateFunction_ImpureNeedsErrorType(t *testing.T) { + f := &Function{ + ID: "f_go_io", Name: "f", Kind: KindFunction, + Lang: "go", Domain: "io", Purity: PurityImpure, + Description: "bad", Version: "1.0.0", + } + err := ValidateFunction(f, knownFns(), knownTps()) + if err == nil { + t.Fatal("expected error") + } + if !strings.Contains(err.Error(), "impure function must declare error_type") { + t.Errorf("unexpected error: %v", err) + } +} + +func TestValidateFunction_TestedNeedsTestFile(t *testing.T) { + f := &Function{ + ID: "f_go_core", Name: "f", Kind: KindFunction, + Lang: "go", Domain: "core", Purity: PurityPure, + Description: "test", Version: "1.0.0", + Tested: true, + } + err := ValidateFunction(f, knownFns(), knownTps()) + if err == nil { + t.Fatal("expected error") + } + if !strings.Contains(err.Error(), "test_file_path") { + t.Errorf("unexpected error: %v", err) + } +} + +func TestValidateFunction_NotTestedNoTests(t *testing.T) { + f := &Function{ + ID: "f_go_core", Name: "f", Kind: KindFunction, + Lang: "go", Domain: "core", Purity: PurityPure, + Description: "test", Version: "1.0.0", + Tested: false, Tests: []string{"ghost test"}, TestFilePath: "test.go", + } + err := ValidateFunction(f, knownFns(), knownTps()) + if err == nil { + t.Fatal("expected error") + } + if !strings.Contains(err.Error(), "tested: false but tests is not empty") { + t.Errorf("unexpected error: %v", err) + } +} + +func TestValidateFunction_OrphanRefs(t *testing.T) { + f := &Function{ + ID: "p_go_core", Name: "p", Kind: KindPipeline, + Lang: "go", Domain: "core", Purity: PurityImpure, + Description: "pipeline", Version: "1.0.0", + UsesFunctions: []string{"nonexistent_go_core"}, + UsesTypes: []string{"ghost_go_core"}, + Returns: []string{"phantom_go_core"}, + ErrorType: "missing_go_core", + } + err := ValidateFunction(f, knownFns(), knownTps()) + if err == nil { + t.Fatal("expected error") + } + if len(err.Errors) < 4 { + t.Errorf("expected at least 4 errors, got %d: %v", len(err.Errors), err) + } +} + +func TestValidateFunction_ComponentRules(t *testing.T) { + f := &Function{ + ID: "dt_typescript_core", Name: "DataTable", Kind: KindComponent, + Lang: "typescript", Domain: "core", Purity: PurityImpure, + Description: "table", Version: "1.0.0", + HasState: boolPtr(true), Framework: "react", + } + if err := ValidateFunction(f, knownFns(), knownTps()); err != nil { + t.Errorf("expected valid, got: %v", err) + } + + // Missing framework + f2 := *f + f2.Framework = "" + if err := ValidateFunction(&f2, knownFns(), knownTps()); err == nil { + t.Error("expected error for missing framework") + } + + // Returns should be empty + f3 := *f + f3.Returns = []string{"some_go_core"} + if err := ValidateFunction(&f3, knownFns(), knownTps("some_go_core")); err == nil { + t.Error("expected error for non-empty returns on component") + } + + // has_state: true but pure + f4 := *f + f4.Purity = PurityPure + if err := ValidateFunction(&f4, knownFns(), knownTps()); err == nil { + t.Error("expected error for stateful pure component") + } +} + +func TestValidateFunction_AbsoluteFilePath(t *testing.T) { + f := &Function{ + ID: "f_go_core", Name: "f", Kind: KindFunction, + Lang: "go", Domain: "core", Purity: PurityPure, + Description: "test", Version: "1.0.0", + FilePath: "/absolute/path.go", + } + err := ValidateFunction(f, knownFns(), knownTps()) + if err == nil { + t.Fatal("expected error for absolute file_path") + } +} + +func TestValidateType_Valid(t *testing.T) { + typ := &Type{ + ID: "ohlcv_go_finance", Name: "ohlcv", Lang: "go", Domain: "finance", + Algebraic: AlgebraicProduct, Description: "candle", Version: "1.0.0", + } + if err := ValidateType(typ, knownTps("ohlcv_go_finance")); err != nil { + t.Errorf("expected valid, got: %v", err) + } +} + +func TestValidateType_BadAlgebraic(t *testing.T) { + typ := &Type{ + ID: "t_go_core", Name: "t", Lang: "go", Domain: "core", + Algebraic: "wrong", Description: "bad", Version: "1.0.0", + } + err := ValidateType(typ, knownTps("t_go_core")) + if err == nil { + t.Fatal("expected error") + } +} + +func TestValidateType_SelfReference(t *testing.T) { + typ := &Type{ + ID: "t_go_core", Name: "t", Lang: "go", Domain: "core", + Algebraic: AlgebraicProduct, Description: "self ref", Version: "1.0.0", + UsesTypes: []string{"t_go_core"}, + } + err := ValidateType(typ, knownTps("t_go_core")) + if err == nil { + t.Fatal("expected error") + } + if !strings.Contains(err.Error(), "cannot reference itself") { + t.Errorf("unexpected error: %v", err) + } +} + +func TestValidateType_OrphanRef(t *testing.T) { + typ := &Type{ + ID: "t_go_core", Name: "t", Lang: "go", Domain: "core", + Algebraic: AlgebraicProduct, Description: "orphan ref", Version: "1.0.0", + UsesTypes: []string{"nonexistent_go_core"}, + } + err := ValidateType(typ, knownTps("t_go_core")) + if err == nil { + t.Fatal("expected error") + } + if !strings.Contains(err.Error(), "unknown type") { + t.Errorf("unexpected error: %v", err) + } +}