feat: indexer con validación en dos pasadas y CLI con output de errores
Reescribe el indexer con estrategia de dos pasadas:
1. Parsea todos los .md y construye sets de IDs conocidos.
2. Valida integridad contra IDs conocidos, inserta solo los válidos.

El CLI ahora muestra INVALID para errores de validación y ERROR para errores de inserción, separando claramente ambos.

Añade test de integración que verifica que entradas inválidas se rechazan sin afectar a las válidas.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
+54
-40
@@ -9,13 +9,16 @@ import (
|
||||
|
||||
// IndexResult holds stats from an indexing run.
|
||||
type IndexResult struct {
|
||||
Functions int
|
||||
Types int
|
||||
Errors []string
|
||||
Functions int
|
||||
Types int
|
||||
ValidationErrors []string
|
||||
Errors []string
|
||||
}
|
||||
|
||||
// Index walks the registry root, parses all .md files, and populates the database.
|
||||
// It purges existing data first to ensure a clean rebuild.
|
||||
// Index walks the registry root, parses all .md files, validates integrity,
|
||||
// and populates the database. It uses two passes:
|
||||
// 1. Parse all entries and collect known IDs
|
||||
// 2. Validate references against known IDs, then insert valid entries
|
||||
func Index(db *DB, root string) (*IndexResult, error) {
|
||||
if err := db.Purge(); err != nil {
|
||||
return nil, fmt.Errorf("purging database: %w", err)
|
||||
@@ -23,64 +26,75 @@ func Index(db *DB, root string) (*IndexResult, error) {
|
||||
|
||||
result := &IndexResult{}
|
||||
|
||||
// Index functions
|
||||
// Pass 1: parse everything
|
||||
var functions []*Function
|
||||
var types []*Type
|
||||
|
||||
functionsDir := filepath.Join(root, "functions")
|
||||
if _, err := os.Stat(functionsDir); err == nil {
|
||||
err := filepath.Walk(functionsDir, func(path string, info os.FileInfo, err error) error {
|
||||
if err != nil {
|
||||
filepath.Walk(functionsDir, func(path string, info os.FileInfo, err error) error {
|
||||
if err != nil || info.IsDir() || !strings.HasSuffix(path, ".md") {
|
||||
return nil
|
||||
}
|
||||
if info.IsDir() || !strings.HasSuffix(path, ".md") {
|
||||
return nil
|
||||
}
|
||||
|
||||
f, err := ParseFunctionMD(path)
|
||||
if err != nil {
|
||||
result.Errors = append(result.Errors, fmt.Sprintf("%s: %v", path, err))
|
||||
result.Errors = append(result.Errors, fmt.Sprintf("parse %s: %v", path, err))
|
||||
return nil
|
||||
}
|
||||
|
||||
if err := db.InsertFunction(f); err != nil {
|
||||
result.Errors = append(result.Errors, fmt.Sprintf("insert %s: %v", f.ID, err))
|
||||
return nil
|
||||
}
|
||||
|
||||
result.Functions++
|
||||
functions = append(functions, f)
|
||||
return nil
|
||||
})
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("walking functions: %w", err)
|
||||
}
|
||||
}
|
||||
|
||||
// Index types
|
||||
typesDir := filepath.Join(root, "types")
|
||||
if _, err := os.Stat(typesDir); err == nil {
|
||||
err := filepath.Walk(typesDir, func(path string, info os.FileInfo, err error) error {
|
||||
if err != nil {
|
||||
filepath.Walk(typesDir, func(path string, info os.FileInfo, err error) error {
|
||||
if err != nil || info.IsDir() || !strings.HasSuffix(path, ".md") {
|
||||
return nil
|
||||
}
|
||||
if info.IsDir() || !strings.HasSuffix(path, ".md") {
|
||||
return nil
|
||||
}
|
||||
|
||||
t, err := ParseTypeMD(path)
|
||||
if err != nil {
|
||||
result.Errors = append(result.Errors, fmt.Sprintf("%s: %v", path, err))
|
||||
result.Errors = append(result.Errors, fmt.Sprintf("parse %s: %v", path, err))
|
||||
return nil
|
||||
}
|
||||
|
||||
if err := db.InsertType(t); err != nil {
|
||||
result.Errors = append(result.Errors, fmt.Sprintf("insert %s: %v", t.ID, err))
|
||||
return nil
|
||||
}
|
||||
|
||||
result.Types++
|
||||
types = append(types, t)
|
||||
return nil
|
||||
})
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("walking types: %w", err)
|
||||
}
|
||||
|
||||
// Build known ID sets
|
||||
knownFunctions := make(map[string]bool, len(functions))
|
||||
for _, f := range functions {
|
||||
knownFunctions[f.ID] = true
|
||||
}
|
||||
knownTypes := make(map[string]bool, len(types))
|
||||
for _, t := range types {
|
||||
knownTypes[t.ID] = true
|
||||
}
|
||||
|
||||
// Pass 2: validate and insert
|
||||
for _, t := range types {
|
||||
if verr := ValidateType(t, knownTypes); verr != nil {
|
||||
result.ValidationErrors = append(result.ValidationErrors, verr.Error())
|
||||
continue
|
||||
}
|
||||
if err := db.InsertType(t); err != nil {
|
||||
result.Errors = append(result.Errors, fmt.Sprintf("insert %s: %v", t.ID, err))
|
||||
continue
|
||||
}
|
||||
result.Types++
|
||||
}
|
||||
|
||||
for _, f := range functions {
|
||||
if verr := ValidateFunction(f, knownFunctions, knownTypes); verr != nil {
|
||||
result.ValidationErrors = append(result.ValidationErrors, verr.Error())
|
||||
continue
|
||||
}
|
||||
if err := db.InsertFunction(f); err != nil {
|
||||
result.Errors = append(result.Errors, fmt.Sprintf("insert %s: %v", f.ID, err))
|
||||
continue
|
||||
}
|
||||
result.Functions++
|
||||
}
|
||||
|
||||
return result, nil
|
||||
|
||||
@@ -209,6 +209,9 @@ func TestIndexFullCycle(t *testing.T) {
|
||||
if len(result.Errors) != 0 {
|
||||
t.Errorf("unexpected errors: %v", result.Errors)
|
||||
}
|
||||
if len(result.ValidationErrors) != 0 {
|
||||
t.Errorf("unexpected validation errors: %v", result.ValidationErrors)
|
||||
}
|
||||
|
||||
// Verify searchable
|
||||
fns, err := db.SearchFunctions("filter", "", "", "", "")
|
||||
@@ -236,3 +239,61 @@ func TestIndexFullCycle(t *testing.T) {
|
||||
t.Error("re-index should produce same counts")
|
||||
}
|
||||
}
|
||||
|
||||
// invalidPipelineMD is the front matter of a pipeline entry that declares
// purity "pure" together with an empty uses_functions list.
// TestIndexRejectsInvalid writes it to functions/pipelines/bad.md and expects
// Index to report a validation error for it.
//
// NOTE(review): the lone `|` and `||||` lines inside this literal look like
// diff-viewer extraction residue rather than front matter — confirm against
// the repository copy before relying on this fixture.
const invalidPipelineMD = `---
|
||||
name: bad_pipeline
|
||||
kind: pipeline
|
||||
lang: go
|
||||
domain: core
|
||||
version: "1.0.0"
|
||||
purity: pure
|
||||
description: "Pipeline puro sin uses_functions — debe fallar."
|
||||
tags: []
|
||||
uses_functions: []
|
||||
uses_types: []
|
||||
returns: []
|
||||
returns_optional: false
|
||||
error_type: ""
|
||||
imports: []
|
||||
tested: false
|
||||
tests: []
|
||||
test_file_path: ""
|
||||
file_path: "functions/pipelines/bad.go"
|
||||
---
|
||||
`
|
||||
|
||||
func TestIndexRejectsInvalid(t *testing.T) {
|
||||
root := t.TempDir()
|
||||
|
||||
// Valid function
|
||||
writeTempFile(t, root, "functions/core/filter_slice.md", functionMD)
|
||||
// Invalid pipeline (pure + empty uses_functions)
|
||||
writeTempFile(t, root, "functions/pipelines/bad.md", invalidPipelineMD)
|
||||
// Valid type
|
||||
writeTempFile(t, root, "types/finance/ohlcv.md", typeMD)
|
||||
|
||||
dbPath := filepath.Join(root, "registry.db")
|
||||
db, err := Open(dbPath)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
defer db.Close()
|
||||
|
||||
result, err := Index(db, root)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
// Valid entries should be indexed
|
||||
if result.Functions != 1 {
|
||||
t.Errorf("functions: got %d, want 1 (only the valid one)", result.Functions)
|
||||
}
|
||||
if result.Types != 1 {
|
||||
t.Errorf("types: got %d, want 1", result.Types)
|
||||
}
|
||||
|
||||
// Invalid pipeline should produce validation error
|
||||
if len(result.ValidationErrors) == 0 {
|
||||
t.Error("expected validation errors for invalid pipeline")
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user