feat: indexer con validación en dos pasadas y CLI con output de errores

Reescribe el indexer con estrategia de dos pasadas:
1. Parsea todos los .md y construye sets de IDs conocidos
2. Valida integridad contra IDs conocidos, inserta solo los válidos

El CLI ahora muestra INVALID para errores de validación y ERROR
para errores de inserción, separando claramente ambos.
Añade test de integración que verifica que entradas inválidas
se rechazan sin afectar a las válidas.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
2026-03-28 02:13:44 +01:00
parent 08d141ea1b
commit 2d87d6affc
3 changed files with 119 additions and 41 deletions
+54 -40
View File
@@ -9,13 +9,16 @@ import (
// IndexResult holds stats from an indexing run.
type IndexResult struct {
Functions int
Types int
Errors []string
Functions int
Types int
ValidationErrors []string
Errors []string
}
// Index walks the registry root, parses all .md files, and populates the database.
// It purges existing data first to ensure a clean rebuild.
// Index walks the registry root, parses all .md files, validates integrity,
// and populates the database. It uses two passes:
// 1. Parse all entries and collect known IDs
// 2. Validate references against known IDs, then insert valid entries
func Index(db *DB, root string) (*IndexResult, error) {
if err := db.Purge(); err != nil {
return nil, fmt.Errorf("purging database: %w", err)
@@ -23,64 +26,75 @@ func Index(db *DB, root string) (*IndexResult, error) {
result := &IndexResult{}
// Index functions
// Pass 1: parse everything
var functions []*Function
var types []*Type
functionsDir := filepath.Join(root, "functions")
if _, err := os.Stat(functionsDir); err == nil {
err := filepath.Walk(functionsDir, func(path string, info os.FileInfo, err error) error {
if err != nil {
filepath.Walk(functionsDir, func(path string, info os.FileInfo, err error) error {
if err != nil || info.IsDir() || !strings.HasSuffix(path, ".md") {
return nil
}
if info.IsDir() || !strings.HasSuffix(path, ".md") {
return nil
}
f, err := ParseFunctionMD(path)
if err != nil {
result.Errors = append(result.Errors, fmt.Sprintf("%s: %v", path, err))
result.Errors = append(result.Errors, fmt.Sprintf("parse %s: %v", path, err))
return nil
}
if err := db.InsertFunction(f); err != nil {
result.Errors = append(result.Errors, fmt.Sprintf("insert %s: %v", f.ID, err))
return nil
}
result.Functions++
functions = append(functions, f)
return nil
})
if err != nil {
return nil, fmt.Errorf("walking functions: %w", err)
}
}
// Index types
typesDir := filepath.Join(root, "types")
if _, err := os.Stat(typesDir); err == nil {
err := filepath.Walk(typesDir, func(path string, info os.FileInfo, err error) error {
if err != nil {
filepath.Walk(typesDir, func(path string, info os.FileInfo, err error) error {
if err != nil || info.IsDir() || !strings.HasSuffix(path, ".md") {
return nil
}
if info.IsDir() || !strings.HasSuffix(path, ".md") {
return nil
}
t, err := ParseTypeMD(path)
if err != nil {
result.Errors = append(result.Errors, fmt.Sprintf("%s: %v", path, err))
result.Errors = append(result.Errors, fmt.Sprintf("parse %s: %v", path, err))
return nil
}
if err := db.InsertType(t); err != nil {
result.Errors = append(result.Errors, fmt.Sprintf("insert %s: %v", t.ID, err))
return nil
}
result.Types++
types = append(types, t)
return nil
})
if err != nil {
return nil, fmt.Errorf("walking types: %w", err)
}
// Build known ID sets
knownFunctions := make(map[string]bool, len(functions))
for _, f := range functions {
knownFunctions[f.ID] = true
}
knownTypes := make(map[string]bool, len(types))
for _, t := range types {
knownTypes[t.ID] = true
}
// Pass 2: validate and insert
for _, t := range types {
if verr := ValidateType(t, knownTypes); verr != nil {
result.ValidationErrors = append(result.ValidationErrors, verr.Error())
continue
}
if err := db.InsertType(t); err != nil {
result.Errors = append(result.Errors, fmt.Sprintf("insert %s: %v", t.ID, err))
continue
}
result.Types++
}
for _, f := range functions {
if verr := ValidateFunction(f, knownFunctions, knownTypes); verr != nil {
result.ValidationErrors = append(result.ValidationErrors, verr.Error())
continue
}
if err := db.InsertFunction(f); err != nil {
result.Errors = append(result.Errors, fmt.Sprintf("insert %s: %v", f.ID, err))
continue
}
result.Functions++
}
return result, nil
+61
View File
@@ -209,6 +209,9 @@ func TestIndexFullCycle(t *testing.T) {
if len(result.Errors) != 0 {
t.Errorf("unexpected errors: %v", result.Errors)
}
if len(result.ValidationErrors) != 0 {
t.Errorf("unexpected validation errors: %v", result.ValidationErrors)
}
// Verify searchable
fns, err := db.SearchFunctions("filter", "", "", "", "")
@@ -236,3 +239,61 @@ func TestIndexFullCycle(t *testing.T) {
t.Error("re-index should produce same counts")
}
}
// invalidPipelineMD is a fixture entry for a pipeline that is declared pure
// and has an empty uses_functions list. It is intended to fail validation;
// TestIndexRejectsInvalid writes it to disk and expects the indexer to report
// at least one validation error for it.
const invalidPipelineMD = `---
name: bad_pipeline
kind: pipeline
lang: go
domain: core
version: "1.0.0"
purity: pure
description: "Pipeline puro sin uses_functions — debe fallar."
tags: []
uses_functions: []
uses_types: []
returns: []
returns_optional: false
error_type: ""
imports: []
tested: false
tests: []
test_file_path: ""
file_path: "functions/pipelines/bad.go"
---
`
func TestIndexRejectsInvalid(t *testing.T) {
root := t.TempDir()
// Valid function
writeTempFile(t, root, "functions/core/filter_slice.md", functionMD)
// Invalid pipeline (pure + empty uses_functions)
writeTempFile(t, root, "functions/pipelines/bad.md", invalidPipelineMD)
// Valid type
writeTempFile(t, root, "types/finance/ohlcv.md", typeMD)
dbPath := filepath.Join(root, "registry.db")
db, err := Open(dbPath)
if err != nil {
t.Fatal(err)
}
defer db.Close()
result, err := Index(db, root)
if err != nil {
t.Fatal(err)
}
// Valid entries should be indexed
if result.Functions != 1 {
t.Errorf("functions: got %d, want 1 (only the valid one)", result.Functions)
}
if result.Types != 1 {
t.Errorf("types: got %d, want 1", result.Types)
}
// Invalid pipeline should produce validation error
if len(result.ValidationErrors) == 0 {
t.Error("expected validation errors for invalid pipeline")
}
}