feat: indexer con validación en dos pasadas y CLI con output de errores

Reescribe el indexer con una estrategia de dos pasadas:
1. Parsea todos los .md y construye los sets de IDs conocidos.
2. Valida la integridad contra los IDs conocidos e inserta solo las entradas válidas.

El CLI ahora muestra INVALID para los errores de validación y ERROR
para los errores de inserción, separando claramente ambos.
Añade un test de integración que verifica que las entradas inválidas
se rechazan sin afectar a las válidas.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
2026-03-28 02:13:44 +01:00
parent 08d141ea1b
commit 2d87d6affc
3 changed files with 119 additions and 41 deletions
+61
View File
@@ -209,6 +209,9 @@ func TestIndexFullCycle(t *testing.T) {
if len(result.Errors) != 0 {
t.Errorf("unexpected errors: %v", result.Errors)
}
if len(result.ValidationErrors) != 0 {
t.Errorf("unexpected validation errors: %v", result.ValidationErrors)
}
// Verify searchable
fns, err := db.SearchFunctions("filter", "", "", "", "")
@@ -236,3 +239,61 @@ func TestIndexFullCycle(t *testing.T) {
t.Error("re-index should produce same counts")
}
}
// invalidPipelineMD is a front-matter-only fixture describing a pipeline that
// is declared `purity: pure` while its `uses_functions` list is empty.
// TestIndexRejectsInvalid writes it to functions/pipelines/bad.md and expects
// indexing to report a validation error for it; the rule that makes this
// combination invalid lives in the indexer, not in this file.
const invalidPipelineMD = `---
name: bad_pipeline
kind: pipeline
lang: go
domain: core
version: "1.0.0"
purity: pure
description: "Pipeline puro sin uses_functions — debe fallar."
tags: []
uses_functions: []
uses_types: []
returns: []
returns_optional: false
error_type: ""
imports: []
tested: false
tests: []
test_file_path: ""
file_path: "functions/pipelines/bad.go"
---
`
// TestIndexRejectsInvalid indexes a directory holding one valid function, one
// valid type and one invalid pipeline. It expects the two valid entries to be
// counted and at least one validation error to be reported for the pipeline.
func TestIndexRejectsInvalid(t *testing.T) {
	dir := t.TempDir()

	// Two well-formed entries sit next to a pipeline that is pure yet lists no
	// uses_functions; that pipeline is the entry expected to be rejected.
	writeTempFile(t, dir, "functions/core/filter_slice.md", functionMD)
	writeTempFile(t, dir, "functions/pipelines/bad.md", invalidPipelineMD)
	writeTempFile(t, dir, "types/finance/ohlcv.md", typeMD)

	store, openErr := Open(filepath.Join(dir, "registry.db"))
	if openErr != nil {
		t.Fatal(openErr)
	}
	defer store.Close()

	summary, indexErr := Index(store, dir)
	if indexErr != nil {
		t.Fatal(indexErr)
	}

	// The rejected pipeline must not inflate the counts of indexed entries.
	if got := summary.Functions; got != 1 {
		t.Errorf("functions: got %d, want 1 (only the valid one)", got)
	}
	if got := summary.Types; got != 1 {
		t.Errorf("types: got %d, want 1", got)
	}

	// The rejection has to surface as a validation error.
	if len(summary.ValidationErrors) < 1 {
		t.Error("expected validation errors for invalid pipeline")
	}
}