merge: quick/fase6-integrity — validacion de integridad al indexar

This commit is contained in:
2026-03-28 02:13:55 +01:00
5 changed files with 544 additions and 41 deletions
+4 -1
View File
@@ -94,8 +94,11 @@ func cmdIndex() {
}
fmt.Printf("Indexed %d functions, %d types\n", result.Functions, result.Types)
for _, e := range result.ValidationErrors {
fmt.Fprintf(os.Stderr, " INVALID: %s\n", e)
}
for _, e := range result.Errors {
fmt.Fprintf(os.Stderr, " warn: %s\n", e)
fmt.Fprintf(os.Stderr, " ERROR: %s\n", e)
}
}
+54 -40
View File
@@ -9,13 +9,16 @@ import (
// IndexResult holds stats from an indexing run: how many functions and types
// were inserted, plus the messages collected for entries that were skipped.
type IndexResult struct {
Functions int
Types int
Errors []string
Functions int
Types int
ValidationErrors []string // messages from entries rejected by ValidateType / ValidateFunction
Errors []string // parse and insert failures
}
// Index walks the registry root, parses all .md files, and populates the database.
// It purges existing data first to ensure a clean rebuild.
// Index walks the registry root, parses all .md files, validates integrity,
// and populates the database. It uses two passes:
// 1. Parse all entries and collect known IDs
// 2. Validate references against known IDs, then insert valid entries
func Index(db *DB, root string) (*IndexResult, error) {
if err := db.Purge(); err != nil {
return nil, fmt.Errorf("purging database: %w", err)
@@ -23,64 +26,75 @@ func Index(db *DB, root string) (*IndexResult, error) {
result := &IndexResult{}
// Index functions
// Pass 1: parse everything
var functions []*Function
var types []*Type
functionsDir := filepath.Join(root, "functions")
if _, err := os.Stat(functionsDir); err == nil {
err := filepath.Walk(functionsDir, func(path string, info os.FileInfo, err error) error {
if err != nil {
filepath.Walk(functionsDir, func(path string, info os.FileInfo, err error) error {
if err != nil || info.IsDir() || !strings.HasSuffix(path, ".md") {
return nil
}
if info.IsDir() || !strings.HasSuffix(path, ".md") {
return nil
}
f, err := ParseFunctionMD(path)
if err != nil {
result.Errors = append(result.Errors, fmt.Sprintf("%s: %v", path, err))
result.Errors = append(result.Errors, fmt.Sprintf("parse %s: %v", path, err))
return nil
}
if err := db.InsertFunction(f); err != nil {
result.Errors = append(result.Errors, fmt.Sprintf("insert %s: %v", f.ID, err))
return nil
}
result.Functions++
functions = append(functions, f)
return nil
})
if err != nil {
return nil, fmt.Errorf("walking functions: %w", err)
}
}
// Index types
typesDir := filepath.Join(root, "types")
if _, err := os.Stat(typesDir); err == nil {
err := filepath.Walk(typesDir, func(path string, info os.FileInfo, err error) error {
if err != nil {
filepath.Walk(typesDir, func(path string, info os.FileInfo, err error) error {
if err != nil || info.IsDir() || !strings.HasSuffix(path, ".md") {
return nil
}
if info.IsDir() || !strings.HasSuffix(path, ".md") {
return nil
}
t, err := ParseTypeMD(path)
if err != nil {
result.Errors = append(result.Errors, fmt.Sprintf("%s: %v", path, err))
result.Errors = append(result.Errors, fmt.Sprintf("parse %s: %v", path, err))
return nil
}
if err := db.InsertType(t); err != nil {
result.Errors = append(result.Errors, fmt.Sprintf("insert %s: %v", t.ID, err))
return nil
}
result.Types++
types = append(types, t)
return nil
})
if err != nil {
return nil, fmt.Errorf("walking types: %w", err)
}
// Build known ID sets
knownFunctions := make(map[string]bool, len(functions))
for _, f := range functions {
knownFunctions[f.ID] = true
}
knownTypes := make(map[string]bool, len(types))
for _, t := range types {
knownTypes[t.ID] = true
}
// Pass 2: validate and insert
for _, t := range types {
if verr := ValidateType(t, knownTypes); verr != nil {
result.ValidationErrors = append(result.ValidationErrors, verr.Error())
continue
}
if err := db.InsertType(t); err != nil {
result.Errors = append(result.Errors, fmt.Sprintf("insert %s: %v", t.ID, err))
continue
}
result.Types++
}
for _, f := range functions {
if verr := ValidateFunction(f, knownFunctions, knownTypes); verr != nil {
result.ValidationErrors = append(result.ValidationErrors, verr.Error())
continue
}
if err := db.InsertFunction(f); err != nil {
result.Errors = append(result.Errors, fmt.Sprintf("insert %s: %v", f.ID, err))
continue
}
result.Functions++
}
return result, nil
+61
View File
@@ -209,6 +209,9 @@ func TestIndexFullCycle(t *testing.T) {
if len(result.Errors) != 0 {
t.Errorf("unexpected errors: %v", result.Errors)
}
if len(result.ValidationErrors) != 0 {
t.Errorf("unexpected validation errors: %v", result.ValidationErrors)
}
// Verify searchable
fns, err := db.SearchFunctions("filter", "", "", "", "")
@@ -236,3 +239,61 @@ func TestIndexFullCycle(t *testing.T) {
t.Error("re-index should produce same counts")
}
}
// invalidPipelineMD is a front-matter fixture describing a pipeline that is
// declared pure and has an empty uses_functions list. TestIndexRejectsInvalid
// writes it into the registry as the entry that is expected to fail validation.
const invalidPipelineMD = `---
name: bad_pipeline
kind: pipeline
lang: go
domain: core
version: "1.0.0"
purity: pure
description: "Pipeline puro sin uses_functions — debe fallar."
tags: []
uses_functions: []
uses_types: []
returns: []
returns_optional: false
error_type: ""
imports: []
tested: false
tests: []
test_file_path: ""
file_path: "functions/pipelines/bad.go"
---
`
// TestIndexRejectsInvalid indexes a registry containing one valid function,
// one invalid pipeline and one valid type, and checks that only the valid
// entries are counted while the pipeline shows up as a validation error.
func TestIndexRejectsInvalid(t *testing.T) {
	dir := t.TempDir()

	// Fixtures: a valid function, a pure pipeline with empty uses_functions
	// (invalid), and a valid type.
	writeTempFile(t, dir, "functions/core/filter_slice.md", functionMD)
	writeTempFile(t, dir, "functions/pipelines/bad.md", invalidPipelineMD)
	writeTempFile(t, dir, "types/finance/ohlcv.md", typeMD)

	db, err := Open(filepath.Join(dir, "registry.db"))
	if err != nil {
		t.Fatal(err)
	}
	defer db.Close()

	res, err := Index(db, dir)
	if err != nil {
		t.Fatal(err)
	}

	// Only the valid entries may be counted.
	if got := res.Functions; got != 1 {
		t.Errorf("functions: got %d, want 1 (only the valid one)", got)
	}
	if got := res.Types; got != 1 {
		t.Errorf("types: got %d, want 1", got)
	}

	// The rejected pipeline must be reported.
	if len(res.ValidationErrors) == 0 {
		t.Error("expected validation errors for invalid pipeline")
	}
}
+167
View File
@@ -0,0 +1,167 @@
package registry
import (
"fmt"
"strings"
)
// ValidationError groups every integrity violation found for one registry
// entry.
type ValidationError struct {
	ID     string   // ID of the entry that failed validation
	Errors []string // one message per violation
}

// Error renders the violations as "<ID>: <msg>; <msg>; ...".
func (v *ValidationError) Error() string {
	joined := strings.Join(v.Errors, "; ")
	return fmt.Sprintf("%s: %s", v.ID, joined)
}
// ValidateFunction applies the integrity rules from docs/integrity.md to a
// single function entry and collects every violation it finds.
//
// knownFunctions and knownTypes are sets of IDs that exist in the registry
// (including the current indexing batch); they are used to resolve
// uses_functions, uses_types, returns and error_type references.
//
// It returns nil when no rule is violated, otherwise a *ValidationError
// carrying f.ID and one message per violation, in the order the rules run.
func ValidateFunction(f *Function, knownFunctions, knownTypes map[string]bool) *ValidationError {
	var problems []string
	report := func(msg string) { problems = append(problems, msg) }

	// unknownRefs reports every ref that is absent from known, using format
	// (which takes the offending ID) as the message template.
	unknownRefs := func(format string, refs []string, known map[string]bool) {
		for _, id := range refs {
			if !known[id] {
				report(fmt.Sprintf(format, id))
			}
		}
	}

	// Required fields.
	if f.Name == "" {
		report("name is required")
	}
	if f.Kind == "" {
		report("kind is required")
	}
	if f.Lang == "" {
		report("lang is required")
	}
	if f.Domain == "" {
		report("domain is required")
	}
	if f.Description == "" {
		report("description is required")
	}

	// Pipelines must be impure and must compose at least one function.
	if f.Kind == KindPipeline {
		if f.Purity != PurityImpure {
			report("pipeline must be impure")
		}
		if len(f.UsesFunctions) == 0 {
			report("pipeline uses_functions cannot be empty")
		}
	}

	// Purity rules.
	switch f.Purity {
	case PurityPure:
		if f.ReturnsOptional {
			report("pure function cannot have returns_optional: true (model as sum type)")
		}
		if f.ErrorType != "" {
			report("pure function cannot have error_type")
		}
	case PurityImpure:
		// Components are exempt from the error_type requirement.
		if f.Kind != KindComponent && f.ErrorType == "" {
			report("impure function must declare error_type")
		}
	}

	// The tested flag must agree with the test metadata in both directions.
	hasTests, hasTestFile := len(f.Tests) > 0, f.TestFilePath != ""
	if f.Tested {
		if !hasTestFile {
			report("tested: true requires test_file_path")
		}
		if !hasTests {
			report("tested: true requires non-empty tests")
		}
	} else {
		if hasTests {
			report("tested: false but tests is not empty")
		}
		if hasTestFile {
			report("tested: false but test_file_path is set")
		}
	}

	// Component rules.
	if f.Kind == KindComponent {
		if f.Framework == "" {
			report("component must declare framework")
		}
		if len(f.Returns) > 0 {
			report("component returns must be empty (use emits)")
		}
		if f.HasState != nil && *f.HasState && f.Purity != PurityImpure {
			report("component with has_state: true must be impure")
		}
	}

	// A leading slash marks the path as absolute; an empty path never matches.
	if strings.HasPrefix(f.FilePath, "/") {
		report("file_path must be relative to registry root")
	}

	// Every referenced ID must be present in the matching known set.
	unknownRefs("uses_functions references unknown function: %s", f.UsesFunctions, knownFunctions)
	unknownRefs("uses_types references unknown type: %s", f.UsesTypes, knownTypes)
	unknownRefs("returns references unknown type: %s", f.Returns, knownTypes)
	if f.ErrorType != "" && !knownTypes[f.ErrorType] {
		report(fmt.Sprintf("error_type references unknown type: %s", f.ErrorType))
	}

	if len(problems) == 0 {
		return nil
	}
	return &ValidationError{ID: f.ID, Errors: problems}
}
// ValidateType applies the integrity rules for type entries and collects
// every violation it finds.
//
// knownTypes is the set of type IDs used to resolve uses_types references.
// It returns nil when no rule is violated, otherwise a *ValidationError
// carrying t.ID and one message per violation.
func ValidateType(t *Type, knownTypes map[string]bool) *ValidationError {
	var problems []string
	report := func(msg string) { problems = append(problems, msg) }

	// Required fields.
	if t.Name == "" {
		report("name is required")
	}
	if t.Lang == "" {
		report("lang is required")
	}
	if t.Domain == "" {
		report("domain is required")
	}
	if t.Description == "" {
		report("description is required")
	}

	// Only the two algebraic shapes are accepted.
	switch t.Algebraic {
	case AlgebraicProduct, AlgebraicSum:
	default:
		report(fmt.Sprintf("algebraic must be 'product' or 'sum', got %q", t.Algebraic))
	}

	// A leading slash marks the path as absolute; an empty path never matches.
	if strings.HasPrefix(t.FilePath, "/") {
		report("file_path must be relative to registry root")
	}

	// Each reference is checked both for self-reference and for existence.
	for _, id := range t.UsesTypes {
		if id == t.ID {
			report("type cannot reference itself in uses_types")
		}
		if !knownTypes[id] {
			report(fmt.Sprintf("uses_types references unknown type: %s", id))
		}
	}

	if len(problems) == 0 {
		return nil
	}
	return &ValidationError{ID: t.ID, Errors: problems}
}
+258
View File
@@ -0,0 +1,258 @@
package registry
import (
"strings"
"testing"
)
// boolPtr returns a pointer to a fresh copy of b, for populating optional
// *bool fields in test fixtures.
func boolPtr(b bool) *bool {
	v := b
	return &v
}
// knownFns builds the set of known function IDs expected by the validators.
// It always returns a non-nil map; duplicate IDs collapse into one entry.
func knownFns(ids ...string) map[string]bool {
	// The number of IDs bounds the final size, so allocate once up front.
	set := make(map[string]bool, len(ids))
	for _, id := range ids {
		set[id] = true
	}
	return set
}
// knownTps builds the set of known type IDs; it has the same shape as the
// function-ID set, so it is built by knownFns.
func knownTps(ids ...string) map[string]bool {
	set := knownFns(ids...)
	return set
}
// TestValidateFunction_Valid checks that a pure function with all required
// fields set and no references passes validation.
func TestValidateFunction_Valid(t *testing.T) {
	fn := &Function{
		ID:          "filter_slice_go_core",
		Name:        "filter_slice",
		Kind:        KindFunction,
		Lang:        "go",
		Domain:      "core",
		Purity:      PurityPure,
		Description: "Filtra un slice",
		Version:     "1.0.0",
	}

	verr := ValidateFunction(fn, knownFns(), knownTps())
	if verr != nil {
		t.Errorf("expected valid, got: %v", verr)
	}
}
// TestValidateFunction_PipelineMustBeImpure checks that a pipeline declared
// pure is rejected with the impurity message.
func TestValidateFunction_PipelineMustBeImpure(t *testing.T) {
	pipeline := &Function{
		ID:            "p_go_core",
		Name:          "p",
		Kind:          KindPipeline,
		Lang:          "go",
		Domain:        "core",
		Purity:        PurityPure,
		Description:   "bad pipeline",
		Version:       "1.0.0",
		UsesFunctions: []string{"filter_slice_go_core"},
	}

	verr := ValidateFunction(pipeline, knownFns("filter_slice_go_core"), knownTps())
	if verr == nil {
		t.Fatal("expected error")
	}
	if msg := verr.Error(); !strings.Contains(msg, "pipeline must be impure") {
		t.Errorf("unexpected error: %v", verr)
	}
}
// TestValidateFunction_PipelineNeedsUsesFunctions checks that an impure
// pipeline with no uses_functions entries is rejected.
func TestValidateFunction_PipelineNeedsUsesFunctions(t *testing.T) {
	pipeline := &Function{
		ID:          "p_go_core",
		Name:        "p",
		Kind:        KindPipeline,
		Lang:        "go",
		Domain:      "core",
		Purity:      PurityImpure,
		Description: "bad pipeline",
		Version:     "1.0.0",
		ErrorType:   "error_go_core",
	}

	verr := ValidateFunction(pipeline, knownFns(), knownTps("error_go_core"))
	if verr == nil {
		t.Fatal("expected error")
	}
	if msg := verr.Error(); !strings.Contains(msg, "uses_functions cannot be empty") {
		t.Errorf("unexpected error: %v", verr)
	}
}
// TestValidateFunction_PureNoReturnsOptional checks that a pure function with
// returns_optional set is rejected.
func TestValidateFunction_PureNoReturnsOptional(t *testing.T) {
	fn := &Function{
		ID:              "f_go_core",
		Name:            "f",
		Kind:            KindFunction,
		Lang:            "go",
		Domain:          "core",
		Purity:          PurityPure,
		Description:     "bad",
		Version:         "1.0.0",
		ReturnsOptional: true,
	}

	verr := ValidateFunction(fn, knownFns(), knownTps())
	if verr == nil {
		t.Fatal("expected error")
	}
	if msg := verr.Error(); !strings.Contains(msg, "returns_optional") {
		t.Errorf("unexpected error: %v", verr)
	}
}
// TestValidateFunction_PureNoErrorType checks that a pure function declaring
// an error_type is rejected even when that type is known.
func TestValidateFunction_PureNoErrorType(t *testing.T) {
	fn := &Function{
		ID:          "f_go_core",
		Name:        "f",
		Kind:        KindFunction,
		Lang:        "go",
		Domain:      "core",
		Purity:      PurityPure,
		Description: "bad",
		Version:     "1.0.0",
		ErrorType:   "error_go_core",
	}

	verr := ValidateFunction(fn, knownFns(), knownTps("error_go_core"))
	if verr == nil {
		t.Fatal("expected error")
	}
	if msg := verr.Error(); !strings.Contains(msg, "pure function cannot have error_type") {
		t.Errorf("unexpected error: %v", verr)
	}
}
// TestValidateFunction_ImpureNeedsErrorType checks that an impure,
// non-component function without an error_type is rejected.
func TestValidateFunction_ImpureNeedsErrorType(t *testing.T) {
	fn := &Function{
		ID:          "f_go_io",
		Name:        "f",
		Kind:        KindFunction,
		Lang:        "go",
		Domain:      "io",
		Purity:      PurityImpure,
		Description: "bad",
		Version:     "1.0.0",
	}

	verr := ValidateFunction(fn, knownFns(), knownTps())
	if verr == nil {
		t.Fatal("expected error")
	}
	if msg := verr.Error(); !strings.Contains(msg, "impure function must declare error_type") {
		t.Errorf("unexpected error: %v", verr)
	}
}
// TestValidateFunction_TestedNeedsTestFile checks that tested: true without a
// test_file_path is rejected.
func TestValidateFunction_TestedNeedsTestFile(t *testing.T) {
	fn := &Function{
		ID:          "f_go_core",
		Name:        "f",
		Kind:        KindFunction,
		Lang:        "go",
		Domain:      "core",
		Purity:      PurityPure,
		Description: "test",
		Version:     "1.0.0",
		Tested:      true,
	}

	verr := ValidateFunction(fn, knownFns(), knownTps())
	if verr == nil {
		t.Fatal("expected error")
	}
	if msg := verr.Error(); !strings.Contains(msg, "test_file_path") {
		t.Errorf("unexpected error: %v", verr)
	}
}
// TestValidateFunction_NotTestedNoTests checks that tested: false combined
// with leftover test metadata is rejected.
func TestValidateFunction_NotTestedNoTests(t *testing.T) {
	fn := &Function{
		ID:           "f_go_core",
		Name:         "f",
		Kind:         KindFunction,
		Lang:         "go",
		Domain:       "core",
		Purity:       PurityPure,
		Description:  "test",
		Version:      "1.0.0",
		Tested:       false,
		Tests:        []string{"ghost test"},
		TestFilePath: "test.go",
	}

	verr := ValidateFunction(fn, knownFns(), knownTps())
	if verr == nil {
		t.Fatal("expected error")
	}
	if msg := verr.Error(); !strings.Contains(msg, "tested: false but tests is not empty") {
		t.Errorf("unexpected error: %v", verr)
	}
}
// TestValidateFunction_OrphanRefs checks that each dangling reference
// (uses_functions, uses_types, returns, error_type) is reported separately.
func TestValidateFunction_OrphanRefs(t *testing.T) {
	pipeline := &Function{
		ID:            "p_go_core",
		Name:          "p",
		Kind:          KindPipeline,
		Lang:          "go",
		Domain:        "core",
		Purity:        PurityImpure,
		Description:   "pipeline",
		Version:       "1.0.0",
		UsesFunctions: []string{"nonexistent_go_core"},
		UsesTypes:     []string{"ghost_go_core"},
		Returns:       []string{"phantom_go_core"},
		ErrorType:     "missing_go_core",
	}

	// Both known sets are empty, so every reference above is unknown.
	verr := ValidateFunction(pipeline, knownFns(), knownTps())
	if verr == nil {
		t.Fatal("expected error")
	}
	if count := len(verr.Errors); count < 4 {
		t.Errorf("expected at least 4 errors, got %d: %v", count, verr)
	}
}
// TestValidateFunction_ComponentRules checks the component-specific rules: a
// stateful impure component with a framework is valid, and each single
// deviation from it is rejected with the message for that rule.
func TestValidateFunction_ComponentRules(t *testing.T) {
	f := &Function{
		ID: "dt_typescript_core", Name: "DataTable", Kind: KindComponent,
		Lang: "typescript", Domain: "core", Purity: PurityImpure,
		Description: "table", Version: "1.0.0",
		HasState: boolPtr(true), Framework: "react",
	}
	if err := ValidateFunction(f, knownFns(), knownTps()); err != nil {
		t.Errorf("expected valid, got: %v", err)
	}

	// expectRejected fails with missing when validation passes, and also
	// fails when the error does not mention want, so an unrelated violation
	// cannot satisfy the case by accident.
	expectRejected := func(fn *Function, types map[string]bool, missing, want string) {
		t.Helper()
		err := ValidateFunction(fn, knownFns(), types)
		if err == nil {
			t.Error(missing)
			return
		}
		if !strings.Contains(err.Error(), want) {
			t.Errorf("unexpected error: %v", err)
		}
	}

	// Missing framework
	f2 := *f
	f2.Framework = ""
	expectRejected(&f2, knownTps(),
		"expected error for missing framework",
		"component must declare framework")

	// Returns should be empty; the returned type is known so that only the
	// component rule can fire.
	f3 := *f
	f3.Returns = []string{"some_go_core"}
	expectRejected(&f3, knownTps("some_go_core"),
		"expected error for non-empty returns on component",
		"component returns must be empty")

	// has_state: true but pure
	f4 := *f
	f4.Purity = PurityPure
	expectRejected(&f4, knownTps(),
		"expected error for stateful pure component",
		"component with has_state: true must be impure")
}
// TestValidateFunction_AbsoluteFilePath checks that a file_path starting with
// "/" is rejected, and that the rejection is the file_path rule rather than
// some unrelated violation.
func TestValidateFunction_AbsoluteFilePath(t *testing.T) {
	f := &Function{
		ID: "f_go_core", Name: "f", Kind: KindFunction,
		Lang: "go", Domain: "core", Purity: PurityPure,
		Description: "test", Version: "1.0.0",
		FilePath: "/absolute/path.go",
	}
	err := ValidateFunction(f, knownFns(), knownTps())
	if err == nil {
		t.Fatal("expected error for absolute file_path")
	}
	if !strings.Contains(err.Error(), "file_path must be relative") {
		t.Errorf("unexpected error: %v", err)
	}
}
// TestValidateType_Valid checks that a product type with all required fields
// set and no references passes validation.
func TestValidateType_Valid(t *testing.T) {
	candle := &Type{
		ID:          "ohlcv_go_finance",
		Name:        "ohlcv",
		Lang:        "go",
		Domain:      "finance",
		Algebraic:   AlgebraicProduct,
		Description: "candle",
		Version:     "1.0.0",
	}

	verr := ValidateType(candle, knownTps("ohlcv_go_finance"))
	if verr != nil {
		t.Errorf("expected valid, got: %v", verr)
	}
}
// TestValidateType_BadAlgebraic checks that an algebraic value other than
// product or sum is rejected, and that the rejection is the algebraic rule
// rather than some unrelated violation.
func TestValidateType_BadAlgebraic(t *testing.T) {
	typ := &Type{
		ID: "t_go_core", Name: "t", Lang: "go", Domain: "core",
		Algebraic: "wrong", Description: "bad", Version: "1.0.0",
	}
	err := ValidateType(typ, knownTps("t_go_core"))
	if err == nil {
		t.Fatal("expected error")
	}
	if !strings.Contains(err.Error(), "algebraic must be") {
		t.Errorf("unexpected error: %v", err)
	}
}
// TestValidateType_SelfReference checks that a type listing its own ID in
// uses_types is rejected.
func TestValidateType_SelfReference(t *testing.T) {
	selfRef := &Type{
		ID:          "t_go_core",
		Name:        "t",
		Lang:        "go",
		Domain:      "core",
		Algebraic:   AlgebraicProduct,
		Description: "self ref",
		Version:     "1.0.0",
		UsesTypes:   []string{"t_go_core"},
	}

	verr := ValidateType(selfRef, knownTps("t_go_core"))
	if verr == nil {
		t.Fatal("expected error")
	}
	if msg := verr.Error(); !strings.Contains(msg, "cannot reference itself") {
		t.Errorf("unexpected error: %v", verr)
	}
}
// TestValidateType_OrphanRef checks that a uses_types entry absent from the
// known set is reported as an unknown type.
func TestValidateType_OrphanRef(t *testing.T) {
	orphan := &Type{
		ID:          "t_go_core",
		Name:        "t",
		Lang:        "go",
		Domain:      "core",
		Algebraic:   AlgebraicProduct,
		Description: "orphan ref",
		Version:     "1.0.0",
		UsesTypes:   []string{"nonexistent_go_core"},
	}

	verr := ValidateType(orphan, knownTps("t_go_core"))
	if verr == nil {
		t.Fatal("expected error")
	}
	if msg := verr.Error(); !strings.Contains(msg, "unknown type") {
		t.Errorf("unexpected error: %v", verr)
	}
}