package registry

import (
	"fmt"
	"os"
	"path/filepath"
	"strings"
	"time"
)

// IndexResult holds stats from an indexing run.
type IndexResult struct {
	// Counters: number of entries of each kind that Index inserted.
	Functions int
	Types     int
	Apps      int
	Analysis  int
	Projects  int
	Vaults    int
	Modules   int
	UnitTests int

	// ValidationErrors collects the messages of entries rejected by the
	// Validate* checks; such entries are not inserted.
	ValidationErrors []string
	// Warnings collects non-fatal findings (missing files on disk, unit-test
	// extraction problems, codegen header failures, missing params_schema).
	Warnings []string
	// Errors collects parse failures, insert failures and unreadable
	// directories.
	Errors []string
}

// Index walks the registry root, parses all .md files, validates integrity,
// and populates the database. It uses two passes:
//  1. Parse all entries and collect known IDs
//  2. Validate references against known IDs, then insert valid entries
//
// Scans functions/ and types/ at the root level, plus any language-specific
// directories (e.g. python/functions/, python/types/).
//
// A non-nil error is returned only when loading the old timestamps or purging
// the database fails. Per-entry problems, including directories that exist
// but cannot be listed, are reported through the returned IndexResult.
func Index(db *DB, root string) (*IndexResult, error) {
	// Load existing timestamps before purging so we can preserve created_at.
	oldFuncs, oldTypes, oldApps, oldAnalysis, oldProjects, oldVaults, oldModules, err := db.LoadTimestamps()
	if err != nil {
		return nil, fmt.Errorf("loading timestamps: %w", err)
	}

	result := &IndexResult{}

	// Pass 1: parse everything from all source directories.
	var functions []*Function
	var types []*Type

	// Directories to scan for functions and types.
	funcDirs := []string{filepath.Join(root, "functions")}
	typeDirs := []string{filepath.Join(root, "types")}

	// Top-level directory names, listed once and reused for every
	// language-specific lookup below. A read failure is recorded rather than
	// dropped, so an unreadable root does not pass for an empty registry.
	rootDirs := indexSubdirs(root, result)

	// Discover language-specific directories (e.g. python/functions/, python/types/).
	for _, lang := range rootDirs {
		if dir := filepath.Join(root, lang, "functions"); indexIsDir(dir) {
			funcDirs = append(funcDirs, dir)
		}
		if dir := filepath.Join(root, lang, "types"); indexIsDir(dir) {
			typeDirs = append(typeDirs, dir)
		}
	}

	// Discover module directories (modules/<name>/) — each may contain function
	// .md files alongside the module.md. Module entrypoint .md files (e.g.
	// data_table.md) live in their module dir; types still live in types/ to
	// keep cross-module reuse. The listing is reused further down when the
	// module.md files themselves are parsed.
	modulesDir := filepath.Join(root, "modules")
	var moduleNames []string
	if indexIsDir(modulesDir) {
		moduleNames = indexSubdirs(modulesDir, result)
	}
	for _, name := range moduleNames {
		funcDirs = append(funcDirs, filepath.Join(modulesDir, name))
	}

	for _, dir := range funcDirs {
		walkMD(dir, func(path string) {
			f, err := ParseFunctionMD(path, root)
			if err != nil {
				result.Errors = append(result.Errors, fmt.Sprintf("parse %s: %v", path, err))
				return
			}
			functions = append(functions, f)
		})
	}
	for _, dir := range typeDirs {
		walkMD(dir, func(path string) {
			t, err := ParseTypeMD(path, root)
			if err != nil {
				result.Errors = append(result.Errors, fmt.Sprintf("parse %s: %v", path, err))
				return
			}
			types = append(types, t)
		})
	}

	// Parse apps from apps/*/app.md (standalone apps, no project) and from
	// <lang>/apps/*/app.md (language-specific standalone apps, e.g. cpp/apps/).
	var apps []*App
	localAppIDs := make(map[string]bool)
	appsDirs := []string{filepath.Join(root, "apps")}
	for _, lang := range rootDirs {
		if dir := filepath.Join(root, lang, "apps"); indexIsDir(dir) {
			appsDirs = append(appsDirs, dir)
		}
	}
	for _, appsDir := range appsDirs {
		if !indexIsDir(appsDir) {
			continue
		}
		for _, name := range indexSubdirs(appsDir, result) {
			appMD := filepath.Join(appsDir, name, "app.md")
			if _, err := os.Stat(appMD); err != nil {
				continue
			}
			a, err := ParseAppMD(appMD, root)
			if err != nil {
				result.Errors = append(result.Errors, fmt.Sprintf("parse %s: %v", appMD, err))
				continue
			}
			apps = append(apps, a)
			localAppIDs[a.ID] = true
		}
	}

	// Parse analysis from analysis/*/analysis.md (standalone, no project).
	var analyses []*Analysis
	localAnalysisIDs := make(map[string]bool)
	analysisDir := filepath.Join(root, "analysis")
	if indexIsDir(analysisDir) {
		for _, name := range indexSubdirs(analysisDir, result) {
			analysisMD := filepath.Join(analysisDir, name, "analysis.md")
			if _, err := os.Stat(analysisMD); err != nil {
				continue
			}
			an, err := ParseAnalysisMD(analysisMD, root)
			if err != nil {
				result.Errors = append(result.Errors, fmt.Sprintf("parse %s: %v", analysisMD, err))
				continue
			}
			analyses = append(analyses, an)
			localAnalysisIDs[an.ID] = true
		}
	}

	// Parse modules from modules/*/module.md.
	var modules []*Module
	for _, name := range moduleNames {
		modMD := filepath.Join(modulesDir, name, "module.md")
		if _, err := os.Stat(modMD); err != nil {
			continue
		}
		m, err := ParseModuleMD(modMD, root)
		if err != nil {
			result.Errors = append(result.Errors, fmt.Sprintf("parse %s: %v", modMD, err))
			continue
		}
		if m.DirPath == "" {
			m.DirPath = filepath.Join("modules", name)
		}
		modules = append(modules, m)
	}

	// Parse projects from projects/*/project.md.
	var projects []*Project
	var vaults []*Vault
	localProjectIDs := make(map[string]bool)
	projectsDir := filepath.Join(root, "projects")
	if indexIsDir(projectsDir) {
		for _, projName := range indexSubdirs(projectsDir, result) {
			projDir := filepath.Join(projectsDir, projName)

			// Parse project.md; a directory without one is not a project.
			projMD := filepath.Join(projDir, "project.md")
			if _, err := os.Stat(projMD); err != nil {
				continue
			}
			p, err := ParseProjectMD(projMD, root)
			if err != nil {
				result.Errors = append(result.Errors, fmt.Sprintf("parse %s: %v", projMD, err))
				continue
			}
			projects = append(projects, p)
			localProjectIDs[p.ID] = true

			// Parse project apps from projects/{name}/apps/*/app.md.
			projAppsDir := filepath.Join(projDir, "apps")
			if indexIsDir(projAppsDir) {
				for _, appName := range indexSubdirs(projAppsDir, result) {
					appMD := filepath.Join(projAppsDir, appName, "app.md")
					if _, err := os.Stat(appMD); err != nil {
						continue
					}
					a, err := ParseAppMD(appMD, root)
					if err != nil {
						result.Errors = append(result.Errors, fmt.Sprintf("parse %s: %v", appMD, err))
						continue
					}
					a.ProjectID = p.ID
					if a.DirPath == "" {
						a.DirPath = filepath.Join("projects", projName, "apps", appName)
					}
					apps = append(apps, a)
					localAppIDs[a.ID] = true
				}
			}

			// Parse project analysis from projects/{name}/analysis/*/analysis.md.
			projAnalysisDir := filepath.Join(projDir, "analysis")
			if indexIsDir(projAnalysisDir) {
				for _, anName := range indexSubdirs(projAnalysisDir, result) {
					anMD := filepath.Join(projAnalysisDir, anName, "analysis.md")
					if _, err := os.Stat(anMD); err != nil {
						continue
					}
					an, err := ParseAnalysisMD(anMD, root)
					if err != nil {
						result.Errors = append(result.Errors, fmt.Sprintf("parse %s: %v", anMD, err))
						continue
					}
					an.ProjectID = p.ID
					if an.DirPath == "" {
						an.DirPath = filepath.Join("projects", projName, "analysis", anName)
					}
					analyses = append(analyses, an)
					localAnalysisIDs[an.ID] = true
				}
			}

			// Parse project vaults from projects/{name}/vaults/vault.yaml.
			projVaultYAML := filepath.Join(projDir, "vaults", "vault.yaml")
			if _, err := os.Stat(projVaultYAML); err == nil {
				vs, err := ParseVaultYAML(projVaultYAML, p.ID, filepath.Join(projDir, "vaults"))
				if err != nil {
					result.Errors = append(result.Errors, fmt.Sprintf("parse %s: %v", projVaultYAML, err))
				} else {
					vaults = append(vaults, vs...)
				}
			}
		}
	}

	// Parse registry-level vaults from vaults/vault.yaml.
	registryVaultYAML := filepath.Join(root, "vaults", "vault.yaml")
	if _, err := os.Stat(registryVaultYAML); err == nil {
		vs, err := ParseVaultYAML(registryVaultYAML, "", filepath.Join(root, "vaults"))
		if err != nil {
			result.Errors = append(result.Errors, fmt.Sprintf("parse %s: %v", registryVaultYAML, err))
		} else {
			vaults = append(vaults, vs...)
		}
	}

	// Selective purge: preserve remote-only apps/analysis/projects (have repo_url, not cloned locally).
	if err := db.PurgeLocalOnly(localAppIDs, localAnalysisIDs, localProjectIDs); err != nil {
		return nil, fmt.Errorf("purging database: %w", err)
	}

	// Build known ID sets.
	knownFunctions := make(map[string]bool, len(functions))
	for _, f := range functions {
		knownFunctions[f.ID] = true
	}
	knownTypes := make(map[string]bool, len(types))
	for _, t := range types {
		knownTypes[t.ID] = true
	}

	// One timestamp for the whole run, so everything touched shares it.
	now := time.Now().UTC()

	// Pass 2: validate, assign timestamps via hash comparison, and insert.
	for _, t := range types {
		if verr := ValidateType(t, knownTypes); verr != nil {
			result.ValidationErrors = append(result.ValidationErrors, verr.Error())
			continue
		}
		t.ContentHash = ComputeTypeHash(t)
		applyTimestamps(&t.CreatedAt, &t.UpdatedAt, t.ContentHash, oldTypes[t.ID], now)
		if err := db.InsertType(t); err != nil {
			result.Errors = append(result.Errors, fmt.Sprintf("insert %s: %v", t.ID, err))
			continue
		}
		result.Types++
	}

	for _, f := range functions {
		if verr := ValidateFunction(f, knownFunctions, knownTypes); verr != nil {
			result.ValidationErrors = append(result.ValidationErrors, verr.Error())
			continue
		}
		f.ContentHash = ComputeFunctionHash(f)
		applyTimestamps(&f.CreatedAt, &f.UpdatedAt, f.ContentHash, oldFuncs[f.ID], now)
		if err := db.InsertFunction(f); err != nil {
			result.Errors = append(result.Errors, fmt.Sprintf("insert %s: %v", f.ID, err))
			continue
		}
		result.Functions++
	}

	for _, a := range apps {
		if verr := ValidateApp(a, knownFunctions, knownTypes); verr != nil {
			result.ValidationErrors = append(result.ValidationErrors, verr.Error())
			continue
		}
		a.ContentHash = ComputeAppHash(a)
		applyTimestamps(&a.CreatedAt, &a.UpdatedAt, a.ContentHash, oldApps[a.ID], now)
		if err := db.InsertApp(a); err != nil {
			result.Errors = append(result.Errors, fmt.Sprintf("insert %s: %v", a.ID, err))
			continue
		}
		result.Apps++
	}

	for _, an := range analyses {
		if verr := ValidateAnalysis(an, knownFunctions, knownTypes); verr != nil {
			result.ValidationErrors = append(result.ValidationErrors, verr.Error())
			continue
		}
		an.ContentHash = ComputeAnalysisHash(an)
		applyTimestamps(&an.CreatedAt, &an.UpdatedAt, an.ContentHash, oldAnalysis[an.ID], now)
		if err := db.InsertAnalysis(an); err != nil {
			result.Errors = append(result.Errors, fmt.Sprintf("insert %s: %v", an.ID, err))
			continue
		}
		result.Analysis++
	}

	for _, p := range projects {
		if verr := ValidateProject(p); verr != nil {
			result.ValidationErrors = append(result.ValidationErrors, verr.Error())
			continue
		}
		p.ContentHash = ComputeProjectHash(p)
		applyTimestamps(&p.CreatedAt, &p.UpdatedAt, p.ContentHash, oldProjects[p.ID], now)
		if err := db.InsertProject(p); err != nil {
			result.Errors = append(result.Errors, fmt.Sprintf("insert project %s: %v", p.ID, err))
			continue
		}
		result.Projects++
	}

	// Vaults and modules are inserted without a validation step.
	for _, v := range vaults {
		v.ContentHash = ComputeVaultHash(v)
		applyTimestamps(&v.CreatedAt, &v.UpdatedAt, v.ContentHash, oldVaults[v.ID], now)
		if err := db.InsertVault(v); err != nil {
			result.Errors = append(result.Errors, fmt.Sprintf("insert vault %s: %v", v.ID, err))
			continue
		}
		result.Vaults++
	}

	for _, m := range modules {
		m.ContentHash = ComputeModuleHash(m)
		applyTimestamps(&m.CreatedAt, &m.UpdatedAt, m.ContentHash, oldModules[m.ID], now)
		if err := db.InsertModule(m); err != nil {
			result.Errors = append(result.Errors, fmt.Sprintf("insert module %s: %v", m.ID, err))
			continue
		}
		// A failed header emission is only a warning: the module row is
		// already in the database and still counts as indexed.
		if err := emitModuleVersionHeader(m, root); err != nil {
			result.Warnings = append(result.Warnings, fmt.Sprintf("module %s: codegen version header: %v", m.ID, err))
		}
		result.Modules++
	}

	// Extract unit tests from test files of tested functions.
	if err := db.PurgeUnitTests(); err != nil {
		result.Warnings = append(result.Warnings, fmt.Sprintf("purging unit_tests: %v", err))
	}
	for _, f := range functions {
		if !f.Tested || f.TestFilePath == "" {
			continue
		}
		absTestPath := filepath.Join(root, f.TestFilePath)
		cases, err := parseTestFile(absTestPath, f.Lang)
		if err != nil {
			result.Warnings = append(result.Warnings, fmt.Sprintf("%s: parsing tests: %v", f.ID, err))
			continue
		}
		for i, tc := range cases {
			ut := &UnitTest{
				ID:         fmt.Sprintf("%s_t%d", f.ID, i),
				FunctionID: f.ID,
				Name:       tc.Name,
				Code:       tc.Code,
				FilePath:   f.TestFilePath,
				Lang:       f.Lang,
				CreatedAt:  now,
				UpdatedAt:  now,
			}
			if err := db.InsertUnitTest(ut); err != nil {
				result.Warnings = append(result.Warnings, fmt.Sprintf("insert unit_test %s: %v", ut.ID, err))
				continue
			}
			result.UnitTests++
		}
	}

	// Post-insert: warn about file_path entries that don't exist on disk
	// and functions missing params_schema.
	missingParams := 0
	for _, f := range functions {
		if f.FilePath != "" {
			abs := filepath.Join(root, f.FilePath)
			if _, err := os.Stat(abs); err != nil {
				result.Warnings = append(result.Warnings, fmt.Sprintf("%s: file_path %q not found", f.ID, f.FilePath))
			}
		}
		if f.TestFilePath != "" {
			abs := filepath.Join(root, f.TestFilePath)
			if _, err := os.Stat(abs); err != nil {
				result.Warnings = append(result.Warnings, fmt.Sprintf("%s: test_file_path %q not found", f.ID, f.TestFilePath))
			}
		}
		if f.ParamsSchema == "" {
			missingParams++
		}
	}
	if missingParams > 0 {
		result.Warnings = append(result.Warnings, fmt.Sprintf("%d functions missing params_schema (run 'fn check params' to list)", missingParams))
	}
	for _, t := range types {
		if t.FilePath != "" {
			abs := filepath.Join(root, t.FilePath)
			if _, err := os.Stat(abs); err != nil {
				result.Warnings = append(result.Warnings, fmt.Sprintf("%s: file_path %q not found", t.ID, t.FilePath))
			}
		}
	}

	return result, nil
}

// indexIsDir reports whether path can be stat'ed and is a directory.
func indexIsDir(path string) bool {
	fi, err := os.Stat(path)
	return err == nil && fi.IsDir()
}

// indexSubdirs returns the names of the immediate subdirectories of dir, in
// the order os.ReadDir reports them. A read failure is appended to
// result.Errors; whatever entries were read before the failure are still used.
func indexSubdirs(dir string, result *IndexResult) []string {
	entries, err := os.ReadDir(dir)
	if err != nil {
		result.Errors = append(result.Errors, fmt.Sprintf("read dir %s: %v", dir, err))
	}
	var names []string
	for _, e := range entries {
		if e.IsDir() {
			names = append(names, e.Name())
		}
	}
	return names
}

// applyTimestamps sets created_at and updated_at based on whether the entry
// existed before and whether its content changed.
// - New entry (no old record): both set to now // - Unchanged (hash matches): both preserved from old record // - Changed (hash differs): created_at preserved, updated_at set to now func applyTimestamps(createdAt, updatedAt *time.Time, newHash string, old timestampRecord, now time.Time) { if old.CreatedAt.IsZero() { // New entry *createdAt = now *updatedAt = now return } // Existing entry — always preserve created_at *createdAt = old.CreatedAt if old.ContentHash == newHash { // No changes — preserve updated_at too *updatedAt = old.UpdatedAt } else { *updatedAt = now } } // walkMD walks a directory recursively and calls fn for each .md file found, // skipping module.md (which is parsed separately as a Module entry). func walkMD(dir string, fn func(path string)) { if _, err := os.Stat(dir); err != nil { return } filepath.Walk(dir, func(path string, info os.FileInfo, err error) error { if err != nil || info.IsDir() || !strings.HasSuffix(path, ".md") { return nil } base := filepath.Base(path) if base == "module.md" { return nil } // Skip module-level doc files that are not function/type entries. switch base { case "MIGRATION.md", "README.md", "CHANGELOG.md": return nil } fn(path) return nil }) }