package infra

import (
	"bufio"
	"database/sql"
	"fmt"
	"os"
	"path/filepath"
	"regexp"
	"sort"
	"strings"

	_ "github.com/mattn/go-sqlite3"
)

// CapabilityGroupAudit holds the audit result for a single capability group.
type CapabilityGroupAudit struct {
	Group           string   `json:"group"`             // group slug (canonical tag)
	DeclaredInIndex bool     `json:"declared_in_index"` // linked from docs/capabilities/INDEX.md
	DocExists       bool     `json:"doc_exists"`        // docs/capabilities/{slug}.md exists on disk
	FunctionCount   int      `json:"function_count"`    // functions carrying this tag in registry.db
	Issues          []string `json:"issues"`            // detected problems; AuditCapabilityGroups always sets a non-nil slice
	OK              bool     `json:"ok"`                // true when Issues is empty
}

// indexLinkRe matches markdown links [slug](target.md). Go regexp has no
// backreferences, so both sides are captured and compared in code.
var indexLinkRe = regexp.MustCompile(`\[([a-z][a-z0-9_-]*)\]\(([a-z][a-z0-9_-]*)\.md\)`)

// AuditCapabilityGroups audits the drift between:
//  1. Groups declared in docs/capabilities/INDEX.md (parsed via link regex).
//  2. Tags present in functions.tags of registry.db (via json_each).
//  3. Doc pages existing as docs/capabilities/{slug}.md on disk.
//
// root is the directory that contains registry.db and the docs/ tree.
//
// It returns one CapabilityGroupAudit per slug that is declared in INDEX, has
// a doc page, or is used as a tag by at least 3 functions, ordered by Group
// ascending. Possible issues per entry:
//
//   - declared in INDEX: "doc_missing" when the page is absent, plus
//     "no_functions" (count 0) or "below_minimum" (count 1-2);
//   - not declared, page exists: "doc_orphan";
//   - not declared, no page, count >= 3: "ungrouped_candidate".
//
// Issues is always a non-nil slice so the JSON form is [] rather than null.
// An error is returned when registry.db cannot be opened or queried, or when
// INDEX.md or the docs directory exist but cannot be read.
func AuditCapabilityGroups(root string) ([]CapabilityGroupAudit, error) {
	// Minimum number of tagged functions for a tag to count as a group.
	const minFunctions = 3

	// --- 1. Open registry.db (read-only) ---
	dbPath := filepath.Join(root, "registry.db")
	dsn := fmt.Sprintf("file:%s?mode=ro&_foreign_keys=on", dbPath)
	db, err := sql.Open("sqlite3", dsn)
	if err != nil {
		return nil, fmt.Errorf("audit_capability_groups: open db: %w", err)
	}
	defer db.Close()
	// sql.Open may defer the actual connection; Ping forces it so that an
	// unreadable database is reported here rather than on the first query.
	if err := db.Ping(); err != nil {
		return nil, fmt.Errorf("audit_capability_groups: ping db: %w", err)
	}

	// --- 2. Query all tags with their function counts ---
	tagCounts, err := queryTagCounts(db)
	if err != nil {
		return nil, fmt.Errorf("audit_capability_groups: query tags: %w", err)
	}

	// --- 3. Parse INDEX.md for declared group slugs ---
	indexPath := filepath.Join(root, "docs", "capabilities", "INDEX.md")
	indexSlugs, err := parseIndexSlugs(indexPath)
	if err != nil {
		return nil, fmt.Errorf("audit_capability_groups: parse INDEX.md: %w", err)
	}

	// --- 4. Scan docs/capabilities/ for existing .md files (excluding INDEX.md) ---
	capDir := filepath.Join(root, "docs", "capabilities")
	docSlugs, err := scanDocSlugs(capDir)
	if err != nil {
		return nil, fmt.Errorf("audit_capability_groups: scan docs: %w", err)
	}

	// --- 5. Build audit entries ---
	// Collect every relevant slug: declared in INDEX, present as a doc page,
	// or a tag with at least minFunctions functions.
	allSlugs := make(map[string]struct{})
	for s := range indexSlugs {
		allSlugs[s] = struct{}{}
	}
	for s := range docSlugs {
		allSlugs[s] = struct{}{}
	}
	for tag, count := range tagCounts {
		if count >= minFunctions {
			allSlugs[tag] = struct{}{}
		}
	}

	results := make([]CapabilityGroupAudit, 0, len(allSlugs))
	for slug := range allSlugs {
		_, declaredInIndex := indexSlugs[slug]
		_, docExists := docSlugs[slug]
		count := tagCounts[slug] // zero when the slug is not used as a tag

		// Start from an empty, non-nil slice so that a clean entry marshals
		// as "issues": [] instead of "issues": null.
		issues := []string{}
		switch {
		case declaredInIndex:
			if !docExists {
				issues = append(issues, "doc_missing")
			}
			switch {
			case count == 0:
				issues = append(issues, "no_functions")
			case count < minFunctions:
				issues = append(issues, "below_minimum")
			}
		case docExists:
			// Doc page exists but INDEX does not link to it.
			issues = append(issues, "doc_orphan")
		case count >= minFunctions:
			// Tag with enough functions but neither declaration nor doc page.
			issues = append(issues, "ungrouped_candidate")
		}

		results = append(results, CapabilityGroupAudit{
			Group:           slug,
			DeclaredInIndex: declaredInIndex,
			DocExists:       docExists,
			FunctionCount:   count,
			Issues:          issues,
			OK:              len(issues) == 0,
		})
	}

	// Map iteration order is random; sort for deterministic output.
	sort.Slice(results, func(i, j int) bool {
		return results[i].Group < results[j].Group
	})
	return results, nil
}

// queryTagCounts returns a map of tag →
function count using json_each on functions.tags. func queryTagCounts(db *sql.DB) (map[string]int, error) { rows, err := db.Query(` SELECT j.value AS tag, COUNT(*) AS cnt FROM functions f, json_each(f.tags) j GROUP BY j.value `) if err != nil { return nil, err } defer rows.Close() counts := make(map[string]int) for rows.Next() { var tag string var cnt int if err := rows.Scan(&tag, &cnt); err != nil { continue } counts[tag] = cnt } return counts, rows.Err() } // parseIndexSlugs reads INDEX.md and extracts group slugs from links of the // form [slug](slug.md). Returns the set of declared slugs. func parseIndexSlugs(indexPath string) (map[string]struct{}, error) { f, err := os.Open(indexPath) if err != nil { if os.IsNotExist(err) { // No INDEX.md — return empty set, not an error. return make(map[string]struct{}), nil } return nil, err } defer f.Close() slugs := make(map[string]struct{}) scanner := bufio.NewScanner(f) for scanner.Scan() { line := scanner.Text() matches := indexLinkRe.FindAllStringSubmatch(line, -1) for _, m := range matches { // Self-referencing link only: [slug](slug.md), not external. label := strings.TrimSpace(m[1]) target := strings.TrimSpace(m[2]) if label != "" && label == target { slugs[label] = struct{}{} } } } return slugs, scanner.Err() } // scanDocSlugs returns the set of slugs that have a corresponding .md file in // capDir, excluding INDEX.md itself. func scanDocSlugs(capDir string) (map[string]struct{}, error) { entries, err := os.ReadDir(capDir) if err != nil { if os.IsNotExist(err) { return make(map[string]struct{}), nil } return nil, err } slugs := make(map[string]struct{}) for _, e := range entries { if e.IsDir() { continue } name := e.Name() if !strings.HasSuffix(name, ".md") { continue } slug := strings.TrimSuffix(name, ".md") if slug == "INDEX" { continue } slugs[slug] = struct{}{} } return slugs, nil }