package infra

import (
	"fmt"
	"os"
	"path/filepath"
	"sort"
	"strings"

	"gopkg.in/yaml.v3"
)

// DodSchemaItem represents one declared evidence item in a DoD schema block.
type DodSchemaItem struct {
	ID       string `yaml:"id" json:"id"`
	Kind     string `yaml:"kind" json:"kind"`         // screenshot|log|url|cmd
	Expected string `yaml:"expected" json:"expected"` // free text
	Required bool   `yaml:"required" json:"required"` // default true if missing
}

// DodSchemaIssue represents one issue/flow file scanned and its parsed schema.
type DodSchemaIssue struct {
	Path   string          `json:"path"`
	Type   string          `json:"type"`   // "issue" | "flow"
	Items  []DodSchemaItem `json:"items"`  // parsed items (may be empty)
	Errors []string        `json:"errors"` // per-file validation errors
}

// DodSchemaReport aggregates the scan of dev/issues/ and dev/flows/.
//
// InvalidItems counts validation errors, not distinct items: an item that
// fails two checks contributes two to the total.
type DodSchemaReport struct {
	Files          []DodSchemaIssue `json:"files"`
	TotalFiles     int              `json:"total_files"`
	FilesWithItems int              `json:"files_with_items"`
	TotalItems     int              `json:"total_items"`
	InvalidItems   int              `json:"invalid_items"`
}

// dodValidKinds is the closed set of allowed evidence kinds.
var dodValidKinds = map[string]struct{}{
	"screenshot": {},
	"log":        {},
	"url":        {},
	"cmd":        {},
}

// dodRawItem is the YAML unmarshal target for one schema item. `required` is
// kept as a pointer so we can distinguish "missing" (defaults to true) from an
// explicit "false".
type dodRawItem struct {
	ID       string `yaml:"id"`
	Kind     string `yaml:"kind"`
	Expected string `yaml:"expected"`
	Required *bool  `yaml:"required"`
}

// dodRawFrontmatter is the YAML unmarshal target for a file's frontmatter;
// only the dod_evidence_schema key is read.
type dodRawFrontmatter struct {
	DodEvidenceSchema []dodRawItem `yaml:"dod_evidence_schema"`
}

// AuditDodSchema scans dev/issues/ (recursively, incl. completed/) and
// dev/flows/ (recursively, incl. completed/) under `issuesDir` and `flowsDir`,
// parses the `dod_evidence_schema:` block from each `.md` frontmatter, and
// returns a structured report. Read-only — does not write anything.
//
// Validations per item:
//   - id non-empty and unique within the file
//   - kind in {screenshot, log, url, cmd}
//   - expected non-empty
//   - required defaults to true when missing
//
// Files with malformed frontmatter are reported with errors but do not abort
// the scan. An empty or missing directory argument is skipped without error.
// The returned error is non-nil only when stat-ing a root fails for a reason
// other than "does not exist" (or the walk itself returns an error).
func AuditDodSchema(issuesDir, flowsDir string) (DodSchemaReport, error) {
	var report DodSchemaReport

	// collect walks root and appends one parsed entry per eligible .md file.
	collect := func(root, typ string) error {
		if root == "" {
			return nil
		}
		info, err := os.Stat(root)
		if err != nil {
			if os.IsNotExist(err) {
				return nil
			}
			return err
		}
		if !info.IsDir() {
			return nil
		}
		return filepath.WalkDir(root, func(p string, d os.DirEntry, walkErr error) error {
			// Best effort: an unreadable entry is skipped rather than
			// aborting the whole scan.
			if walkErr != nil {
				return nil
			}
			if d.IsDir() || !strings.HasSuffix(p, ".md") {
				return nil
			}
			// Skip README/INDEX/template/AGENT_GUIDE/taxonomy — convention
			// files, not real issues/flows.
			switch strings.ToLower(filepath.Base(p)) {
			case "readme.md", "index.md", "template.md", "agent_guide.md", "taxonomy.md":
				return nil
			}
			report.Files = append(report.Files, parseDodFile(p, typ))
			return nil
		})
	}

	if err := collect(issuesDir, "issue"); err != nil {
		return report, fmt.Errorf("audit_dod_schema: scan issues: %w", err)
	}
	if err := collect(flowsDir, "flow"); err != nil {
		return report, fmt.Errorf("audit_dod_schema: scan flows: %w", err)
	}

	// Sort by path so the report is deterministic regardless of walk order
	// across the two roots.
	sort.Slice(report.Files, func(i, j int) bool {
		return report.Files[i].Path < report.Files[j].Path
	})

	report.TotalFiles = len(report.Files)
	for _, f := range report.Files {
		if len(f.Items) > 0 {
			report.FilesWithItems++
		}
		report.TotalItems += len(f.Items)
		for _, e := range f.Errors {
			// Each item-level validation error counts as one invalid item.
			// Frontmatter-level errors (e.g. malformed YAML) also count.
			// Match on the message prefix: a substring match would also
			// count read errors whose embedded file path happens to contain
			// "malformed" or "duplicate id".
			if strings.HasPrefix(e, "item ") || strings.HasPrefix(e, "malformed") {
				report.InvalidItems++
			}
		}
	}
	return report, nil
}

// splitDodFrontmatter locates the leading `---` delimited frontmatter in s.
//
// found reports whether s starts with `---` at all; closed reports whether a
// closing delimiter was located. fm is the text between the two delimiters
// and is only meaningful when both are true. A closing `---` line directly
// after the opening line yields an empty fm with closed == true.
func splitDodFrontmatter(s string) (fm string, found, closed bool) {
	if !strings.HasPrefix(s, "---") {
		return "", false, false
	}

	// Drop the line break that ends the opening delimiter (LF or CRLF).
	rest := s[3:]
	openingEnded := false
	switch {
	case strings.HasPrefix(rest, "\r\n"):
		rest, openingEnded = rest[2:], true
	case strings.HasPrefix(rest, "\n"):
		rest, openingEnded = rest[1:], true
	}

	// Empty frontmatter: the closing delimiter is the very next line, so it
	// has no preceding "\n" left in rest for the search below to find.
	if openingEnded {
		first := rest
		if nl := strings.IndexByte(rest, '\n'); nl >= 0 {
			first = rest[:nl]
		}
		if strings.TrimRight(first, " \t\r") == "---" {
			return "", true, true
		}
	}

	end := strings.Index(rest, "\n---")
	if end < 0 {
		return "", true, false
	}
	return rest[:end], true, true
}

// parseDodFile reads a single .md, extracts the YAML frontmatter, parses the
// dod_evidence_schema block (if any), and validates each item.
//
// It never fails: read, frontmatter and validation problems are recorded in
// the returned entry's Errors. Files without frontmatter, or without a
// dod_evidence_schema block, yield an entry with no items and no errors.
func parseDodFile(path, typ string) DodSchemaIssue {
	entry := DodSchemaIssue{Path: path, Type: typ}

	data, err := os.ReadFile(path)
	if err != nil {
		entry.Errors = append(entry.Errors, fmt.Sprintf("read error: %v", err))
		return entry
	}

	fm, found, closed := splitDodFrontmatter(string(data))
	if !found {
		// No frontmatter — silently skip (not every .md must have one).
		return entry
	}
	if !closed {
		// No closing --- — treat as malformed but do not crash.
		entry.Errors = append(entry.Errors, "malformed frontmatter: missing closing ---")
		return entry
	}

	var raw dodRawFrontmatter
	if err := yaml.Unmarshal([]byte(fm), &raw); err != nil {
		entry.Errors = append(entry.Errors, fmt.Sprintf("malformed frontmatter yaml: %v", err))
		return entry
	}
	if len(raw.DodEvidenceSchema) == 0 {
		return entry
	}

	entry.Items = make([]DodSchemaItem, 0, len(raw.DodEvidenceSchema))
	seen := make(map[string]struct{}, len(raw.DodEvidenceSchema))
	for i, it := range raw.DodEvidenceSchema {
		item := DodSchemaItem{
			ID:       strings.TrimSpace(it.ID),
			Kind:     strings.TrimSpace(it.Kind),
			Expected: strings.TrimSpace(it.Expected),
			Required: true, // default
		}
		if it.Required != nil {
			item.Required = *it.Required
		}

		// Validation — errors are reported but the item is still appended so
		// the caller sees the (partial) data.
		// label identifies the item in messages: its id, or "#<index>" when
		// the id is missing.
		label := item.ID
		if label == "" {
			label = fmt.Sprintf("#%d", i)
			entry.Errors = append(entry.Errors, fmt.Sprintf("item %s missing id", label))
		} else if _, dup := seen[item.ID]; dup {
			entry.Errors = append(entry.Errors, fmt.Sprintf("item '%s' duplicate id", item.ID))
		} else {
			seen[item.ID] = struct{}{}
		}

		if item.Kind == "" {
			entry.Errors = append(entry.Errors, fmt.Sprintf("item '%s' missing kind (valid: screenshot|log|url|cmd)", label))
		} else if _, ok := dodValidKinds[item.Kind]; !ok {
			entry.Errors = append(entry.Errors, fmt.Sprintf("item '%s' invalid kind '%s' (valid: screenshot|log|url|cmd)", label, item.Kind))
		}

		if item.Expected == "" {
			entry.Errors = append(entry.Errors, fmt.Sprintf("item '%s' empty expected", label))
		}

		entry.Items = append(entry.Items, item)
	}
	return entry
}