Files
fn_registry/functions/infra/audit_uses_functions.go
egutierrez f65178025d feat(audit+pipelines): mejor deteccion + auto-recovery TBD
- audit_uses_functions: parsea Go func name del signature (no solo PascalCase de name); skip _test.go y dirs e2e/tests/testdata/build/dist/vendor/node_modules; add scanner TS para frontend/ con import "@fn_library/<area>/<name>" → <name>_ts_<area>; unused solo flagea langs efectivamente escaneados
- full_git_push: si pre-commit hook bloquea, retry con --no-verify y reporta bypass; si push rechazado por non-fast-forward, fetch + merge --no-ff auto y reintenta; exit code 1 + bloque [!!] ERRORES si quedan errores reales
- full_git_pull: si pull --ff-only diverge, intenta merge --no-ff auto contra @{u}; conserva [merged-auto] o aborta con [diverged] si conflicto; exit code 1 si quedan repos pendientes
- slash commands /full-git-push y /full-git-pull: documentadas obligaciones del agente para garantizar TBD (master siempre alineado con remote)
- kanban app.md: quita percentile_int64 (transitivo via duration_stats)

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-09 03:57:51 +02:00

550 lines
15 KiB
Go

package infra
import (
"bufio"
"database/sql"
"encoding/json"
"fmt"
"os"
"path/filepath"
"regexp"
"strings"
"unicode"
_ "github.com/mattn/go-sqlite3"
)
// UsesFunctionsAudit is the uses_functions drift report for one registered
// app: what the code references but the manifest omits, and the reverse.
type UsesFunctionsAudit struct {
	// AppID is the registry id of the app, e.g. "kanban_go_tools".
	AppID string
	// Lang is the app language as stored in the registry: "go" or "py".
	Lang string
	// DirPath is the dir_path value exactly as stored in registry.db.
	DirPath string
	// Missing lists function IDs found in imports but absent from the
	// uses_functions declared in app.md.
	Missing []string
	// Unused lists function IDs declared in app.md but not detected in code.
	Unused []string
}
// auditFnMeta is the subset of a registry function row that the audit needs.
type auditFnMeta struct {
	// id is the registry function id; it is the value reported in audits.
	id string
	// name is the function's registry name (snake_case in the examples used
	// throughout this file).
	name string
	// domain is the registry domain the function belongs to.
	domain string
	// lang is the implementation language; the audit loads "go", "py" and "ts".
	lang string
	// signature is the stored signature, or "" when the registry has none.
	signature string
}
// auditSkipDirs is the set of directory names that audit walks prune.
// Tests, build artefacts, vendored dependencies and per-PC state never count
// towards the uses_functions of an app.
var auditSkipDirs = map[string]bool{
	// Version control and third-party code.
	".git":         true,
	"node_modules": true,
	"vendor":       true,

	// Python environments and caches.
	".venv":         true,
	"venv":          true,
	"__pycache__":   true,
	".ipython":      true,
	".pytest_cache": true,

	// Build output.
	"dist":  true,
	"build": true,

	// Tests and fixtures.
	"testdata": true,
	"e2e":      true,
	"tests":    true,

	// Machine-local state.
	"local_files": true,
}

// auditShouldSkipDir reports whether a directory with the given base name is
// listed in auditSkipDirs and must therefore be pruned from an audit walk.
func auditShouldSkipDir(name string) bool {
	skip, listed := auditSkipDirs[name]
	return listed && skip
}
// AuditUsesFunctions compares, for every Go and Python app registered in
// registry.db, the uses_functions list stored for the app against the registry
// functions its source code actually references.
//
// Detection is delegated per language:
//   - Go apps: auditGoApp (registry import paths plus a search for each
//     function's exported symbol).
//   - Python apps: auditPyApp ("from <pkg> import X" names resolved against
//     registry function names).
//   - TypeScript: auditTSApp runs on the app's frontend/ directory when it
//     exists, otherwise on the app directory itself if hasTSSources finds
//     .ts/.tsx files there.
//
// Missing holds detected IDs that are not declared, in detection order.
// Unused holds declared IDs that were not detected, in declaration order; an
// ID is reported as unused only when it is a known registry function whose
// language was actually scanned for this app.
//
// registryRoot is the directory containing registry.db; relative dir_path
// values are resolved against it.
//
// An error is returned when the database cannot be opened, queried or read to
// the end. Apps whose directory cannot be inspected (missing, unreadable or
// not a directory) are still reported, with Missing and Unused left nil.
func AuditUsesFunctions(registryRoot string) ([]UsesFunctionsAudit, error) {
	dbPath := filepath.Join(registryRoot, "registry.db")
	dsn := fmt.Sprintf("file:%s?mode=ro&_foreign_keys=on", dbPath)
	db, err := sql.Open("sqlite3", dsn)
	if err != nil {
		return nil, fmt.Errorf("audit_uses_functions: open db: %w", err)
	}
	defer db.Close()
	if err := db.Ping(); err != nil {
		return nil, fmt.Errorf("audit_uses_functions: ping db: %w", err)
	}

	// Load all Go/Python/TS functions from the registry, keyed by id.
	fnRows, err := db.Query(`SELECT id, name, domain, lang, COALESCE(signature, '') FROM functions WHERE lang IN ('go','py','ts')`)
	if err != nil {
		return nil, fmt.Errorf("audit_uses_functions: query functions: %w", err)
	}
	defer fnRows.Close()
	allFunctions := make(map[string]auditFnMeta)
	for fnRows.Next() {
		var m auditFnMeta
		if err := fnRows.Scan(&m.id, &m.name, &m.domain, &m.lang, &m.signature); err != nil {
			// Best effort: one unreadable row must not abort the whole audit.
			continue
		}
		allFunctions[m.id] = m
	}
	// A failed iteration would otherwise yield a silently truncated registry
	// and, downstream, bogus drift.
	if err := fnRows.Err(); err != nil {
		return nil, fmt.Errorf("audit_uses_functions: read functions: %w", err)
	}

	// Load the apps to audit (Go and Python only).
	type appRow struct {
		id            string
		lang          string
		dirPath       string
		usesFunctions []string
	}
	appRows, err := db.Query(`SELECT id, lang, dir_path, uses_functions FROM apps WHERE lang IN ('go','py')`)
	if err != nil {
		return nil, fmt.Errorf("audit_uses_functions: query apps: %w", err)
	}
	defer appRows.Close()
	var apps []appRow
	for appRows.Next() {
		var a appRow
		// NullString keeps apps with a NULL uses_functions in the report
		// (they simply declare nothing) instead of failing the scan.
		var usesJSON sql.NullString
		if err := appRows.Scan(&a.id, &a.lang, &a.dirPath, &usesJSON); err != nil {
			continue // best effort, as above
		}
		if usesJSON.Valid {
			// A malformed list is deliberately tolerated: whatever could be
			// decoded is kept and the rest shows up as Missing.
			_ = json.Unmarshal([]byte(usesJSON.String), &a.usesFunctions)
		}
		apps = append(apps, a)
	}
	if err := appRows.Err(); err != nil {
		return nil, fmt.Errorf("audit_uses_functions: read apps: %w", err)
	}

	var results []UsesFunctionsAudit
	for _, app := range apps {
		absDir := app.dirPath
		if !filepath.IsAbs(absDir) {
			absDir = filepath.Join(registryRoot, app.dirPath)
		}
		audit := UsesFunctionsAudit{
			AppID:   app.id,
			Lang:    app.lang,
			DirPath: app.dirPath,
		}

		// Cannot inspect: skip the diff and leave Missing/Unused nil. Any stat
		// failure counts, not just "not exist"; scanning an unreadable tree
		// would flag every declared function as unused.
		if info, statErr := os.Stat(absDir); statErr != nil || !info.IsDir() {
			results = append(results, audit)
			continue
		}

		// Track which languages were scanned. The Unused diff only flags
		// declared IDs whose language was inspected, so e.g. an app without
		// any TS sources does not get every ts dependency marked unused.
		scannedLangs := map[string]bool{}
		var importedIDs []string
		switch app.lang {
		case "go":
			importedIDs = append(importedIDs, auditGoApp(absDir, allFunctions)...)
			scannedLangs["go"] = true
		case "py":
			importedIDs = append(importedIDs, auditPyApp(absDir, allFunctions)...)
			scannedLangs["py"] = true
		}

		// Frontend audit: an app that bundles a frontend/ tree gets its TS
		// imports inspected too.
		if frontDir := filepath.Join(absDir, "frontend"); dirExists(frontDir) {
			importedIDs = append(importedIDs, auditTSApp(frontDir, allFunctions)...)
			scannedLangs["ts"] = true
		}
		// Otherwise fall back to TS sources living directly in the app tree.
		if !scannedLangs["ts"] && hasTSSources(absDir) {
			importedIDs = append(importedIDs, auditTSApp(absDir, allFunctions)...)
			scannedLangs["ts"] = true
		}

		declared := make(map[string]bool, len(app.usesFunctions))
		for _, id := range app.usesFunctions {
			declared[id] = true
		}

		// Walk the slices rather than the sets so the report order does not
		// depend on map iteration.
		imported := make(map[string]bool, len(importedIDs))
		for _, id := range importedIDs {
			if imported[id] {
				continue
			}
			imported[id] = true
			if !declared[id] {
				audit.Missing = append(audit.Missing, id)
			}
		}

		reported := make(map[string]bool)
		for _, id := range app.usesFunctions {
			if imported[id] || reported[id] {
				continue
			}
			reported[id] = true
			// Only flag unused if this language was scanned; otherwise we
			// cannot tell.
			m, known := allFunctions[id]
			if !known || !scannedLangs[m.lang] {
				continue
			}
			audit.Unused = append(audit.Unused, id)
		}

		results = append(results, audit)
	}
	return results, nil
}
// auditGoApp returns the IDs of the registry Go functions referenced by the
// Go sources under appDir.
//
// It works in two stages:
//  1. collectGoImportedDomains yields the registry domains the app imports;
//     no imported domain means nothing can be in use.
//  2. Every Go function of an imported domain is looked up in the
//     concatenated source (readGoSourceBlob) through the identifiers returned
//     by goCandidateTokens; the first identifier found as a whole token marks
//     the function as used.
//
// The result is nil when no domain is imported or no source could be read.
// Its order follows the iteration order of all and is therefore unspecified.
func auditGoApp(appDir string, all map[string]auditFnMeta) []string {
	domains := collectGoImportedDomains(appDir)
	if len(domains) == 0 {
		return nil
	}

	// One blob for the whole app keeps the per-function search cheap.
	source := readGoSourceBlob(appDir)
	if source == "" {
		return nil
	}

	var hits []string
	for _, fn := range all {
		if fn.lang != "go" || !domains[fn.domain] {
			continue
		}
		referenced := false
		for _, candidate := range goCandidateTokens(fn) {
			if referenced = containsToken(source, candidate); referenced {
				break
			}
		}
		if referenced {
			hits = append(hits, fn.id)
		}
	}
	return hits
}
// goSignatureFnRe captures the exported identifier declared by a Go function
// signature, allowing leading whitespace and an optional method receiver.
var goSignatureFnRe = regexp.MustCompile(`^\s*func\s+(?:\([^)]*\)\s+)?([A-Z][A-Za-z0-9_]*)`)

// goCandidateTokens lists the identifiers to look for when searching source
// code for usages of the Go function m: first the exported name parsed from
// the registry signature (when there is one), then PascalCase(name) unless it
// is empty or repeats the first entry. The result is never nil.
func goCandidateTokens(m auditFnMeta) []string {
	candidates := make([]string, 0, 2)
	if m.signature != "" {
		if sub := goSignatureFnRe.FindStringSubmatch(m.signature); sub != nil {
			candidates = append(candidates, sub[1])
		}
	}

	fallback := snakeToPascal(m.name)
	switch {
	case fallback == "":
		// Nothing to derive from the name.
	case len(candidates) > 0 && candidates[0] == fallback:
		// The signature already produced the same identifier.
	default:
		candidates = append(candidates, fallback)
	}
	return candidates
}
// goImportRe matches a quoted registry import path and captures its domain,
// i.e. the single path element following "fn-registry/functions/". Domains
// may contain lowercase letters, digits and underscores.
var goImportRe = regexp.MustCompile(`"fn-registry/functions/([a-z][a-z0-9_]*)"`)

// collectGoImportedDomains returns the set of registry domains imported by
// the production .go files under appDir (files ending in _test.go and
// directories listed in the audit skip set are ignored).
//
// The scan is best effort: unreadable files or directories are skipped and
// never reported. appDir itself is always walked, even when its own name is
// in the skip set, because it is the app being audited.
func collectGoImportedDomains(appDir string) map[string]bool {
	domains := make(map[string]bool)
	// The walk error is ignored on purpose: the callback never returns one.
	_ = filepath.Walk(appDir, func(path string, info os.FileInfo, err error) error {
		if err != nil {
			return nil
		}
		if info.IsDir() {
			if path != appDir && auditShouldSkipDir(info.Name()) {
				return filepath.SkipDir
			}
			return nil
		}
		if !strings.HasSuffix(path, ".go") || strings.HasSuffix(path, "_test.go") {
			return nil
		}
		// Whole-file matching instead of a line scanner: the pattern never
		// spans lines, and this avoids the scanner's line-length limit.
		data, err := os.ReadFile(path)
		if err != nil {
			return nil
		}
		for _, m := range goImportRe.FindAllSubmatch(data, -1) {
			domains[string(m[1])] = true
		}
		return nil
	})
	return domains
}
// readGoSourceBlob concatenates the contents of every production .go file
// under appDir, each followed by a newline, in the lexical order of the walk.
// Files ending in _test.go and directories listed in the audit skip set are
// left out.
//
// The read is best effort: unreadable entries are skipped, and "" is returned
// when nothing could be read. appDir itself is always walked, even when its
// own name is in the skip set, because it is the app being audited.
func readGoSourceBlob(appDir string) string {
	var blob strings.Builder
	// The walk error is ignored on purpose: the callback never returns one.
	_ = filepath.Walk(appDir, func(path string, info os.FileInfo, err error) error {
		if err != nil {
			return nil
		}
		if info.IsDir() {
			if path != appDir && auditShouldSkipDir(info.Name()) {
				return filepath.SkipDir
			}
			return nil
		}
		if !strings.HasSuffix(path, ".go") || strings.HasSuffix(path, "_test.go") {
			return nil
		}
		data, err := os.ReadFile(path)
		if err != nil {
			return nil
		}
		blob.Write(data)
		// Separator so the last token of one file never fuses with the first
		// token of the next.
		blob.WriteByte('\n')
		return nil
	})
	return blob.String()
}
// containsToken reports whether token appears in src as a whole identifier,
// i.e. an occurrence that is neither preceded nor followed by an identifier
// rune (see isIdentRune). String boundaries count as delimiters, which avoids
// matching substrings of longer names.
//
// Neighbouring characters are decoded as UTF-8 runes, so a token next to a
// non-ASCII symbol ("Foo→") is still found while one glued to a non-ASCII
// letter ("éFoo") is not. An empty token never matches.
func containsToken(src, token string) bool {
	if token == "" {
		return false
	}
	from := 0
	for {
		rel := strings.Index(src[from:], token)
		if rel < 0 {
			return false
		}
		start := from + rel
		head, tail := src[:start], src[start+len(token):]
		// A side is a boundary when trimming identifier runes from it removes
		// nothing. The trim stops at the first non-identifier rune, so each
		// check only looks at the adjacent identifier run.
		boundedLeft := len(strings.TrimRightFunc(head, isIdentRune)) == len(head)
		boundedRight := len(strings.TrimLeftFunc(tail, isIdentRune)) == len(tail)
		if boundedLeft && boundedRight {
			return true
		}
		// token is non-empty and was found, so start+1 <= len(src).
		from = start + 1
	}
}

// isIdentRune reports whether r can be part of an identifier: an underscore,
// a Unicode letter or a Unicode decimal digit.
func isIdentRune(r rune) bool {
	return r == '_' || unicode.IsLetter(r) || unicode.IsDigit(r)
}
// pyFromImportRe matches a "from <pkg> import <names>" statement where <pkg>
// is a single dot-free word, and captures the package and the import list.
var pyFromImportRe = regexp.MustCompile(`from\s+(\w+)\s+import\s+(.+)`)

// auditPyApp returns the IDs of the registry Python functions imported by the
// .py files under appDir (directories listed in the audit skip set are
// ignored; appDir itself is always walked).
//
// Every "from <pkg> import X, Y" statement is inspected and each imported
// name, with any "as alias" dropped, is resolved against the names of the
// registry's Python functions. The package part is not checked. When several
// Python functions share a name, which ID is reported is unspecified.
//
// Import lists may be parenthesised and may span several lines; text after a
// '#' is ignored. Each ID is returned once, in order of first appearance. The
// scan is best effort: unreadable files are skipped.
func auditPyApp(appDir string, all map[string]auditFnMeta) []string {
	// name → id for Python functions, e.g. "metabase_auth" → "metabase_auth_py_infra".
	nameToID := make(map[string]string)
	for _, m := range all {
		if m.lang == "py" {
			nameToID[m.name] = m.id
		}
	}

	var used []string
	seen := make(map[string]bool)

	stripComment := func(s string) string {
		if i := strings.IndexByte(s, '#'); i >= 0 {
			return s[:i]
		}
		return s
	}
	// collect resolves the names of one comma-separated import-list fragment.
	collect := func(fragment string) {
		for _, item := range strings.Split(fragment, ",") {
			// Drop parentheses and line continuations, then keep the first
			// word so "X as alias" resolves to X. Empty items (trailing
			// comma, lone parenthesis) are skipped.
			fields := strings.Fields(strings.Trim(item, " \t()\\"))
			if len(fields) == 0 {
				continue
			}
			if id, ok := nameToID[fields[0]]; ok && !seen[id] {
				seen[id] = true
				used = append(used, id)
			}
		}
	}

	// The walk error is ignored on purpose: the callback never returns one.
	_ = filepath.Walk(appDir, func(path string, info os.FileInfo, err error) error {
		if err != nil {
			return nil
		}
		if info.IsDir() {
			if path != appDir && auditShouldSkipDir(info.Name()) {
				return filepath.SkipDir
			}
			return nil
		}
		if !strings.HasSuffix(path, ".py") {
			return nil
		}
		f, err := os.Open(path)
		if err != nil {
			return nil
		}
		defer f.Close()

		sc := bufio.NewScanner(f)
		for sc.Scan() {
			m := pyFromImportRe.FindStringSubmatch(strings.TrimSpace(sc.Text()))
			if m == nil {
				continue
			}
			list := stripComment(m[2])
			collect(list)
			if !strings.Contains(list, "(") || strings.Contains(list, ")") {
				continue
			}
			// Parenthesised list left open: the names continue on the
			// following lines, up to the closing parenthesis.
			for sc.Scan() {
				cont := stripComment(sc.Text())
				collect(cont)
				if strings.Contains(cont, ")") {
					break
				}
			}
		}
		return nil
	})
	return used
}
// commonAbbrevs maps a lowercase snake_case word to the spelling it takes
// inside a Go exported identifier. snakeToPascal consults it for every word
// before falling back to plain capitalisation, which is how "sqlite_open"
// becomes "SQLiteOpen" and "http_json_response" becomes "HTTPJSONResponse".
var commonAbbrevs = map[string]string{
	// Networking and protocols.
	"http":  "HTTP",
	"https": "HTTPS",
	"url":   "URL",
	"api":   "API",
	"tls":   "TLS",
	"tcp":   "TCP",
	"udp":   "UDP",
	"ip":    "IP",
	"ssh":   "SSH",
	"sse":   "SSE",
	"ws":    "WS",
	"smtp":  "SMTP",
	"imap":  "IMAP",
	"pop3":  "POP3",
	"dns":   "DNS",
	"vpn":   "VPN",
	"cors":  "CORS",

	// Data formats and storage.
	"sql":    "SQL",
	"sqlite": "SQLite",
	"db":     "DB",
	"json":   "JSON",
	"yaml":   "YAML",
	"xml":    "XML",
	"html":   "HTML",
	"css":    "CSS",
	"csv":    "CSV",
	"s3":     "S3",
	"gcs":    "GCS",
	"bq":     "BQ",

	// Authentication and authorisation.
	"jwt":    "JWT",
	"oauth":  "OAuth",
	"oauth2": "OAuth2",
	"rbac":   "RBAC",

	// General-purpose initialisms.
	"id":   "ID",
	"spa":  "SPA",
	"crud": "CRUD",
	"cli":  "CLI",
	"cpu":  "CPU",
	"gpu":  "GPU",
	"os":   "OS",
	"ttl":  "TTL",
	"rgb":  "RGB",
	"rgba": "RGBA",
	"io":   "IO",
	"ok":   "OK",
	"ui":   "UI",

	// Short forms that are only capitalised, not fully uppercased.
	"cmd": "Cmd",
	"ctx": "Ctx",
	"cfg": "Cfg",
	"env": "Env",
}
// hasTSSources reports whether appDir contains at least one production
// .ts/.tsx file, i.e. one that auditTSApp would actually inspect: test files
// (*.test.ts*, *.spec.ts*) and declaration files (*.d.ts) do not count, and
// directories listed in the audit skip set are not entered.
//
// appDir itself is always walked, even when its own name is in the skip set.
// Unreadable entries are skipped.
func hasTSSources(appDir string) bool {
	found := false
	// The walk error is ignored on purpose: the callback never returns one.
	_ = filepath.Walk(appDir, func(path string, info os.FileInfo, err error) error {
		if found {
			// SkipDir on a file only skips the rest of its directory, so keep
			// answering SkipDir to unwind the remaining walk quickly.
			return filepath.SkipDir
		}
		if err != nil {
			return nil
		}
		if info.IsDir() {
			if path != appDir && auditShouldSkipDir(info.Name()) {
				return filepath.SkipDir
			}
			return nil
		}
		name := info.Name()
		if !strings.HasSuffix(name, ".ts") && !strings.HasSuffix(name, ".tsx") {
			return nil
		}
		for _, suffix := range []string{".test.ts", ".test.tsx", ".spec.ts", ".spec.tsx", ".d.ts"} {
			if strings.HasSuffix(name, suffix) {
				return nil
			}
		}
		found = true
		return filepath.SkipDir
	})
	return found
}
// tsLibraryImportRe matches a quoted "@fn_library/<area>/<name>" module
// specifier and captures <area> and <name>.
var tsLibraryImportRe = regexp.MustCompile(`["']@fn_library/([a-zA-Z0-9_]+)/([a-zA-Z0-9_]+)["']`)

// auditTSApp returns the IDs of the registry TypeScript functions referenced
// by the .ts/.tsx files under appDir.
//
// A function counts as used when a file contains the module specifier
// "@fn_library/<area>/<name>" and the registry has a TS function with
// domain <area> and name <name> (IDs have the form "<name>_ts_<area>").
// Any quoted occurrence matches, so re-exports count as usage. Test files
// (*.test.ts*, *.spec.ts*), declaration files (*.d.ts) and directories
// listed in the audit skip set are ignored; appDir itself is always walked,
// even when its own name is in the skip set.
//
// Each ID is returned once, in order of first appearance. The result is
// never nil. The scan is best effort: unreadable files are skipped.
func auditTSApp(appDir string, all map[string]auditFnMeta) []string {
	// (domain, name) → id for TS functions, e.g. "ui|color_bg" → "color_bg_ts_ui".
	tsByKey := make(map[string]string)
	for _, m := range all {
		if m.lang == "ts" {
			tsByKey[m.domain+"|"+m.name] = m.id
		}
	}

	used := make([]string, 0)
	seen := make(map[string]bool)
	// The walk error is ignored on purpose: the callback never returns one.
	_ = filepath.Walk(appDir, func(path string, info os.FileInfo, err error) error {
		if err != nil {
			return nil
		}
		if info.IsDir() {
			if path != appDir && auditShouldSkipDir(info.Name()) {
				return filepath.SkipDir
			}
			return nil
		}
		base := info.Name()
		if !strings.HasSuffix(base, ".ts") && !strings.HasSuffix(base, ".tsx") {
			return nil
		}
		for _, suffix := range []string{".test.ts", ".test.tsx", ".spec.ts", ".spec.tsx", ".d.ts"} {
			if strings.HasSuffix(base, suffix) {
				return nil
			}
		}
		data, err := os.ReadFile(path)
		if err != nil {
			return nil
		}
		for _, match := range tsLibraryImportRe.FindAllSubmatch(data, -1) {
			key := string(match[1]) + "|" + string(match[2])
			if id, ok := tsByKey[key]; ok && !seen[id] {
				seen[id] = true
				used = append(used, id)
			}
		}
		return nil
	})
	return used
}
// snakeToPascal converts a snake_case name to the PascalCase form used for Go
// exported identifiers, e.g. "sqlite_open" → "SQLiteOpen" and
// "http_json_response" → "HTTPJSONResponse".
//
// Each underscore-separated word is looked up case-insensitively in
// commonAbbrevs and replaced by its canonical spelling when listed; any other
// word only gets its first rune uppercased and is otherwise kept as is. Empty
// words (leading, trailing or doubled underscores) are dropped, so "" yields "".
func snakeToPascal(s string) string {
	var sb strings.Builder
	for _, word := range strings.Split(s, "_") {
		if word == "" {
			continue
		}
		if abbr, ok := commonAbbrevs[strings.ToLower(word)]; ok {
			sb.WriteString(abbr)
			continue
		}
		// Work on runes: slicing the first byte would split a multi-byte
		// first character and corrupt the result.
		runes := []rune(word)
		sb.WriteRune(unicode.ToUpper(runes[0]))
		sb.WriteString(string(runes[1:]))
	}
	return sb.String()
}