chore(infra): audit_uses_functions detecta mejor simbolos Go con abreviaturas (issue 0057)

Reduce falsos positivos en la deteccion de simbolos Go del auditor
uses_functions, por dos vias complementarias.

Fase 1 — commonAbbrevs ampliado: anade abreviaturas verificadas contra los
nombres reales de las funciones Go del registry (OHLCV, DuckDB, ClickHouse,
NordVPN, SHA256, MD5, ANSI, CIDR, AEAD, PTY, VPS, WG, VT, FFT, EMA, RSI, SMA,
VWAP, AX, E2E, URLs). El analisis empirico mostro que reduce los mismatches
PascalCase-vs-real de 76 a 40 sin romper ninguna funcion. Se documenta por que
NO se mapean "cdp" (el registry usa Cdp: CdpGetHTML, CdpNavigate) ni "pdf"
(inconsistente: CdpPrintPDF vs PdfSimpleReport) — anadirlos generaria mas
falsos positivos de los que arregla.

Fase 2 — fallback a lectura del .go: cuando ni la signature ni PascalCase(name)
localizan el simbolo, se lee el .go de la funcion del registry y se extrae el
primer func exportado top-level (cache por ejecucion para no reabrir archivos).
El fallback esta GATEADO a que la signature no aporte simbolo (signature vacia
o sin declaracion `func` inicial): cuando la signature ya aporta un
`func <Name>`, ese nombre es la fuente de verdad y no se sobreescribe. Esto evita la
mis-atribucion en archivos .go compartidos por varias funciones (patron "TU
adicional", p.ej. cdp_new_tab vive en cdp_list_tabs.go): sin el gate, el primer
func del archivo (CdpListTabs) se atribuiria a cada hermano y suprimiria
hallazgos reales de "unused".

Verificacion (DoD):
- go build -tags fts5 + go vet limpios.
- Tests nuevos: TestSnakeToPascal_HandlesAbbreviations (golden + non-mappings
  cdp/pdf), TestAuditUsesFunctions_GoFileFallback (golden + error sin archivo),
  TestAuditUsesFunctions_SharedGoFileNotMisattributed (regresion del archivo
  compartido), TestGoRealExportedName (top-level/generic/missing/empty).
- A/B contra el registry real (fn doctor uses-functions): baseline 69 unused vs
  nuevo 69, cero regresion; cdp_get_html_go_browser sigue sin marcarse unused en
  script_navegador (Fase 3.1).

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
This commit is contained in:
Egutierrez
2026-06-30 13:14:27 +02:00
parent 88eabb0457
commit 8917105184
2 changed files with 396 additions and 16 deletions
+129 -16
View File
@@ -30,6 +30,7 @@ type auditFnMeta struct {
domain string
lang string
signature string
filePath string // registry-relative path to the .go source (Go funcs only)
}
// skipDirs are directory names ignored when walking source for audits.
@@ -80,15 +81,16 @@ func AuditUsesFunctions(registryRoot string) ([]UsesFunctionsAudit, error) {
return nil, fmt.Errorf("audit_uses_functions: ping db: %w", err)
}
// Load all Go/Python/TS functions from registry: id → name, domain, lang, signature.
rows, err := db.Query(`SELECT id, name, domain, lang, COALESCE(signature, '') FROM functions WHERE lang IN ('go','py','ts')`)
// Load all Go/Python/TS functions from registry: id → name, domain, lang,
// signature, file_path. file_path feeds the Go .go fallback (see auditGoApp).
rows, err := db.Query(`SELECT id, name, domain, lang, COALESCE(signature, ''), COALESCE(file_path, '') FROM functions WHERE lang IN ('go','py','ts')`)
if err != nil {
return nil, fmt.Errorf("audit_uses_functions: query functions: %w", err)
}
allFunctions := make(map[string]auditFnMeta) // id → meta
for rows.Next() {
var m auditFnMeta
if err := rows.Scan(&m.id, &m.name, &m.domain, &m.lang, &m.signature); err != nil {
if err := rows.Scan(&m.id, &m.name, &m.domain, &m.lang, &m.signature, &m.filePath); err != nil {
continue
}
allFunctions[m.id] = m
@@ -144,7 +146,7 @@ func AuditUsesFunctions(registryRoot string) ([]UsesFunctionsAudit, error) {
switch app.lang {
case "go":
importedIDs = append(importedIDs, auditGoApp(absDir, allFunctions)...)
importedIDs = append(importedIDs, auditGoApp(absDir, allFunctions, registryRoot)...)
scannedLangs["go"] = true
case "py":
importedIDs = append(importedIDs, auditPyApp(absDir, allFunctions)...)
@@ -197,11 +199,18 @@ func AuditUsesFunctions(registryRoot string) ([]UsesFunctionsAudit, error) {
// Strategy:
// 1. Find all "fn-registry/functions/<domain>" import paths (production code only).
// 2. For each domain, collect registry functions in that domain.
// 3. Grep source files for the exported symbol. The token tried first is the
// real Go func identifier parsed from the registry signature; fallback is
// PascalCase(name). Many functions deviate (e.g. sqlite_column_exists has
// `func ColumnExists`), so signature is the source of truth.
func auditGoApp(appDir string, all map[string]auditFnMeta) []string {
// 3. Grep source files for the exported symbol. Tokens tried, in order:
// a) the real Go func identifier parsed from the registry signature;
// b) PascalCase(name) (with commonAbbrevs);
// c) the real exported func read straight from the function's .go file.
//
// Many functions deviate from snake_case→PascalCase (e.g. sqlite_column_exists
// has `func ColumnExists`, wails_bind_crud has `func GenerateWailsCRUD`). The
// signature is usually the source of truth, but some signatures omit the `func`
// keyword or list a different primary symbol; step (c) reads the .go file as a
// last-resort fallback so those cases stop being false positives ("unused").
// The .go read is cached per execution to avoid reopening the same file.
func auditGoApp(appDir string, all map[string]auditFnMeta, registryRoot string) []string {
// Step 1: collect imported domains.
importedDomains := collectGoImportedDomains(appDir)
if len(importedDomains) == 0 {
@@ -216,6 +225,10 @@ func auditGoApp(appDir string, all map[string]auditFnMeta) []string {
return nil
}
// Cache for the .go fallback: registry file_path → real exported func name.
// Populated lazily, only when the cheaper tokens fail to match.
goFileSymbolCache := make(map[string]string)
for _, m := range all {
if m.lang != "go" {
continue
@@ -223,17 +236,76 @@ func auditGoApp(appDir string, all map[string]auditFnMeta) []string {
if !importedDomains[m.domain] {
continue
}
tokens := goCandidateTokens(m)
for _, tok := range tokens {
matched := false
for _, tok := range goCandidateTokens(m) {
if containsToken(blob, tok) {
used = append(used, m.id)
matched = true
break
}
}
if !matched && goSignatureSymbol(m) == "" {
// Fallback (c): read the registry .go file and look for the real
// exported func name. Gated on an EMPTY signature symbol on purpose:
// when the signature already yields a concrete `func <Name>` it is the
// authoritative symbol, so reading the .go (which can only guess the
// file's first exported func) must not override it. Several registry
// functions share one .go file via the "TU adicional" pattern (e.g.
// cdp_new_tab lives in cdp_list_tabs.go); without this gate the first
// func would be mis-attributed to every sibling and suppress real
// "unused" findings. The file read therefore only happens for the rare
// functions whose stored signature omits the `func` keyword.
if sym := goRealExportedName(registryRoot, m.filePath, goFileSymbolCache); sym != "" {
if containsToken(blob, sym) {
matched = true
}
}
}
if matched {
used = append(used, m.id)
}
}
return used
}
// goRealExportedFnRe matches a top-level exported func declaration at the start
// of a line of Go source: `func Name(` or the generic form `func Name[T any](`.
// It captures the func identifier. Method declarations (`func (r *T) Name(`) are
// skipped on purpose — a registry function's primary symbol is a top-level func,
// and method names would risk spurious matches. Unexported funcs are skipped too
// because the identifier must start with an upper-case ASCII letter.
var goRealExportedFnRe = regexp.MustCompile(`^func\s+([A-Z][A-Za-z0-9_]*)\s*[\(\[]`)

// goRealExportedName reads the .go file at filePath and returns the identifier
// of the first top-level exported func declared in it, or "" when filePath is
// empty, the file cannot be opened or read, or no such declaration exists.
//
// filePath is joined onto registryRoot unless it is already absolute. Results
// are memoised in cache, keyed by filePath exactly as passed in; failures are
// memoised as "" so each file is opened at most once per cache.
//
// Lines are read with bufio.Reader.ReadString rather than bufio.Scanner: a
// Scanner stops with bufio.ErrTooLong on any line longer than 64 KiB, which
// would silently hide every declaration after such a line (for example a
// generated file with a large embedded literal).
func goRealExportedName(registryRoot, filePath string, cache map[string]string) string {
	if filePath == "" {
		return ""
	}
	if sym, ok := cache[filePath]; ok {
		return sym
	}
	// Pre-seed the negative result so an unreadable or symbol-less file is not
	// retried on the next lookup.
	cache[filePath] = ""

	abs := filePath
	if !filepath.IsAbs(abs) {
		abs = filepath.Join(registryRoot, filePath)
	}
	f, err := os.Open(abs)
	if err != nil {
		return ""
	}
	defer f.Close()

	r := bufio.NewReader(f)
	for {
		// ReadString returns whatever was read before an error, so the final
		// line of a file without a trailing newline is still inspected.
		line, readErr := r.ReadString('\n')
		if m := goRealExportedFnRe.FindStringSubmatch(line); m != nil {
			cache[filePath] = m[1]
			return m[1]
		}
		if readErr != nil {
			// io.EOF or a genuine read failure: either way there is nothing
			// more to scan and the pre-seeded "" stays in the cache.
			return ""
		}
	}
}
// goCandidateTokens returns the identifiers we try when looking for usages
// of a Go function in source. Real exported name from signature first,
// PascalCase(name) as fallback.
@@ -241,10 +313,8 @@ var goSignatureFnRe = regexp.MustCompile(`^\s*func\s+(?:\([^)]*\)\s+)?([A-Z][A-Z
func goCandidateTokens(m auditFnMeta) []string {
out := []string{}
if m.signature != "" {
if match := goSignatureFnRe.FindStringSubmatch(m.signature); match != nil {
out = append(out, match[1])
}
if sym := goSignatureSymbol(m); sym != "" {
out = append(out, sym)
}
pascal := snakeToPascal(m.name)
if pascal != "" && (len(out) == 0 || out[0] != pascal) {
@@ -253,6 +323,21 @@ func goCandidateTokens(m auditFnMeta) []string {
return out
}
// goSignatureSymbol extracts the exported Go identifier from the registry
// signature stored in m by applying goSignatureFnRe and returning its first
// capture group. It yields "" when the signature is empty or the pattern does
// not match. Callers treat a non-empty result as the authoritative symbol for
// the function; auditGoApp only falls back to reading the .go file when this
// returns "".
func goSignatureSymbol(m auditFnMeta) string {
	if len(m.signature) > 0 {
		if groups := goSignatureFnRe.FindStringSubmatch(m.signature); groups != nil {
			return groups[1]
		}
	}
	return ""
}
// collectGoImportedDomains returns the set of registry domains imported by .go files.
var goImportRe = regexp.MustCompile(`"fn-registry/functions/([a-z]+)"`)
@@ -452,6 +537,34 @@ var commonAbbrevs = map[string]string{
"io": "IO",
"ok": "OK",
"ui": "UI",
// Issue 0057 — abbreviations verified consistent across the registry's own
// Go func names (each entry maps a real `func <Name>` deviation). These only
// improve the PascalCase fallback; the signature and the .go fallback remain
// the primary sources of truth. Deliberately NOT added because the registry
// itself is inconsistent for them (mapping would create more mismatches than
// it fixes): "cdp" (uses Cdp: CdpGetHTML, CdpNavigate — not CDP) and
// "pdf" (CdpPrintPDF vs PdfSimpleReport).
"ohlcv": "OHLCV",
"duckdb": "DuckDB",
"clickhouse": "ClickHouse",
"nordvpn": "NordVPN",
"sha256": "SHA256",
"md5": "MD5",
"ansi": "ANSI",
"cidr": "CIDR",
"aead": "AEAD",
"pty": "PTY",
"vps": "VPS",
"wg": "WG",
"vt": "VT",
"fft": "FFT",
"ema": "EMA",
"rsi": "RSI",
"sma": "SMA",
"vwap": "VWAP",
"ax": "AX",
"e2e": "E2E",
"urls": "URLs",
}
// hasTSSources reports whether appDir contains any production .ts/.tsx files