Files
fn_registry/functions/infra/audit_uses_functions.go
T
Egutierrez 9a7a874a76 fix(infra): audit_uses_functions detecta imports Python anidados y multilinea (0056)
El parser Python de audit_uses_functions solo reconocia "from <pkg> import X"
con un unico componente de paquete (regex \w+), por lo que:

- "from <pkg>.<subpkg> import X" (import anidado) no matcheaba y la funcion se
  reportaba como falso unused_in_app_md.
- Las listas multilinea con parentesis "from <pkg> import (\n a,\n b,\n)" no se
  parseaban (escaneo linea a linea).

Cambios:
- Regex acepta puntos en el paquete y bloques parentizados multilinea.
- Resolucion validada contra el directorio de paquete del registry derivado de
  file_path (no del campo domain: las funciones metabase viven en
  python/functions/metabase/ pero tienen domain=infra). Imports de librerias
  externas se ignoran -> sin falsos missing.
- parsePyImportedSymbols descarta comentarios "# noqa", maneja "as alias" y
  star imports (tratados como vacio, no soportados por diseno).
- auditFnMeta carga file_path; query SELECT anade file_path.

Tests (functions/infra/audit_uses_functions_test.go):
- TestAuditUsesFunctions_DetectsNestedImport (golden)
- TestAuditUsesFunctions_NoFalsePositiveOnNested (edge: nested + multilinea)
- TestAuditUsesFunctions_StarImport (error/edge: star import no cuenta)

Verificado con fn doctor uses-functions sobre apps reales: drift baja de 11/42 a
9/42. mail_manager (9 falsos por "from infra.X import Y") y demand_radar (3 por
lista multilinea) quedan en 0 drift. El residual de osint_db/osint_web es carga
dinamica via importlib, documentado como fuera de alcance.

audit_uses_functions v1.0.0 -> v1.1.0. CHANGELOG actualizado.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
2026-06-30 13:10:31 +02:00

636 lines
18 KiB
Go

package infra
import (
	"bufio"
	"database/sql"
	"encoding/json"
	"fmt"
	"os"
	"path/filepath"
	"regexp"
	"sort"
	"strings"
	"unicode"

	_ "github.com/mattn/go-sqlite3"
)
// UsesFunctionsAudit holds the drift report for a single app: the difference
// between the uses_functions list recorded for the app and the registry
// functions actually detected in the app's source code.
//
// Missing and Unused are both nil when the app directory could not be
// inspected; each of them is also nil when no drift of that kind was found.
// Unused only lists declared IDs that are known to the registry and whose
// language was actually scanned for this app.
type UsesFunctionsAudit struct {
AppID string // registry id, e.g. "kanban_go_tools"
Lang string // "go" or "py"
DirPath string // dir_path as stored in registry.db
Missing []string // function IDs found in imports but absent from app.md uses_functions
Unused []string // function IDs declared in app.md but not detected in code
}
// auditFnMeta holds registry metadata for a single function, as read from the
// functions table of registry.db (only rows with lang go, py or ts are loaded).
type auditFnMeta struct {
// id is the registry function id; it is the key of the lookup map and the
// value reported in Missing/Unused.
id string
// name is the registry function name (snake_case for Go functions, where it
// is converted to PascalCase as a fallback symbol).
name string
// domain is the registry domain. For Go it is matched against the package
// segment of the import path; for TS it is matched against the
// "@fn_library/<area>" segment.
domain string
// lang is "go", "py" or "ts".
lang string
// signature is the declared signature, or "" when the column is NULL. For Go
// functions the real exported identifier is parsed out of it.
signature string
filePath string // file_path as stored in registry.db (used to derive the Python package dir)
}
// auditSkipDirs is the set of directory base names ignored when walking source
// for audits. Tests, build artefacts, vendored deps and per-PC state never
// count towards uses_functions of an app. The match is on the directory's own
// name (not its path), at any depth of the walked tree.
var auditSkipDirs = map[string]bool{
".git": true,
"node_modules": true,
".venv": true,
"venv": true,
"__pycache__": true,
"dist": true,
"build": true,
"vendor": true,
"testdata": true,
"e2e": true,
"tests": true,
"local_files": true,
".ipython": true,
".pytest_cache": true,
}
// auditShouldSkipDir reports whether a directory with the given base name is
// excluded from audit walks, i.e. whether it is marked true in auditSkipDirs.
func auditShouldSkipDir(name string) bool {
	skip, listed := auditSkipDirs[name]
	return listed && skip
}
// AuditUsesFunctions checks every Go and Python app registered in registry.db
// and compares the uses_functions declared in the app.md manifest against the
// functions actually imported by the app's source code.
//
// For Go apps it greps for "fn-registry/functions/<domain>" import paths, then
// searches the source for the exported symbol derived from each function name
// (snake_case → PascalCase) to achieve per-function granularity within a package.
//
// For Python apps it scans for "from <pkg> import X" patterns where the root of
// <pkg> matches a registry Python package directory (derived from file_path),
// then resolves each imported symbol to a function ID by name within that package.
// Both flat ("from metabase import X") and nested ("from metabase.cards import X")
// imports are handled, as are parenthesised multi-line lists.
//
// Any app that has a frontend/ directory, or TS sources at its root, also gets
// its TypeScript imports audited. Unused is only computed for languages that
// were actually scanned.
//
// Parameters:
//   - registryRoot: directory containing registry.db; relative dir_path values
//     of apps are resolved against it.
//
// Returns one UsesFunctionsAudit per app, with Missing and Unused sorted so the
// report is deterministic. An error is returned when registry.db cannot be
// opened or queried, or when reading a result set fails part-way. Individual
// rows that fail to scan are skipped (best-effort). Apps whose dir_path does not
// exist, is not a directory or cannot be stat'ed are reported with
// Missing/Unused = nil (cannot inspect).
func AuditUsesFunctions(registryRoot string) ([]UsesFunctionsAudit, error) {
	dbPath := filepath.Join(registryRoot, "registry.db")
	dsn := fmt.Sprintf("file:%s?mode=ro&_foreign_keys=on", dbPath)
	db, err := sql.Open("sqlite3", dsn)
	if err != nil {
		return nil, fmt.Errorf("audit_uses_functions: open db: %w", err)
	}
	defer db.Close()
	if err := db.Ping(); err != nil {
		return nil, fmt.Errorf("audit_uses_functions: ping db: %w", err)
	}

	// Load all Go/Python/TS functions from registry: id → name, domain, lang, signature, file_path.
	rows, err := db.Query(`SELECT id, name, domain, lang, COALESCE(signature, ''), COALESCE(file_path, '') FROM functions WHERE lang IN ('go','py','ts')`)
	if err != nil {
		return nil, fmt.Errorf("audit_uses_functions: query functions: %w", err)
	}
	defer rows.Close() // covers every early return; Close is idempotent
	allFunctions := make(map[string]auditFnMeta) // id → meta
	for rows.Next() {
		var m auditFnMeta
		if err := rows.Scan(&m.id, &m.name, &m.domain, &m.lang, &m.signature, &m.filePath); err != nil {
			// Best-effort: one malformed row must not abort the whole audit.
			continue
		}
		allFunctions[m.id] = m
	}
	// A failed iteration would leave a truncated function list and make the
	// report silently wrong, so surface it instead of auditing partial data.
	if err := rows.Err(); err != nil {
		return nil, fmt.Errorf("audit_uses_functions: read functions: %w", err)
	}
	rows.Close()

	// Load apps with lang go or py.
	type appRow struct {
		id            string
		lang          string
		dirPath       string
		usesFunctions []string
	}
	rows2, err := db.Query(`SELECT id, lang, dir_path, uses_functions FROM apps WHERE lang IN ('go','py')`)
	if err != nil {
		return nil, fmt.Errorf("audit_uses_functions: query apps: %w", err)
	}
	defer rows2.Close()
	var apps []appRow
	for rows2.Next() {
		var a appRow
		// NullString so that an app with a NULL uses_functions column is still
		// audited (as declaring nothing) instead of being dropped by Scan.
		var ufJSON sql.NullString
		if err := rows2.Scan(&a.id, &a.lang, &a.dirPath, &ufJSON); err != nil {
			continue
		}
		if ufJSON.Valid && ufJSON.String != "" {
			// Malformed JSON is tolerated: the app is then treated as declaring
			// whatever could be decoded, so every detected import shows up as Missing.
			_ = json.Unmarshal([]byte(ufJSON.String), &a.usesFunctions)
		}
		apps = append(apps, a)
	}
	if err := rows2.Err(); err != nil {
		return nil, fmt.Errorf("audit_uses_functions: read apps: %w", err)
	}
	rows2.Close()

	var results []UsesFunctionsAudit
	for _, app := range apps {
		absDir := app.dirPath
		if !filepath.IsAbs(absDir) {
			absDir = filepath.Join(registryRoot, app.dirPath)
		}
		audit := UsesFunctionsAudit{
			AppID:   app.id,
			Lang:    app.lang,
			DirPath: app.dirPath,
		}
		// Cannot inspect — skip diff, leave Missing/Unused nil. Any stat failure
		// (not only "does not exist") and non-directories land here; walking
		// them would find nothing and flag every declared function as unused.
		if info, statErr := os.Stat(absDir); statErr != nil || !info.IsDir() {
			results = append(results, audit)
			continue
		}

		// Track which langs we successfully scanned. Unused diff only flags
		// declared IDs whose lang we actually inspected, so e.g. an app with
		// no frontend/ dir won't get every ts_* dep marked unused.
		scannedLangs := map[string]bool{}
		var importedIDs []string
		switch app.lang {
		case "go":
			importedIDs = append(importedIDs, auditGoApp(absDir, allFunctions)...)
			scannedLangs["go"] = true
		case "py":
			importedIDs = append(importedIDs, auditPyApp(absDir, allFunctions)...)
			scannedLangs["py"] = true
		}
		// Frontend audit: any app that bundles a frontend/ tree gets its TS
		// imports inspected too (kanban, registry_dashboard, etc.).
		if frontDir := filepath.Join(absDir, "frontend"); dirExists(frontDir) {
			importedIDs = append(importedIDs, auditTSApp(frontDir, allFunctions)...)
			scannedLangs["ts"] = true
		}
		// Standalone TS app or app with TS sources at root.
		if !scannedLangs["ts"] && hasTSSources(absDir) {
			importedIDs = append(importedIDs, auditTSApp(absDir, allFunctions)...)
			scannedLangs["ts"] = true
		}

		declaredSet := make(map[string]bool, len(app.usesFunctions))
		for _, id := range app.usesFunctions {
			declaredSet[id] = true
		}
		importedSet := make(map[string]bool, len(importedIDs))
		for _, id := range importedIDs {
			importedSet[id] = true
		}
		for id := range importedSet {
			if !declaredSet[id] {
				audit.Missing = append(audit.Missing, id)
			}
		}
		for id := range declaredSet {
			if importedSet[id] {
				continue
			}
			// Only flag unused if we scanned this lang; otherwise we cannot tell.
			m, ok := allFunctions[id]
			if !ok || !scannedLangs[m.lang] {
				continue
			}
			audit.Unused = append(audit.Unused, id)
		}
		// Both lists were built from map iteration; sort them so repeated runs
		// over the same registry produce identical reports.
		sort.Strings(audit.Missing)
		sort.Strings(audit.Unused)
		results = append(results, audit)
	}
	return results, nil
}
// auditGoApp reports the IDs of the registry Go functions that the production
// Go sources under appDir appear to use.
//
// It works in three steps:
//  1. Collect the registry domains imported through
//     "fn-registry/functions/<domain>" paths.
//  2. Concatenate all production .go files into one searchable blob.
//  3. For every Go function that belongs to an imported domain, look for one of
//     its candidate identifiers in the blob. The identifier parsed from the
//     registry signature is tried first and PascalCase(name) second, because
//     many functions deviate from the naming rule (e.g. sqlite_column_exists
//     is declared as `func ColumnExists`).
//
// It returns nil when no registry domain is imported or no source could be
// read. The order of the returned IDs is unspecified.
func auditGoApp(appDir string, all map[string]auditFnMeta) []string {
	domains := collectGoImportedDomains(appDir)
	if len(domains) == 0 {
		return nil
	}
	source := readGoSourceBlob(appDir)
	if source == "" {
		return nil
	}

	var found []string
	for _, fn := range all {
		if fn.lang != "go" || !domains[fn.domain] {
			continue
		}
		for _, candidate := range goCandidateTokens(fn) {
			if containsToken(source, candidate) {
				found = append(found, fn.id)
				break // one hit is enough for this function
			}
		}
	}
	return found
}
// goSignatureFnRe extracts the exported identifier from a Go signature of the
// form "func Name(...)" or "func (recv T) Name(...)".
var goSignatureFnRe = regexp.MustCompile(`^\s*func\s+(?:\([^)]*\)\s+)?([A-Z][A-Za-z0-9_]*)`)

// goCandidateTokens lists the identifiers to search for when looking for
// usages of a Go function: the real exported name parsed from the registry
// signature comes first, followed by PascalCase(name) when that yields a
// different, non-empty identifier. The result is never nil.
func goCandidateTokens(m auditFnMeta) []string {
	tokens := make([]string, 0, 2)
	if m.signature != "" {
		if sub := goSignatureFnRe.FindStringSubmatch(m.signature); sub != nil {
			tokens = append(tokens, sub[1])
		}
	}

	fallback := snakeToPascal(m.name)
	switch {
	case fallback == "":
		// Nothing usable could be derived from the name.
	case len(tokens) > 0 && tokens[0] == fallback:
		// The signature already produced the same identifier.
	default:
		tokens = append(tokens, fallback)
	}
	return tokens
}
// goImportRe captures the domain segment of a quoted registry import path.
var goImportRe = regexp.MustCompile(`"fn-registry/functions/([a-z]+)"`)

// collectGoImportedDomains returns the set of registry domains referenced by
// the production .go files under appDir. Files ending in _test.go and
// directories listed in the audit skip set are ignored; unreadable entries are
// silently skipped. Each file is scanned line by line and at most one domain is
// taken from any single line.
func collectGoImportedDomains(appDir string) map[string]bool {
	found := make(map[string]bool)

	// scanFile records every domain imported by one file.
	scanFile := func(path string) {
		fh, err := os.Open(path)
		if err != nil {
			return
		}
		defer fh.Close()
		lines := bufio.NewScanner(fh)
		for lines.Scan() {
			if sub := goImportRe.FindStringSubmatch(lines.Text()); sub != nil {
				found[sub[1]] = true
			}
		}
	}

	_ = filepath.Walk(appDir, func(path string, info os.FileInfo, err error) error {
		switch {
		case err != nil:
			return nil
		case info.IsDir():
			if auditShouldSkipDir(info.Name()) {
				return filepath.SkipDir
			}
			return nil
		case !strings.HasSuffix(path, ".go"), strings.HasSuffix(path, "_test.go"):
			return nil
		}
		scanFile(path)
		return nil
	})
	return found
}
// readGoSourceBlob returns the contents of every production .go file under
// appDir joined into one string, each file followed by a newline. Files ending
// in _test.go and directories in the audit skip set (build artefacts, vendor,
// etc.) are left out; files that cannot be read are skipped. The result is ""
// when nothing was read.
func readGoSourceBlob(appDir string) string {
	var blob strings.Builder
	visit := func(path string, info os.FileInfo, err error) error {
		if err != nil {
			return nil
		}
		if info.IsDir() {
			if auditShouldSkipDir(info.Name()) {
				return filepath.SkipDir
			}
			return nil
		}
		isProdGo := strings.HasSuffix(path, ".go") && !strings.HasSuffix(path, "_test.go")
		if !isProdGo {
			return nil
		}
		if content, readErr := os.ReadFile(path); readErr == nil {
			blob.Write(content)
			blob.WriteByte('\n')
		}
		return nil
	}
	_ = filepath.Walk(appDir, visit)
	return blob.String()
}
// containsToken reports whether token appears in src as a whole identifier,
// i.e. an occurrence that is neither preceded nor followed by a letter, digit
// or underscore (string boundaries count as delimiters). This avoids matching
// substrings inside longer names.
//
// The neighbouring characters are examined as whole UTF-8 runes, so a token
// next to a multi-byte character (typographic quotes, accented letters) is
// classified by that character and not by one of its raw bytes. An empty token
// never matches.
func containsToken(src, token string) bool {
	if token == "" {
		return false
	}
	for from := 0; from <= len(src)-len(token); {
		pos := strings.Index(src[from:], token)
		if pos < 0 {
			return false
		}
		start := from + pos
		left, right := src[:start], src[start+len(token):]
		// TrimRightFunc/TrimLeftFunc decode complete runes; an unchanged length
		// means the adjacent rune (if any) is not part of an identifier.
		boundaryBefore := len(strings.TrimRightFunc(left, isIdentRune)) == len(left)
		boundaryAfter := len(strings.TrimLeftFunc(right, isIdentRune)) == len(right)
		if boundaryBefore && boundaryAfter {
			return true
		}
		from = start + 1
	}
	return false
}

// isIdentRune reports whether r can be part of an identifier: an underscore,
// a Unicode letter or a Unicode digit.
func isIdentRune(r rune) bool {
	return r == '_' || unicode.IsLetter(r) || unicode.IsDigit(r)
}
// pyFromImportRe matches a Python "from <pkg> import <list>" statement that
// begins a line (after optional indentation). Group 1 is the dotted package
// path; group 2 is the import list, which is either a parenthesised block (which
// may span newlines) or the rest of the line.
//
// The match is anchored to the start of a line so that commented-out imports
// ("# from infra import x") and the word "from" in the middle of prose or
// strings are not counted as usage.
var pyFromImportRe = regexp.MustCompile(`(?m)^[ \t]*from\s+([\w.]+)\s+import\s+(\([\s\S]*?\)|[^\n]+)`)

// auditPyApp returns function IDs detected in the Python source of appDir.
//
// It recognises "from <pkg> import X, Y" statements where <pkg> is the root of a
// registry package, resolving the imported symbols to function IDs. Both the flat
// form ("from metabase import metabase_get_card") and the nested form
// ("from metabase.cards import metabase_get_card") are handled: the root package
// (the component before the first dot) is validated against the registry's Python
// package directories and each symbol is resolved against the whole package, not
// just the named sub-module. Parenthesised multi-line import lists and trailing
// "# noqa" comments are supported.
//
// Resolution is scoped to the matched package: symbols imported from a package
// that is NOT a registry package directory (e.g. "from numpy import array") are
// ignored, so the audit never produces false "missing" hits for third-party libs.
//
// Star imports ("from <pkg> import *") are NOT supported and yield no symbols —
// star imports are discouraged in the registry; see the .md notes.
//
// Directories in the audit skip set are not walked and unreadable files are
// skipped. The result is nil when nothing was detected; the order of the
// returned IDs is unspecified.
func auditPyApp(appDir string, all map[string]auditFnMeta) []string {
	// Build package-dir → (name → id) map for py functions. The package directory
	// is the first path component under python/functions/, which is NOT always the
	// function's registry domain (e.g. metabase functions live in
	// python/functions/metabase/ but have domain=infra), so it is derived from
	// file_path rather than the domain field.
	pkgFuncs := make(map[string]map[string]string) // "infra" → {"imap_connect": "imap_connect_py_infra"}
	for _, m := range all {
		if m.lang != "py" {
			continue
		}
		pkg := pyPackageDir(m.filePath)
		if pkg == "" {
			continue
		}
		byName := pkgFuncs[pkg]
		if byName == nil {
			byName = make(map[string]string)
			pkgFuncs[pkg] = byName
		}
		// Two functions of one package may share a name. `all` is a map, so
		// "last write wins" would pick a different ID from run to run; keep the
		// lexicographically smallest ID to make the result reproducible.
		if prev, dup := byName[m.name]; !dup || m.id < prev {
			byName[m.name] = m.id
		}
	}

	usedSet := make(map[string]bool)
	_ = filepath.Walk(appDir, func(path string, info os.FileInfo, err error) error {
		if err != nil {
			return nil
		}
		if info.IsDir() {
			if auditShouldSkipDir(info.Name()) {
				return filepath.SkipDir
			}
			return nil
		}
		if !strings.HasSuffix(path, ".py") {
			return nil
		}
		data, err := os.ReadFile(path)
		if err != nil {
			return nil
		}
		for _, m := range pyFromImportRe.FindAllStringSubmatch(string(data), -1) {
			// Root package = component before the first dot. Handles both the flat
			// ("metabase") and nested ("metabase.cards") import forms, plus relative
			// imports ("from .config import X" → root is "" → skipped).
			rootPkg := m[1]
			if i := strings.IndexByte(rootPkg, '.'); i >= 0 {
				rootPkg = rootPkg[:i]
			}
			funcs, ok := pkgFuncs[rootPkg]
			if !ok {
				continue
			}
			for _, sym := range parsePyImportedSymbols(m[2]) {
				if id, ok := funcs[sym]; ok {
					usedSet[id] = true
				}
			}
		}
		return nil
	})

	var used []string
	for id := range usedSet {
		used = append(used, id)
	}
	return used
}
// pyPackageDir returns the top-level package directory of a registry Python
// function from its file_path. For "python/functions/metabase/cards.py" it
// returns "metabase". Returns "" when the path is not under python/functions/
// or has no package component.
//
// file_path is data stored in registry.db, not a path produced on this host, so
// backslash separators are normalised on every operating system (not only on
// Windows, which is all filepath.ToSlash would cover). A registry written on one
// OS then resolves identically on another.
func pyPackageDir(filePath string) string {
	const prefix = "python/functions/"
	fp := strings.ReplaceAll(filePath, `\`, "/")
	if !strings.HasPrefix(fp, prefix) {
		return ""
	}
	rest := fp[len(prefix):]
	// A file placed directly under python/functions/ has no package directory.
	if i := strings.IndexByte(rest, '/'); i >= 0 {
		return rest[:i]
	}
	return ""
}
// parsePyImportedSymbols extracts the imported symbol names from the right-hand
// side of a Python "from X import <rhs>" statement. It handles single-line lists,
// parenthesised multi-line lists, "# ..." line comments and "as alias" renames
// (the original name is returned, not the alias).
//
// Anything after a ";" on a line belongs to a following statement
// ("from pkg import fn; fn()") and is ignored, so the symbol is reported as
// "fn" rather than "fn;". A bare "*" (star import) yields no symbols. The result
// is nil when no symbol is found.
func parsePyImportedSymbols(rhs string) []string {
	// Cut every line at the first comment or statement separator so that
	// "import foo # noqa", "import ( # noqa\n a,\n)" and "import foo; foo()"
	// don't pollute symbol parsing.
	var b strings.Builder
	for _, ln := range strings.Split(rhs, "\n") {
		if i := strings.IndexAny(ln, "#;"); i >= 0 {
			ln = ln[:i]
		}
		b.WriteString(ln)
		b.WriteByte('\n')
	}
	s := strings.TrimSpace(b.String())
	s = strings.TrimPrefix(s, "(")
	s = strings.TrimSuffix(s, ")")

	var out []string
	for _, part := range strings.Split(s, ",") {
		fields := strings.Fields(part) // splits "foo as bar" → ["foo","as","bar"]
		if len(fields) == 0 {
			continue // trailing comma or blank line inside the parentheses
		}
		sym := strings.TrimSuffix(fields[0], ")") // safety for "a, b)" tails
		if sym == "" || sym == "*" {
			continue
		}
		out = append(out, sym)
	}
	return out
}
// commonAbbrevs maps a lowercase snake_case word to the casing it takes inside
// a Go exported name. snakeToPascal looks up each word of a function name here
// (after lowercasing it) and falls back to plain capitalisation of the first
// letter when the word is absent.
// E.g. "sqlite_open" → "SQLiteOpen", "http_json_response" → "HTTPJSONResponse".
// Most entries are uppercased in full; a few keep mixed case ("SQLite",
// "OAuth", "Cmd", "Ctx", "Cfg", "Env").
var commonAbbrevs = map[string]string{
"http": "HTTP",
"https": "HTTPS",
"sql": "SQL",
"sqlite": "SQLite",
"url": "URL",
"api": "API",
"id": "ID",
"db": "DB",
"tls": "TLS",
"tcp": "TCP",
"udp": "UDP",
"ip": "IP",
"json": "JSON",
"yaml": "YAML",
"xml": "XML",
"html": "HTML",
"css": "CSS",
"csv": "CSV",
"ssh": "SSH",
"jwt": "JWT",
"oauth": "OAuth",
"oauth2": "OAuth2",
"spa": "SPA",
"cors": "CORS",
"rbac": "RBAC",
"crud": "CRUD",
"cli": "CLI",
"cpu": "CPU",
"gpu": "GPU",
"os": "OS",
"s3": "S3",
"gcs": "GCS",
"bq": "BQ",
"ttl": "TTL",
"rgb": "RGB",
"rgba": "RGBA",
"sse": "SSE",
"ws": "WS",
"smtp": "SMTP",
"imap": "IMAP",
"pop3": "POP3",
"dns": "DNS",
"vpn": "VPN",
"cmd": "Cmd",
"ctx": "Ctx",
"cfg": "Cfg",
"env": "Env",
"io": "IO",
"ok": "OK",
"ui": "UI",
}
// hasTSSources reports whether appDir contains at least one .ts/.tsx file
// outside the audit skip-dirs. Every file with one of those extensions counts,
// including test, spec and declaration files.
//
// The walk stops as soon as a file is found. Returning filepath.SkipDir from a
// file callback only skips the rest of that file's directory, so on its own it
// would let the walk continue through every sibling directory; the found guard
// at the top of the callback makes each later entry bail out immediately and
// the traversal unwinds without scanning the rest of the tree.
func hasTSSources(appDir string) bool {
	found := false
	_ = filepath.Walk(appDir, func(path string, info os.FileInfo, err error) error {
		if found {
			return filepath.SkipDir
		}
		if err != nil {
			return nil
		}
		if info.IsDir() {
			if auditShouldSkipDir(info.Name()) {
				return filepath.SkipDir
			}
			return nil
		}
		if strings.HasSuffix(path, ".ts") || strings.HasSuffix(path, ".tsx") {
			found = true
			return filepath.SkipDir
		}
		return nil
	})
	return found
}
// tsLibraryImportRe matches a quoted "@fn_library/<area>/<name>" module
// specifier; group 1 is the area and group 2 the function name.
var tsLibraryImportRe = regexp.MustCompile(`["']@fn_library/([a-zA-Z0-9_]+)/([a-zA-Z0-9_]+)["']`)

// auditTSApp scans the .ts/.tsx files under appDir for module specifiers of the
// form "@fn_library/<area>/<name>" and resolves each one to the ID of the
// registry TS function whose domain is <area> and whose name is <name>. Any
// occurrence of the specifier counts, so re-exports are treated as direct usage.
//
// Test files (*.test.ts*, *.spec.ts*), declaration files (*.d.ts), directories
// in the audit skip set and unreadable files are ignored. The result is never
// nil and its order is unspecified.
func auditTSApp(appDir string, all map[string]auditFnMeta) []string {
	// Lookup keyed by "<area>|<name>", e.g. "ui|color_bg" → "color_bg_ts_ui".
	idByAreaName := make(map[string]string)
	for _, fn := range all {
		if fn.lang == "ts" {
			idByAreaName[fn.domain+"|"+fn.name] = fn.id
		}
	}

	// File-name endings that mark non-production TypeScript.
	ignoredSuffixes := []string{".test.ts", ".test.tsx", ".spec.ts", ".spec.tsx", ".d.ts"}

	seen := make(map[string]bool)
	_ = filepath.Walk(appDir, func(path string, info os.FileInfo, err error) error {
		if err != nil {
			return nil
		}
		fileName := info.Name()
		if info.IsDir() {
			if auditShouldSkipDir(fileName) {
				return filepath.SkipDir
			}
			return nil
		}
		if !strings.HasSuffix(fileName, ".ts") && !strings.HasSuffix(fileName, ".tsx") {
			return nil
		}
		for _, suffix := range ignoredSuffixes {
			if strings.HasSuffix(fileName, suffix) {
				return nil
			}
		}
		content, readErr := os.ReadFile(path)
		if readErr != nil {
			return nil
		}
		for _, sub := range tsLibraryImportRe.FindAllStringSubmatch(string(content), -1) {
			if id, known := idByAreaName[sub[1]+"|"+sub[2]]; known {
				seen[id] = true
			}
		}
		return nil
	})

	ids := make([]string, 0, len(seen))
	for id := range seen {
		ids = append(ids, id)
	}
	return ids
}
// snakeToPascal converts a snake_case name into a PascalCase Go identifier.
// The name is split on underscores and empty segments are dropped. A segment
// whose lowercase form is a key of commonAbbrevs is replaced by the mapped
// spelling; any other segment gets its first byte uppercased and the rest left
// untouched. An input without usable segments yields "".
func snakeToPascal(s string) string {
	var out strings.Builder
	for _, word := range strings.Split(s, "_") {
		if word == "" {
			continue
		}
		if canonical, known := commonAbbrevs[strings.ToLower(word)]; known {
			out.WriteString(canonical)
			continue
		}
		out.WriteString(strings.ToUpper(word[:1]) + word[1:])
	}
	return out.String()
}