fix(infra): audit_uses_functions detecta imports Python anidados y multilinea (0056)
El parser Python de audit_uses_functions solo reconocia "from <pkg> import X" con un unico componente de paquete (regex \w+), por lo que: - "from <pkg>.<subpkg> import X" (import anidado) no matcheaba y la funcion se reportaba como falso unused_in_app_md. - Las listas multilinea con parentesis "from <pkg> import (\n a,\n b,\n)" no se parseaban (escaneo linea a linea). Cambios: - Regex acepta puntos en el paquete y bloques parentizados multilinea. - Resolucion validada contra el directorio de paquete del registry derivado de file_path (no del campo domain: las funciones metabase viven en python/functions/metabase/ pero tienen domain=infra). Imports de librerias externas se ignoran -> sin falsos missing. - parsePyImportedSymbols descarta comentarios "# noqa", maneja "as alias" y star imports (tratados como vacio, no soportados por diseno). - auditFnMeta carga file_path; query SELECT anade file_path. Tests (functions/infra/audit_uses_functions_test.go): - TestAuditUsesFunctions_DetectsNestedImport (golden) - TestAuditUsesFunctions_NoFalsePositiveOnNested (edge: nested + multilinea) - TestAuditUsesFunctions_StarImport (error/edge: star import no cuenta) Verificado con fn doctor uses-functions sobre apps reales: drift baja de 11/42 a 9/42. mail_manager (9 falsos por "from infra.X import Y") y demand_radar (3 por lista multilinea) quedan en 0 drift. El residual de osint_db/osint_web es carga dinamica via importlib, documentado como fuera de alcance. audit_uses_functions v1.0.0 -> v1.1.0. CHANGELOG actualizado. Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -30,6 +30,7 @@ type auditFnMeta struct {
|
||||
domain string
|
||||
lang string
|
||||
signature string
|
||||
filePath string // file_path as stored in registry.db (used to derive the Python package dir)
|
||||
}
|
||||
|
||||
// skipDirs are directory names ignored when walking source for audits.
|
||||
@@ -62,9 +63,11 @@ func auditShouldSkipDir(name string) bool { return auditSkipDirs[name] }
|
||||
// searches the source for the exported symbol derived from each function name
|
||||
// (snake_case → PascalCase) to achieve per-function granularity within a package.
|
||||
//
|
||||
// For Python apps it scans for "from <pkg> import X" patterns where <pkg> matches
|
||||
// a known registry domain, then resolves X to a function ID by matching the name
|
||||
// field in registry.db.
|
||||
// For Python apps it scans for "from <pkg> import X" patterns where the root of
|
||||
// <pkg> matches a registry Python package directory (derived from file_path),
|
||||
// then resolves each imported symbol to a function ID by name within that package.
|
||||
// Both flat ("from metabase import X") and nested ("from metabase.cards import X")
|
||||
// imports are handled, as are parenthesised multi-line lists.
|
||||
//
|
||||
// Returns an error only if registry.db cannot be opened. Apps where dir_path
|
||||
// does not exist on disk are reported with Missing/Unused = nil (cannot inspect).
|
||||
@@ -80,15 +83,15 @@ func AuditUsesFunctions(registryRoot string) ([]UsesFunctionsAudit, error) {
|
||||
return nil, fmt.Errorf("audit_uses_functions: ping db: %w", err)
|
||||
}
|
||||
|
||||
// Load all Go/Python/TS functions from registry: id → name, domain, lang, signature.
|
||||
rows, err := db.Query(`SELECT id, name, domain, lang, COALESCE(signature, '') FROM functions WHERE lang IN ('go','py','ts')`)
|
||||
// Load all Go/Python/TS functions from registry: id → name, domain, lang, signature, file_path.
|
||||
rows, err := db.Query(`SELECT id, name, domain, lang, COALESCE(signature, ''), COALESCE(file_path, '') FROM functions WHERE lang IN ('go','py','ts')`)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("audit_uses_functions: query functions: %w", err)
|
||||
}
|
||||
allFunctions := make(map[string]auditFnMeta) // id → meta
|
||||
for rows.Next() {
|
||||
var m auditFnMeta
|
||||
if err := rows.Scan(&m.id, &m.name, &m.domain, &m.lang, &m.signature); err != nil {
|
||||
if err := rows.Scan(&m.id, &m.name, &m.domain, &m.lang, &m.signature, &m.filePath); err != nil {
|
||||
continue
|
||||
}
|
||||
allFunctions[m.id] = m
|
||||
@@ -341,16 +344,46 @@ func isIdentRune(r rune) bool {
|
||||
}
|
||||
|
||||
// auditPyApp returns function IDs detected in the Python source of appDir.
|
||||
// Looks for: "from <pkg> import X, Y" patterns and resolves X, Y to function IDs.
|
||||
var pyFromImportRe = regexp.MustCompile(`from\s+(\w+)\s+import\s+(.+)`)
|
||||
//
|
||||
// It recognises "from <pkg> import X, Y" statements where <pkg> is the root of a
|
||||
// registry package, resolving the imported symbols to function IDs. Both the flat
|
||||
// form ("from metabase import metabase_get_card") and the nested form
|
||||
// ("from metabase.cards import metabase_get_card") are handled: the root package
|
||||
// (the component before the first dot) is validated against the registry's Python
|
||||
// package directories and each symbol is resolved against the whole package, not
|
||||
// just the named sub-module. Parenthesised multi-line import lists and trailing
|
||||
// "# noqa" comments are supported.
|
||||
//
|
||||
// Resolution is scoped to the matched package: symbols imported from a package
|
||||
// that is NOT a registry package directory (e.g. "from numpy import array") are
|
||||
// ignored, so the audit never produces false "missing" hits for third-party libs.
|
||||
//
|
||||
// Star imports ("from <pkg> import *") are NOT supported and yield no symbols —
|
||||
// star imports are discouraged in the registry; see the .md notes.
|
||||
//
|
||||
// The pattern accepts either a parenthesised block (which may span newlines) or
|
||||
// the rest of a single line as the import list.
|
||||
var pyFromImportRe = regexp.MustCompile(`from\s+([\w.]+)\s+import\s+(\([\s\S]*?\)|[^\n]+)`)
|
||||
|
||||
func auditPyApp(appDir string, all map[string]auditFnMeta) []string {
|
||||
// Build name→id map for py functions.
|
||||
nameToID := make(map[string]string) // "metabase_auth" → "metabase_auth_py_infra"
|
||||
// Build package-dir → (name → id) map for py functions. The package directory
|
||||
// is the first path component under python/functions/, which is NOT always the
|
||||
// function's registry domain (e.g. metabase functions live in
|
||||
// python/functions/metabase/ but have domain=infra), so it is derived from
|
||||
// file_path rather than the domain field.
|
||||
pkgFuncs := make(map[string]map[string]string) // "infra" → {"imap_connect": "imap_connect_py_infra"}
|
||||
for _, m := range all {
|
||||
if m.lang == "py" {
|
||||
nameToID[m.name] = m.id
|
||||
if m.lang != "py" {
|
||||
continue
|
||||
}
|
||||
pkg := pyPackageDir(m.filePath)
|
||||
if pkg == "" {
|
||||
continue
|
||||
}
|
||||
if pkgFuncs[pkg] == nil {
|
||||
pkgFuncs[pkg] = make(map[string]string)
|
||||
}
|
||||
pkgFuncs[pkg][m.name] = m.id
|
||||
}
|
||||
|
||||
usedSet := make(map[string]bool)
|
||||
@@ -368,23 +401,25 @@ func auditPyApp(appDir string, all map[string]auditFnMeta) []string {
|
||||
if !strings.HasSuffix(path, ".py") {
|
||||
return nil
|
||||
}
|
||||
f, err := os.Open(path)
|
||||
data, err := os.ReadFile(path)
|
||||
if err != nil {
|
||||
return nil
|
||||
}
|
||||
defer f.Close()
|
||||
sc := bufio.NewScanner(f)
|
||||
for sc.Scan() {
|
||||
line := strings.TrimSpace(sc.Text())
|
||||
if m := pyFromImportRe.FindStringSubmatch(line); m != nil {
|
||||
// m[2] = "X, Y, Z" or "X"
|
||||
names := strings.Split(m[2], ",")
|
||||
for _, nm := range names {
|
||||
nm = strings.TrimSpace(nm)
|
||||
nm = strings.Fields(nm)[0] // strip "as alias"
|
||||
if id, ok := nameToID[nm]; ok {
|
||||
usedSet[id] = true
|
||||
}
|
||||
for _, m := range pyFromImportRe.FindAllStringSubmatch(string(data), -1) {
|
||||
// Root package = component before the first dot. Handles both the flat
|
||||
// ("metabase") and nested ("metabase.cards") import forms, plus relative
|
||||
// imports ("from .config import X" → root is "" → skipped).
|
||||
rootPkg := m[1]
|
||||
if i := strings.IndexByte(rootPkg, '.'); i >= 0 {
|
||||
rootPkg = rootPkg[:i]
|
||||
}
|
||||
funcs, ok := pkgFuncs[rootPkg]
|
||||
if !ok {
|
||||
continue
|
||||
}
|
||||
for _, sym := range parsePyImportedSymbols(m[2]) {
|
||||
if id, ok := funcs[sym]; ok {
|
||||
usedSet[id] = true
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -398,6 +433,57 @@ func auditPyApp(appDir string, all map[string]auditFnMeta) []string {
|
||||
return used
|
||||
}
|
||||
|
||||
// pyPackageDir returns the top-level package directory of a registry Python
// function, derived from its file_path. For "python/functions/metabase/cards.py"
// it returns "metabase".
//
// Both "/" and "\" are accepted as path separators regardless of the host OS,
// and a leading "./" is ignored, so the stored file_path resolves the same way
// wherever the audit runs.
//
// It returns "" when the path is not under python/functions/ or has no package
// component (for example a file placed directly in python/functions/).
func pyPackageDir(filePath string) string {
	const prefix = "python/functions/"

	// filepath.ToSlash only rewrites the host separator (a no-op on Unix), so
	// backslashes are normalised explicitly instead.
	fp := strings.ReplaceAll(filePath, `\`, "/")
	fp = strings.TrimPrefix(fp, "./")
	if !strings.HasPrefix(fp, prefix) {
		return ""
	}

	rest := fp[len(prefix):]
	i := strings.IndexByte(rest, '/')
	if i < 0 {
		// No directory component after the prefix: the file is not in a package.
		return ""
	}
	return rest[:i]
}
|
||||
|
||||
// parsePyImportedSymbols extracts the imported symbol names from the right-hand
// side of a Python "from X import <rhs>" statement.
//
// It handles single-line lists, parenthesised multi-line lists, "# ..." line
// comments and "as alias" renames (the original name is returned, not the
// alias). For a single-line list, anything after a ";" statement separator is
// ignored; for a parenthesised list, anything after the first ")" is ignored.
//
// Only identifier-shaped tokens are returned, so a bare "*" (star import), a
// trailing "\" continuation marker and other stray tokens yield no symbols.
// The result is nil when no symbol is found.
func parsePyImportedSymbols(rhs string) []string {
	// Drop trailing line comments so "import foo # noqa" and
	// "import ( # noqa\n a,\n)" don't pollute symbol parsing.
	var b strings.Builder
	b.Grow(len(rhs) + 1)
	for _, ln := range strings.Split(rhs, "\n") {
		if i := strings.IndexByte(ln, '#'); i >= 0 {
			ln = ln[:i]
		}
		b.WriteString(ln)
		b.WriteByte('\n')
	}

	s := strings.TrimSpace(b.String())
	if strings.HasPrefix(s, "(") {
		// Parenthesised list: the import list ends at the first ")".
		s = s[1:]
		if i := strings.IndexByte(s, ')'); i >= 0 {
			s = s[:i]
		}
	} else if i := strings.IndexByte(s, ';'); i >= 0 {
		// One-line form: ";" starts the next statement on the same line.
		s = s[:i]
	}

	var out []string
	for _, part := range strings.Split(s, ",") {
		fields := strings.Fields(part) // splits "foo as bar" → ["foo","as","bar"]
		if len(fields) == 0 {
			continue
		}
		sym := strings.TrimSuffix(fields[0], ")") // safety for "a, b)" tails
		if !isPyIdentToken(sym) {
			continue // "*", "\" and other non-identifier residue
		}
		out = append(out, sym)
	}
	return out
}

// isPyIdentToken reports whether tok is shaped like a Python identifier: a
// non-empty run of ASCII letters, digits and underscores that does not start
// with a digit. Non-ASCII runes are accepted anywhere so Unicode identifiers
// are not rejected.
func isPyIdentToken(tok string) bool {
	for i, r := range tok {
		switch {
		case r == '_', r >= 'a' && r <= 'z', r >= 'A' && r <= 'Z', r >= 0x80:
		case r >= '0' && r <= '9' && i > 0:
		default:
			return false
		}
	}
	return tok != ""
}
|
||||
|
||||
// snakeToPascal converts snake_case to PascalCase (Go exported name).
|
||||
// E.g. "sqlite_open" → "SQLiteOpen", "http_json_response" → "HTTPJSONResponse".
|
||||
// Common abbreviations are uppercased in full.
|
||||
|
||||
Reference in New Issue
Block a user