chore: auto-commit (799 archivos)
- .claude/CLAUDE.md - .claude/commands/subagentes.md - .claude/rules/INDEX.md - .mcp.json - bash/functions/cybersecurity/analyze_dns.md - bash/functions/cybersecurity/audit_http_headers.md - bash/functions/cybersecurity/audit_ssh_config.md - bash/functions/cybersecurity/check_firewall.md - bash/functions/cybersecurity/detect_suspicious_users.md - bash/functions/cybersecurity/encrypt_file.md - ... Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -0,0 +1,341 @@
|
||||
package infra
|
||||
|
||||
import (
|
||||
"crypto/sha256"
|
||||
"database/sql"
|
||||
"fmt"
|
||||
"io/fs"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"regexp"
|
||||
"strings"
|
||||
|
||||
_ "github.com/mattn/go-sqlite3"
|
||||
)
|
||||
|
||||
// CopiedCodeEntry describes one function found in an app file whose
// normalized body matches the normalized body of a registry function.
// The MVP audit only ever emits entries with Kind "exact_copy".
type CopiedCodeEntry struct {
	// AppFile is the path of the scanned file, relative to the registry root.
	AppFile string `json:"app_file"`
	// AppFunction is the name of the matching function inside AppFile.
	AppFunction string `json:"app_function"`
	// RegistryID is the id of the registry function whose body matched.
	RegistryID string `json:"registry_id"`
	// BodyHash is the normalized body hash shared by both functions.
	BodyHash string `json:"body_hash"`
	// Similarity is the match score; exact copies are reported as 1.0.
	Similarity float64 `json:"similarity"`
	// Kind is one of: exact_copy | near_copy | partial_match.
	Kind string `json:"kind"`
}
|
||||
|
||||
// AuditCopiedCode walks apps/ and projects/*/apps/, extracts function
|
||||
// declarations per language, computes a normalized body hash (strip
|
||||
// comments + collapse whitespace), and matches against fingerprints
|
||||
// built from registry.db.functions.code.
|
||||
//
|
||||
// MVP scope:
|
||||
// - Languages: go, py, bash, ts, cpp
|
||||
// - Match level: exact_copy only (similarity = 1.0)
|
||||
// - Skips paths whose own file_path is the registry function (a function
|
||||
// is not a copy of itself).
|
||||
//
|
||||
// Returns the list of suspected copies. Does NOT write to any DB.
|
||||
// Persistence is the caller's responsibility (e.g. call_monitor).
|
||||
func AuditCopiedCode(registryRoot string) ([]CopiedCodeEntry, error) {
|
||||
dbPath := filepath.Join(registryRoot, "registry.db")
|
||||
db, err := sql.Open("sqlite3", dbPath+"?_journal_mode=WAL")
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("audit_copied_code: open db: %w", err)
|
||||
}
|
||||
defer db.Close()
|
||||
|
||||
// Build fingerprint index: normalized_hash -> [registry_id ...]
|
||||
// and skip-set: file_path -> registry_id (so we don't flag the function
|
||||
// as a copy of itself).
|
||||
rows, err := db.Query("SELECT id, name, lang, code, file_path FROM functions WHERE code != ''")
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("audit_copied_code: query functions: %w", err)
|
||||
}
|
||||
defer rows.Close()
|
||||
|
||||
fpIndex := map[string][]string{}
|
||||
registryFilePaths := map[string]struct{}{}
|
||||
for rows.Next() {
|
||||
var id, name, lang, code, filePath string
|
||||
if err := rows.Scan(&id, &name, &lang, &code, &filePath); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
if filePath != "" {
|
||||
registryFilePaths[filePath] = struct{}{}
|
||||
}
|
||||
// If the `code` column contains a whole module (e.g. Python file
|
||||
// with multiple defs), only index the function declaration whose
|
||||
// name matches the registry entry. Otherwise (single-fn files
|
||||
// like Go), hash the whole body. Both paths yield a single
|
||||
// normalized hash mapped to this id.
|
||||
decls := extractFunctions(code, lang)
|
||||
var fnBody string
|
||||
if len(decls) > 1 {
|
||||
for _, d := range decls {
|
||||
if d.Name == name {
|
||||
fnBody = d.Body
|
||||
break
|
||||
}
|
||||
}
|
||||
} else if len(decls) == 1 {
|
||||
fnBody = decls[0].Body
|
||||
} else {
|
||||
fnBody = code
|
||||
}
|
||||
if fnBody == "" {
|
||||
continue
|
||||
}
|
||||
h := normalizedBodyHash(fnBody, lang)
|
||||
if h == "" {
|
||||
continue
|
||||
}
|
||||
fpIndex[h] = append(fpIndex[h], id)
|
||||
}
|
||||
if err := rows.Err(); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
// Walk app trees
|
||||
scanDirs := []string{filepath.Join(registryRoot, "apps")}
|
||||
if entries, err := os.ReadDir(filepath.Join(registryRoot, "projects")); err == nil {
|
||||
for _, p := range entries {
|
||||
if !p.IsDir() {
|
||||
continue
|
||||
}
|
||||
scanDirs = append(scanDirs, filepath.Join(registryRoot, "projects", p.Name(), "apps"))
|
||||
}
|
||||
}
|
||||
|
||||
var out []CopiedCodeEntry
|
||||
for _, d := range scanDirs {
|
||||
_ = filepath.WalkDir(d, func(path string, dirent fs.DirEntry, err error) error {
|
||||
if err != nil {
|
||||
return nil
|
||||
}
|
||||
if dirent.IsDir() {
|
||||
if shouldSkipDir(dirent.Name()) {
|
||||
return filepath.SkipDir
|
||||
}
|
||||
return nil
|
||||
}
|
||||
lang := langFromExt(path)
|
||||
if lang == "" {
|
||||
return nil
|
||||
}
|
||||
rel, _ := filepath.Rel(registryRoot, path)
|
||||
// Don't audit the registry function file itself
|
||||
if _, isRegistry := registryFilePaths[rel]; isRegistry {
|
||||
return nil
|
||||
}
|
||||
data, err := os.ReadFile(path)
|
||||
if err != nil {
|
||||
return nil
|
||||
}
|
||||
funcs := extractFunctions(string(data), lang)
|
||||
for _, fn := range funcs {
|
||||
h := normalizedBodyHash(fn.Body, lang)
|
||||
if h == "" {
|
||||
continue
|
||||
}
|
||||
if matches, ok := fpIndex[h]; ok {
|
||||
for _, rid := range matches {
|
||||
out = append(out, CopiedCodeEntry{
|
||||
AppFile: rel,
|
||||
AppFunction: fn.Name,
|
||||
RegistryID: rid,
|
||||
BodyHash: h,
|
||||
Similarity: 1.0,
|
||||
Kind: "exact_copy",
|
||||
})
|
||||
}
|
||||
}
|
||||
}
|
||||
return nil
|
||||
})
|
||||
}
|
||||
return out, nil
|
||||
}
|
||||
|
||||
// ---- Helpers (unexported) ----
|
||||
|
||||
// shouldSkipDir reports whether a directory with the given base name is one
// of the VCS, dependency, cache, or build-output directories that the walk
// does not descend into.
func shouldSkipDir(name string) bool {
	skipped := [...]string{
		".git", ".venv", "node_modules", "__pycache__", "build",
		"dist", "vendor", ".pytest_cache", ".cache", "_vendored",
	}
	for _, candidate := range skipped {
		if candidate == name {
			return true
		}
	}
	return false
}
|
||||
|
||||
// langFromExt maps a file path to the language key used by the extraction
// and normalization helpers, based on its case-insensitive extension.
// It returns "" for extensions that are not audited.
func langFromExt(path string) string {
	ext := strings.ToLower(filepath.Ext(path))
	table := [...]struct{ ext, lang string }{
		{".go", "go"},
		{".py", "py"},
		{".sh", "bash"},
		{".ts", "ts"},
		{".tsx", "ts"},
		{".cpp", "cpp"},
		{".cc", "cpp"},
		{".cxx", "cpp"},
	}
	for _, row := range table {
		if row.ext == ext {
			return row.lang
		}
	}
	return ""
}
|
||||
|
||||
// normalizedBodyHash returns a 16-hex-character fingerprint (the first 8
// bytes of the SHA-256 digest) of code after comment stripping and
// whitespace collapsing for lang. It returns "" when the normalized text is
// shorter than 20 bytes: such bodies are too trivial for a match to be
// meaningful.
func normalizedBodyHash(code, lang string) string {
	norm := stripCommentsAndWhitespace(code, lang)
	if len(norm) < 20 {
		return ""
	}
	sum := sha256.Sum256([]byte(norm))
	return fmt.Sprintf("%x", sum[:8])
}

// stripCommentsAndWhitespace removes comments according to lang ("go", "ts",
// "cpp": C-style; "py": triple-quoted strings then # comments; "bash":
// word-start # comments) and collapses every whitespace run to one space.
// Unknown languages only get the whitespace collapse.
func stripCommentsAndWhitespace(s, lang string) string {
	switch lang {
	case "go", "ts", "cpp":
		s = stripCStyleComments(s)
	case "py":
		s = stripPythonDocstrings(s)
		s = stripHashComments(s)
	case "bash":
		s = stripBashComments(s)
	}
	return collapseWhitespace(s)
}

// Comment/whitespace patterns. All are purely lexical: none of them knows
// about string literals, so comment markers inside strings are stripped too.
var (
	reCStyleSingle = regexp.MustCompile(`//[^\n]*`)
	reCStyleMulti  = regexp.MustCompile(`(?s)/\*.*?\*/`)
	reHashLine     = regexp.MustCompile(`(?m)#.*$`)
	// In bash a comment starts only where '#' begins a word, so '#' must be
	// at the start of a line or preceded by whitespace. This keeps "$#",
	// "${#arr[@]}" and "16#ff" intact.
	reBashComment = regexp.MustCompile(`(?m)(^|\s)#.*$`)
	reTripleQuote = regexp.MustCompile(`(?s)"""[\s\S]*?"""|'''[\s\S]*?'''`)
	reWS          = regexp.MustCompile(`\s+`)
)

// stripCStyleComments replaces /* ... */ blocks and then // line comments
// with a single space each.
func stripCStyleComments(s string) string {
	s = reCStyleMulti.ReplaceAllString(s, " ")
	s = reCStyleSingle.ReplaceAllString(s, " ")
	return s
}

// stripHashComments deletes everything from any '#' to the end of its line.
func stripHashComments(s string) string {
	return reHashLine.ReplaceAllString(s, "")
}

// stripBashComments deletes bash comments: a '#' that starts a word, through
// the end of the line. The whitespace (if any) before the '#' is kept.
func stripBashComments(s string) string {
	return reBashComment.ReplaceAllString(s, "${1}")
}

// stripPythonDocstrings replaces every triple-quoted string (docstring or
// not) with a single space.
func stripPythonDocstrings(s string) string {
	return reTripleQuote.ReplaceAllString(s, " ")
}

// collapseWhitespace turns each whitespace run into one space and trims the
// result.
func collapseWhitespace(s string) string {
	return strings.TrimSpace(reWS.ReplaceAllString(s, " "))
}
|
||||
|
||||
// fnDecl is one function declaration found by extractFunctions.
type fnDecl struct {
	// Name is the identifier captured by the language-specific pattern.
	Name string
	// Body is the function's source text. For brace languages it runs from
	// the opening '{' through the matching '}'. For Python it runs from the
	// def line through the last line belonging to the function.
	Body string
}

// Declaration patterns, compiled once at package scope because
// extractFunctions runs for every registry row and every scanned file.
// The bash and cpp patterns consume the opening '{'; the go and ts patterns
// stop at the '(' of the parameter list.
var (
	reGoFunc   = regexp.MustCompile(`(?m)^func\s+(?:\([^)]*\)\s+)?(\w+)\s*\(`)
	reBashFunc = regexp.MustCompile(`(?m)^(\w[\w_]*)\s*\(\s*\)\s*\{`)
	reTSFunc   = regexp.MustCompile(`(?m)(?:export\s+)?(?:async\s+)?function\s+(\w+)\s*\(`)
	reCppFunc  = regexp.MustCompile(`(?m)^[\w:][\w:\s\*&<>,]*\s+(\w+)\s*\([^)]*\)\s*(?:const\s*)?(?:noexcept\s*)?\{`)
	rePyDef    = regexp.MustCompile(`^(\s*)def\s+(\w+)\s*\(`)
)

// extractFunctions extracts function declarations from src for the given
// language key ("go", "bash", "ts", "cpp", "py"). It returns nil for any
// other language.
//
// MVP: regex-based with naive brace matching. It misses nested closures,
// methods with complex receivers, and multi-line signatures with an
// embedded '{'.
func extractFunctions(src, lang string) []fnDecl {
	switch lang {
	case "go":
		return extractBracedFunctions(src, reGoFunc)
	case "bash":
		return extractBracedFunctions(src, reBashFunc)
	case "ts":
		return extractBracedFunctions(src, reTSFunc)
	case "cpp":
		return extractBracedFunctions(src, reCppFunc)
	case "py":
		return extractPythonFunctions(src)
	}
	return nil
}

// extractBracedFunctions finds each match of re in src (capture group 1 is
// the function name), locates the body's opening '{', and balances braces to
// find its end. Matches with no opening brace or an unbalanced body are
// dropped.
//
// Crude: brace counting ignores strings, comments, and char literals, so
// false positives are possible on unusual code.
func extractBracedFunctions(src string, re *regexp.Regexp) []fnDecl {
	var out []fnDecl
	for _, m := range re.FindAllStringSubmatchIndex(src, -1) {
		name := src[m[2]:m[3]]

		start := -1
		if m[1] > m[0] && src[m[1]-1] == '{' {
			// The pattern already consumed the opening brace (bash, cpp).
			// Searching past the match would skip it and pick up a nested
			// brace, or bail out and drop the function.
			start = m[1] - 1
		} else {
			// Find the first '{' after the match end.
			for i := m[1]; i < len(src); i++ {
				if src[i] == '{' {
					start = i
					break
				}
				// A blank line before any '{' means this is not a
				// declaration with a body.
				if src[i] == '\n' && i+1 < len(src) && src[i+1] == '\n' {
					break
				}
			}
		}
		if start == -1 {
			continue
		}

		depth, end := 0, -1
		for i := start; i < len(src) && end == -1; i++ {
			switch src[i] {
			case '{':
				depth++
			case '}':
				depth--
				if depth == 0 {
					end = i + 1
				}
			}
		}
		if end == -1 {
			continue
		}
		out = append(out, fnDecl{Name: name, Body: src[start:end]})
	}
	return out
}

// extractPythonFunctions extracts every `def name(` declaration in src using
// indentation: the body runs from the def line until the first non-blank
// line indented no deeper than the def itself. Blank lines inside or after
// the function are kept in the body.
//
// Known limitations: a signature spread over several lines ends the body at
// its closing "):" line when that line is not indented deeper than the def,
// and `async def` is not recognized.
func extractPythonFunctions(src string) []fnDecl {
	var out []fnDecl
	lines := strings.Split(src, "\n")
	for i, line := range lines {
		m := rePyDef.FindStringSubmatch(line)
		if m == nil {
			continue
		}
		defIndent := len(m[1])

		// Advance end to the first non-blank line at or below the def's
		// indentation; lines[i:end] is the function.
		end := i + 1
		for ; end < len(lines); end++ {
			l := lines[end]
			if strings.TrimSpace(l) == "" {
				continue
			}
			indent := len(l) - len(strings.TrimLeft(l, " \t"))
			if indent <= defIndent {
				break
			}
		}
		out = append(out, fnDecl{Name: m[2], Body: strings.Join(lines[i:end], "\n")})
	}
	return out
}
|
||||
Reference in New Issue
Block a user