chore: auto-commit (7 archivos)

- call_monitor
- main.go
- operations.db
- operations.db-shm
- operations.db-wal
- cluster.go
- cluster_test.go

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
2026-05-16 16:33:24 +02:00
parent 37ae34bcfc
commit 8e51094d94
7 changed files with 320 additions and 0 deletions
BIN
View File
Binary file not shown.
+256
View File
@@ -0,0 +1,256 @@
package main
import (
"crypto/sha256"
"database/sql"
"encoding/hex"
"encoding/json"
"fmt"
"os"
"regexp"
"sort"
"strings"
"text/tabwriter"
)
// ClusterConfig holds the tunables for ClusterPatterns. Zero values are
// replaced by defaults when the config is used.
type ClusterConfig struct {
	// MinOccurrences is the smallest cluster size that is reported.
	// Values <= 0 fall back to 3.
	MinOccurrences int
	// LookbackDays limits the scan to calls from the last N days.
	// Values <= 0 fall back to 30.
	LookbackDays int
	// Tools lists the tool_used values to consider. When empty, the
	// built-in set (heredoc_py, heredoc_bash, sqlite_direct, bash_other)
	// is used.
	Tools []string
}
// PatternCluster describes a group of inline snippets that share the same
// normalized form.
type PatternCluster struct {
	// Hash identifies the cluster; it is derived from the normalized snippet.
	Hash string
	// Representative is the first raw snippet seen for the cluster, truncated.
	Representative string
	// Occurrences counts every call row that fell into the cluster.
	Occurrences int
	// SessionIDs holds the distinct non-empty session ids, in first-seen order.
	SessionIDs []string
	// FirstSeen and LastSeen are the smallest and largest ts values observed
	// in the cluster.
	FirstSeen, LastSeen int64
}
// Patterns for the volatile parts of a snippet, listed in the order
// normalizeSnippet applies them.
var (
	reDQuoted = regexp.MustCompile(`"[^"]*"`)
	reSQuoted = regexp.MustCompile(`'[^']*'`)
	rePath    = regexp.MustCompile(`/[A-Za-z0-9_./-]{6,}`)
	reHex     = regexp.MustCompile(`\b[0-9a-f]{8,}\b`)
	reNumber  = regexp.MustCompile(`\b\d+\b`)
	reSpaces  = regexp.MustCompile(`\s+`)
)

// normalizeSnippet reduces a snippet to a canonical shape so that snippets
// differing only in literals hash identically. The input is lower-cased, then
// quoted strings, paths, hex runs and numbers are swapped for fixed
// placeholders, whitespace runs are collapsed to a single space, and the
// result is trimmed.
func normalizeSnippet(s string) string {
	// Order matters: quoted strings go first so their contents are never
	// rewritten piecemeal, and hex runs go before numbers, which means an
	// all-digit run of 8+ characters becomes HEX rather than N.
	rewrites := [...]struct {
		pattern     *regexp.Regexp
		placeholder string
	}{
		{reDQuoted, `"STR"`},
		{reSQuoted, `'STR'`},
		{rePath, "/PATH"},
		{reHex, "HEX"},
		{reNumber, "N"},
		{reSpaces, " "},
	}

	out := strings.ToLower(s)
	for _, rw := range rewrites {
		out = rw.pattern.ReplaceAllString(out, rw.placeholder)
	}
	return strings.TrimSpace(out)
}
// hashSnippet returns a short identifier for a normalized snippet: the first
// 8 bytes of its SHA-256 digest, rendered as 16 lowercase hex characters.
func hashSnippet(norm string) string {
	const hexChars = 16 // two hex characters per digest byte kept

	digest := sha256.Sum256([]byte(norm))
	return hex.EncodeToString(digest[:])[:hexChars]
}
// ClusterPatterns groups inline snippets (calls rows with function_id = '')
// by the hash of their normalized text and returns every cluster that occurs
// at least cfg.MinOccurrences times.
//
// Only rows whose tool_used is in cfg.Tools and whose ts falls within the last
// cfg.LookbackDays days are scanned. Zero or negative config values fall back
// to the defaults (3 occurrences, 30 days, the built-in tool list).
//
// The function only reads from the database; call UpsertPatterns to persist
// the result. Clusters are returned ordered by Occurrences descending, with
// ties broken by Hash ascending so the output is deterministic.
func ClusterPatterns(db *sql.DB, cfg ClusterConfig) ([]PatternCluster, error) {
	if cfg.MinOccurrences <= 0 {
		cfg.MinOccurrences = 3
	}
	if cfg.LookbackDays <= 0 {
		cfg.LookbackDays = 30
	}
	if len(cfg.Tools) == 0 {
		cfg.Tools = []string{"heredoc_py", "heredoc_bash", "sqlite_direct", "bash_other"}
	}

	// One "?" per tool for the IN (...) list, followed by the lookback
	// parameter. Only placeholders are interpolated into the SQL text; every
	// value is bound as a query argument.
	placeholders := make([]string, len(cfg.Tools))
	args := make([]any, 0, len(cfg.Tools)+1)
	for i, t := range cfg.Tools {
		placeholders[i] = "?"
		args = append(args, t)
	}
	args = append(args, cfg.LookbackDays)

	q := fmt.Sprintf(`
SELECT command_snippet, session_id, ts
FROM calls
WHERE function_id = ''
AND command_snippet != ''
AND tool_used IN (%s)
AND ts >= CAST(strftime('%%s', 'now', '-' || ? || ' days') AS INTEGER)
ORDER BY ts ASC`,
		strings.Join(placeholders, ","))

	rows, err := db.Query(q, args...)
	if err != nil {
		return nil, fmt.Errorf("query calls: %w", err)
	}
	defer rows.Close()

	clusters := map[string]*PatternCluster{}
	// sessionsSeen de-duplicates session ids per cluster while SessionIDs
	// keeps them in first-seen order.
	sessionsSeen := map[string]map[string]struct{}{}
	for rows.Next() {
		var snippet, session string
		var ts int64
		if err := rows.Scan(&snippet, &session, &ts); err != nil {
			return nil, fmt.Errorf("scan call row: %w", err)
		}
		norm := normalizeSnippet(snippet)
		if norm == "" {
			continue
		}
		hash := hashSnippet(norm)
		c, ok := clusters[hash]
		if !ok {
			// The first raw snippet seen becomes the cluster's representative.
			c = &PatternCluster{
				Hash:           hash,
				Representative: trim(snippet, 240),
				FirstSeen:      ts,
				LastSeen:       ts,
			}
			clusters[hash] = c
			sessionsSeen[hash] = map[string]struct{}{}
		}
		c.Occurrences++
		if ts > c.LastSeen {
			c.LastSeen = ts
		}
		if ts < c.FirstSeen {
			c.FirstSeen = ts
		}
		if session != "" {
			if _, seen := sessionsSeen[hash][session]; !seen {
				sessionsSeen[hash][session] = struct{}{}
				c.SessionIDs = append(c.SessionIDs, session)
			}
		}
	}
	if err := rows.Err(); err != nil {
		return nil, fmt.Errorf("iterate calls: %w", err)
	}

	out := make([]PatternCluster, 0, len(clusters))
	for _, c := range clusters {
		if c.Occurrences >= cfg.MinOccurrences {
			out = append(out, *c)
		}
	}
	// Map iteration order is random, so equal counts need an explicit
	// tie-break to keep the result stable between runs.
	sort.Slice(out, func(i, j int) bool {
		if out[i].Occurrences != out[j].Occurrences {
			return out[i].Occurrences > out[j].Occurrences
		}
		return out[i].Hash < out[j].Hash
	})
	return out, nil
}
// UpsertPatterns writes clusters to the patterns table inside a single
// transaction; either every cluster is written or none is.
//
// A row is inserted for each new pattern_hash with an empty proposal_id. For
// an existing pattern_hash only occurrences, session_ids_json and last_seen
// are refreshed; representative_snippet, first_seen and proposal_id keep their
// stored values. Re-running with the same clusters therefore leaves the table
// unchanged.
//
// Session ids are stored as a JSON array; a cluster without session ids is
// stored as "[]" rather than the JSON literal null.
func UpsertPatterns(db *sql.DB, clusters []PatternCluster) error {
	tx, err := db.Begin()
	if err != nil {
		return fmt.Errorf("begin tx: %w", err)
	}
	// After a successful Commit this Rollback is a harmless no-op; on any
	// early return it discards the partial write.
	defer tx.Rollback()

	stmt, err := tx.Prepare(`
INSERT INTO patterns (pattern_hash, representative_snippet, occurrences,
session_ids_json, first_seen, last_seen, proposal_id)
VALUES (?, ?, ?, ?, ?, ?, '')
ON CONFLICT(pattern_hash) DO UPDATE SET
occurrences = excluded.occurrences,
session_ids_json = excluded.session_ids_json,
last_seen = excluded.last_seen
`)
	if err != nil {
		return fmt.Errorf("prepare upsert: %w", err)
	}
	defer stmt.Close()

	for _, c := range clusters {
		sessionIDs := c.SessionIDs
		if sessionIDs == nil {
			// json.Marshal encodes a nil slice as null; keep the column a
			// JSON array in every case.
			sessionIDs = []string{}
		}
		ids, err := json.Marshal(sessionIDs)
		if err != nil {
			return fmt.Errorf("encode session ids for pattern %s: %w", c.Hash, err)
		}
		if _, err := stmt.Exec(
			c.Hash, c.Representative, c.Occurrences, string(ids),
			c.FirstSeen, c.LastSeen,
		); err != nil {
			return fmt.Errorf("upsert pattern %s: %w", c.Hash, err)
		}
	}
	if err := tx.Commit(); err != nil {
		return fmt.Errorf("commit patterns: %w", err)
	}
	return nil
}
// splitCSV breaks a comma-separated list into its fields, stripping
// surrounding whitespace from each one and dropping fields that end up empty.
// The result is never nil, even when no field survives.
func splitCSV(s string) []string {
	parts := strings.Split(s, ",")
	// Filter in place: kept reuses the backing array of parts, and a write
	// never lands ahead of the element currently being read.
	kept := parts[:0]
	for _, raw := range parts {
		if field := strings.TrimSpace(raw); field != "" {
			kept = append(kept, field)
		}
	}
	return kept
}
// trim strips surrounding whitespace from s and caps the result at max bytes.
// When the cap would fall inside a multi-byte UTF-8 sequence, the cut moves
// back to the start of that sequence, so a valid UTF-8 input is never
// truncated into invalid UTF-8 (the result may then be a few bytes shorter
// than max). A max of zero or less yields the empty string.
func trim(s string, max int) string {
	s = strings.TrimSpace(s)
	if len(s) <= max {
		return s
	}
	if max <= 0 {
		return ""
	}
	cut := max
	// Bytes of the form 10xxxxxx are UTF-8 continuation bytes; step back
	// until s[cut] begins a rune so that s[:cut] ends on a rune boundary.
	for cut > 0 && s[cut]&0xC0 == 0x80 {
		cut--
	}
	return s[:cut]
}
// runClusterPatterns is the CLI entry point for pattern clustering. It
// clusters the calls found in the database at dbPath, optionally persists the
// clusters, and prints them to stdout as JSON or as a text table. On any
// failure it prints the error to stderr and exits with status 1.
func runClusterPatterns(dbPath string, cfg ClusterConfig, persist, formatJSON bool) {
	// The work happens in a helper that returns an error so that its deferred
	// db.Close() runs before os.Exit, which skips deferred calls.
	if err := execClusterPatterns(dbPath, cfg, persist, formatJSON); err != nil {
		fmt.Fprintf(os.Stderr, "%v\n", err)
		os.Exit(1)
	}
}

// execClusterPatterns does the work of runClusterPatterns and reports
// failures as errors instead of exiting the process.
func execClusterPatterns(dbPath string, cfg ClusterConfig, persist, formatJSON bool) error {
	db, err := openDB(dbPath)
	if err != nil {
		return fmt.Errorf("open db: %w", err)
	}
	defer db.Close()

	clusters, err := ClusterPatterns(db.conn, cfg)
	if err != nil {
		return fmt.Errorf("cluster: %w", err)
	}
	if persist {
		if err := UpsertPatterns(db.conn, clusters); err != nil {
			return fmt.Errorf("persist patterns: %w", err)
		}
	}

	if formatJSON {
		enc := json.NewEncoder(os.Stdout)
		enc.SetIndent("", " ")
		if err := enc.Encode(clusters); err != nil {
			return fmt.Errorf("encode json: %w", err)
		}
		return nil
	}

	tw := tabwriter.NewWriter(os.Stdout, 0, 0, 2, ' ', 0)
	// NOTE(review): cfg is passed to ClusterPatterns by value, so defaults it
	// applies for non-positive settings are not reflected in this header.
	fmt.Fprintf(tw, "=== %d pattern clusters (min %d occurrences, last %dd) ===\n",
		len(clusters), cfg.MinOccurrences, cfg.LookbackDays)
	if len(clusters) == 0 {
		fmt.Fprintln(tw, "(no clusters found)")
		if err := tw.Flush(); err != nil {
			return fmt.Errorf("write table: %w", err)
		}
		return nil
	}

	fmt.Fprintln(tw, "HASH\tOCCURRENCES\tSESSIONS\tFIRST_SEEN\tLAST_SEEN\tSNIPPET")
	for _, c := range clusters {
		// Keep each cluster on one table row.
		snippet := strings.ReplaceAll(c.Representative, "\n", "\\n")
		if len(snippet) > 70 {
			cut := 70
			// Step back over UTF-8 continuation bytes (10xxxxxx) so the
			// preview never ends in the middle of a multi-byte rune.
			for cut > 0 && snippet[cut]&0xC0 == 0x80 {
				cut--
			}
			snippet = snippet[:cut] + "..."
		}
		fmt.Fprintf(tw, "%s\t%d\t%d\t%d\t%d\t%s\n",
			c.Hash, c.Occurrences, len(c.SessionIDs),
			c.FirstSeen, c.LastSeen, snippet)
	}
	if err := tw.Flush(); err != nil {
		return fmt.Errorf("write table: %w", err)
	}

	if persist {
		fmt.Println()
		fmt.Printf("upserted %d clusters into patterns table\n", len(clusters))
	}
	return nil
}
+51
View File
@@ -0,0 +1,51 @@
package main
import "testing"
// TestNormalizeSnippet checks that each kind of noise (paths, quoted strings,
// numbers, extra whitespace, hex runs) is replaced by its placeholder.
func TestNormalizeSnippet(t *testing.T) {
	type example struct {
		in   string
		want string
	}
	examples := []example{
		{in: "sqlite3 /tmp/x.db \"SELECT 42\"", want: "sqlite3 /PATH \"STR\""},
		{in: "grep -n 'foo' line 123", want: "grep -n 'STR' line N"},
		{in: " multi space ", want: "multi space"},
		{in: "sha=ab12cd34ef56", want: "sha=HEX"},
	}
	for i := range examples {
		ex := examples[i]
		if got := normalizeSnippet(ex.in); got != ex.want {
			t.Errorf("normalize(%q)=%q want %q", ex.in, got, ex.want)
		}
	}
}
// TestHashSnippetStable checks that snippets differing only in a path hash to
// the same value, while a different command hashes to a different one.
func TestHashSnippetStable(t *testing.T) {
	digest := func(raw string) string {
		return hashSnippet(normalizeSnippet(raw))
	}

	lsFoo, lsBar := digest("ls /tmp/foo"), digest("ls /tmp/bar")
	if lsFoo != lsBar {
		t.Errorf("expected same hash after normalization: %s vs %s", lsFoo, lsBar)
	}

	if catFoo := digest("cat /tmp/foo"); catFoo == lsFoo {
		t.Errorf("expected different hash for different commands")
	}
}
// TestSplitCSV checks the exact fields splitCSV returns, not just how many.
// Comparing only lengths would let the whitespace case pass even if fields
// were returned untrimmed.
func TestSplitCSV(t *testing.T) {
	cases := []struct {
		in   string
		want []string
	}{
		{"a,b,c", []string{"a", "b", "c"}},
		{" a , b , c ", []string{"a", "b", "c"}},
		{"", nil},
		{"a,,b", []string{"a", "b"}},
	}
	for _, c := range cases {
		got := splitCSV(c.in)
		// Element-wise comparison; an empty result matches a nil want.
		same := len(got) == len(c.want)
		for i := 0; same && i < len(got); i++ {
			same = got[i] == c.want[i]
		}
		if !same {
			t.Errorf("splitCSV(%q)=%q want %q", c.in, got, c.want)
		}
	}
}
+13
View File
@@ -62,6 +62,19 @@ func main() {
MinSuccessRate: 0.9, MinSuccessRate: 0.9,
} }
runSequences(resolveDB(*dbPath), *detect, *report, *propose, *formatJSON, *root, cfg) runSequences(resolveDB(*dbPath), *detect, *report, *propose, *formatJSON, *root, cfg)
case "cluster-patterns":
minOcc := fs.Int("min-occurrences", 3, "Minimum occurrences to keep a cluster.")
lookback := fs.Int("lookback-days", 30, "How many days of calls to scan.")
persist := fs.Bool("persist", false, "Upsert clusters into the patterns table.")
formatJSON := fs.Bool("json", false, "Output JSON instead of text.")
toolsCSV := fs.String("tools", "heredoc_py,heredoc_bash,sqlite_direct,bash_other", "Comma-separated tool_used values to consider.")
fs.Parse(os.Args[2:])
tools := splitCSV(*toolsCSV)
runClusterPatterns(resolveDB(*dbPath), ClusterConfig{
MinOccurrences: *minOcc,
LookbackDays: *lookback,
Tools: tools,
}, *persist, *formatJSON)
case "-h", "--help", "help": case "-h", "--help", "help":
usage() usage()
default: default:
BIN
View File
Binary file not shown.
BIN
View File
Binary file not shown.
BIN
View File
Binary file not shown.